mirror of
https://github.com/JayDDee/cpuminer-opt.git
synced 2026-02-22 16:33:08 +00:00
v25.4
This commit is contained in:
@@ -6,15 +6,15 @@
|
||||
|
||||
#if defined (BLAKE_4WAY)
|
||||
|
||||
blake256r14_4way_context blake_4w_ctx;
|
||||
blake256r14_4x32_context blake_4w_ctx;
|
||||
|
||||
void blakehash_4way(void *state, const void *input)
|
||||
{
|
||||
uint32_t vhash[8*4] __attribute__ ((aligned (64)));
|
||||
blake256r14_4way_context ctx;
|
||||
blake256r14_4x32_context ctx;
|
||||
memcpy( &ctx, &blake_4w_ctx, sizeof ctx );
|
||||
blake256r14_4way_update( &ctx, input + (64<<2), 16 );
|
||||
blake256r14_4way_close( &ctx, vhash );
|
||||
blake256r14_4x32_update( &ctx, input + (64<<2), 16 );
|
||||
blake256r14_4x32_close( &ctx, vhash );
|
||||
dintrlv_4x32( state, state+32, state+64, state+96, vhash, 256 );
|
||||
}
|
||||
|
||||
@@ -35,8 +35,8 @@ int scanhash_blake_4way( struct work *work, uint32_t max_nonce,
|
||||
HTarget = 0x7f;
|
||||
|
||||
v128_bswap32_intrlv80_4x32( vdata, pdata );
|
||||
blake256r14_4way_init( &blake_4w_ctx );
|
||||
blake256r14_4way_update( &blake_4w_ctx, vdata, 64 );
|
||||
blake256r14_4x32_init( &blake_4w_ctx );
|
||||
blake256r14_4x32_update( &blake_4w_ctx, vdata, 64 );
|
||||
|
||||
do {
|
||||
*noncev = v128_bswap32( _mm_set_epi32( n+3, n+2, n+1, n ) );
|
||||
@@ -61,15 +61,15 @@ int scanhash_blake_4way( struct work *work, uint32_t max_nonce,
|
||||
|
||||
#if defined(BLAKE_8WAY)
|
||||
|
||||
blake256r14_8way_context blake_8w_ctx;
|
||||
blake256r14_8x32_context blake_8w_ctx;
|
||||
|
||||
void blakehash_8way( void *state, const void *input )
|
||||
{
|
||||
uint32_t vhash[8*8] __attribute__ ((aligned (64)));
|
||||
blake256r14_8way_context ctx;
|
||||
blake256r14_8x32_context ctx;
|
||||
memcpy( &ctx, &blake_8w_ctx, sizeof ctx );
|
||||
blake256r14_8way( &ctx, input + (64<<3), 16 );
|
||||
blake256r14_8way_close( &ctx, vhash );
|
||||
blake256r14_8x32( &ctx, input + (64<<3), 16 );
|
||||
blake256r14_8x32_close( &ctx, vhash );
|
||||
_dintrlv_8x32( state, state+ 32, state+ 64, state+ 96,
|
||||
state+128, state+160, state+192, state+224,
|
||||
vhash, 256 );
|
||||
@@ -93,8 +93,8 @@ int scanhash_blake_8way( struct work *work, uint32_t max_nonce,
|
||||
|
||||
mm256_bswap32_intrlv80_8x32( vdata, pdata );
|
||||
|
||||
blake256r14_8way_init( &blake_8w_ctx );
|
||||
blake256r14_8way( &blake_8w_ctx, vdata, 64 );
|
||||
blake256r14_8x32_init( &blake_8w_ctx );
|
||||
blake256r14_8x32( &blake_8w_ctx, vdata, 64 );
|
||||
|
||||
do {
|
||||
*noncev = mm256_bswap_32( _mm256_set_epi32( n+7, n+6, n+5, n+4,
|
||||
|
||||
@@ -423,33 +423,6 @@ void blake256_transform_le( uint32_t *H, const uint32_t *buf,
|
||||
(state)->T1 = T1; \
|
||||
} while (0)
|
||||
|
||||
|
||||
#if defined(__SSSE3__)
|
||||
|
||||
#define BLAKE256_4X32_BLOCK_BSWAP32 \
|
||||
{ \
|
||||
v128_t shuf_bswap32 = v128_set64( 0x0c0d0e0f08090a0b, \
|
||||
0x0405060700010203 ); \
|
||||
M0 = _mm_shuffle_epi8( buf[ 0], shuf_bswap32 ); \
|
||||
M1 = _mm_shuffle_epi8( buf[ 1], shuf_bswap32 ); \
|
||||
M2 = _mm_shuffle_epi8( buf[ 2], shuf_bswap32 ); \
|
||||
M3 = _mm_shuffle_epi8( buf[ 3], shuf_bswap32 ); \
|
||||
M4 = _mm_shuffle_epi8( buf[ 4], shuf_bswap32 ); \
|
||||
M5 = _mm_shuffle_epi8( buf[ 5], shuf_bswap32 ); \
|
||||
M6 = _mm_shuffle_epi8( buf[ 6], shuf_bswap32 ); \
|
||||
M7 = _mm_shuffle_epi8( buf[ 7], shuf_bswap32 ); \
|
||||
M8 = _mm_shuffle_epi8( buf[ 8], shuf_bswap32 ); \
|
||||
M9 = _mm_shuffle_epi8( buf[ 9], shuf_bswap32 ); \
|
||||
MA = _mm_shuffle_epi8( buf[10], shuf_bswap32 ); \
|
||||
MB = _mm_shuffle_epi8( buf[11], shuf_bswap32 ); \
|
||||
MC = _mm_shuffle_epi8( buf[12], shuf_bswap32 ); \
|
||||
MD = _mm_shuffle_epi8( buf[13], shuf_bswap32 ); \
|
||||
ME = _mm_shuffle_epi8( buf[14], shuf_bswap32 ); \
|
||||
MF = _mm_shuffle_epi8( buf[15], shuf_bswap32 ); \
|
||||
}
|
||||
|
||||
#else // SSE2
|
||||
|
||||
#define BLAKE256_4X32_BLOCK_BSWAP32 \
|
||||
{ \
|
||||
M0 = v128_bswap32( buf[0] ); \
|
||||
@@ -470,8 +443,6 @@ void blake256_transform_le( uint32_t *H, const uint32_t *buf,
|
||||
MF = v128_bswap32( buf[15] ); \
|
||||
}
|
||||
|
||||
#endif // SSSE3 else SSE2
|
||||
|
||||
#define COMPRESS32_4X32( rounds ) \
|
||||
{ \
|
||||
v128_t M0, M1, M2, M3, M4, M5, M6, M7; \
|
||||
@@ -926,22 +897,6 @@ void blake256_4x32_final_rounds_le( void *final_hash, const void *midstate,
|
||||
ROUND_S_4X32_3;
|
||||
}
|
||||
|
||||
#if defined(__SSSE3__)
|
||||
|
||||
const v128_t shuf_bswap32 =
|
||||
v128_set64( 0x0c0d0e0f08090a0b, 0x0405060700010203 );
|
||||
|
||||
H[0] = _mm_shuffle_epi8( v128_xor3( V8, V0, h[0] ), shuf_bswap32 );
|
||||
H[1] = _mm_shuffle_epi8( v128_xor3( V9, V1, h[1] ), shuf_bswap32 );
|
||||
H[2] = _mm_shuffle_epi8( v128_xor3( VA, V2, h[2] ), shuf_bswap32 );
|
||||
H[3] = _mm_shuffle_epi8( v128_xor3( VB, V3, h[3] ), shuf_bswap32 );
|
||||
H[4] = _mm_shuffle_epi8( v128_xor3( VC, V4, h[4] ), shuf_bswap32 );
|
||||
H[5] = _mm_shuffle_epi8( v128_xor3( VD, V5, h[5] ), shuf_bswap32 );
|
||||
H[6] = _mm_shuffle_epi8( v128_xor3( VE, V6, h[6] ), shuf_bswap32 );
|
||||
H[7] = _mm_shuffle_epi8( v128_xor3( VF, V7, h[7] ), shuf_bswap32 );
|
||||
|
||||
#else
|
||||
|
||||
H[0] = v128_bswap32( v128_xor3( V8, V0, h[0] ) );
|
||||
H[1] = v128_bswap32( v128_xor3( V9, V1, h[1] ) );
|
||||
H[2] = v128_bswap32( v128_xor3( VA, V2, h[2] ) );
|
||||
@@ -950,8 +905,6 @@ void blake256_4x32_final_rounds_le( void *final_hash, const void *midstate,
|
||||
H[5] = v128_bswap32( v128_xor3( VD, V5, h[5] ) );
|
||||
H[6] = v128_bswap32( v128_xor3( VE, V6, h[6] ) );
|
||||
H[7] = v128_bswap32( v128_xor3( VF, V7, h[7] ) );
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined (__AVX2__)
|
||||
@@ -1291,24 +1244,22 @@ do { \
|
||||
VD = v256_32( T0 ^ 0x299F31D0 ); \
|
||||
VE = v256_32( T1 ^ 0x082EFA98 ); \
|
||||
VF = v256_32( T1 ^ 0xEC4E6C89 ); \
|
||||
const __m256i shuf_bswap32 = mm256_set2_64( \
|
||||
0x0c0d0e0f08090a0b, 0x0405060700010203 ); \
|
||||
M0 = _mm256_shuffle_epi8( * buf , shuf_bswap32 ); \
|
||||
M1 = _mm256_shuffle_epi8( *(buf+ 1), shuf_bswap32 ); \
|
||||
M2 = _mm256_shuffle_epi8( *(buf+ 2), shuf_bswap32 ); \
|
||||
M3 = _mm256_shuffle_epi8( *(buf+ 3), shuf_bswap32 ); \
|
||||
M4 = _mm256_shuffle_epi8( *(buf+ 4), shuf_bswap32 ); \
|
||||
M5 = _mm256_shuffle_epi8( *(buf+ 5), shuf_bswap32 ); \
|
||||
M6 = _mm256_shuffle_epi8( *(buf+ 6), shuf_bswap32 ); \
|
||||
M7 = _mm256_shuffle_epi8( *(buf+ 7), shuf_bswap32 ); \
|
||||
M8 = _mm256_shuffle_epi8( *(buf+ 8), shuf_bswap32 ); \
|
||||
M9 = _mm256_shuffle_epi8( *(buf+ 9), shuf_bswap32 ); \
|
||||
MA = _mm256_shuffle_epi8( *(buf+10), shuf_bswap32 ); \
|
||||
MB = _mm256_shuffle_epi8( *(buf+11), shuf_bswap32 ); \
|
||||
MC = _mm256_shuffle_epi8( *(buf+12), shuf_bswap32 ); \
|
||||
MD = _mm256_shuffle_epi8( *(buf+13), shuf_bswap32 ); \
|
||||
ME = _mm256_shuffle_epi8( *(buf+14), shuf_bswap32 ); \
|
||||
MF = _mm256_shuffle_epi8( *(buf+15), shuf_bswap32 ); \
|
||||
M0 = mm256_bswap_32( * buf ); \
|
||||
M1 = mm256_bswap_32( *(buf+ 1) ); \
|
||||
M2 = mm256_bswap_32( *(buf+ 2) ); \
|
||||
M3 = mm256_bswap_32( *(buf+ 3) ); \
|
||||
M4 = mm256_bswap_32( *(buf+ 4) ); \
|
||||
M5 = mm256_bswap_32( *(buf+ 5) ); \
|
||||
M6 = mm256_bswap_32( *(buf+ 6) ); \
|
||||
M7 = mm256_bswap_32( *(buf+ 7) ); \
|
||||
M8 = mm256_bswap_32( *(buf+ 8) ); \
|
||||
M9 = mm256_bswap_32( *(buf+ 9) ); \
|
||||
MA = mm256_bswap_32( *(buf+10) ); \
|
||||
MB = mm256_bswap_32( *(buf+11) ); \
|
||||
MC = mm256_bswap_32( *(buf+12) ); \
|
||||
MD = mm256_bswap_32( *(buf+13) ); \
|
||||
ME = mm256_bswap_32( *(buf+14) ); \
|
||||
MF = mm256_bswap_32( *(buf+15) ); \
|
||||
ROUND_S_8WAY(0); \
|
||||
ROUND_S_8WAY(1); \
|
||||
ROUND_S_8WAY(2); \
|
||||
@@ -1401,7 +1352,7 @@ do { \
|
||||
H7 = mm256_xor3( VF, V7, H7 ); \
|
||||
}
|
||||
|
||||
void blake256_8way_round0_prehash_le( void *midstate, const void *midhash,
|
||||
void blake256_8x32_round0_prehash_le( void *midstate, const void *midhash,
|
||||
void *data )
|
||||
{
|
||||
__m256i *M = (__m256i*)data;
|
||||
@@ -1491,7 +1442,7 @@ void blake256_8way_round0_prehash_le( void *midstate, const void *midhash,
|
||||
_mm256_xor_si256( v256_32( CSE ), M[15] ) );
|
||||
}
|
||||
|
||||
void blake256_8way_final_rounds_le( void *final_hash, const void *midstate,
|
||||
void blake256_8x32_final_rounds_le( void *final_hash, const void *midstate,
|
||||
const void *midhash, const void *data, const int rounds )
|
||||
{
|
||||
__m256i *H = (__m256i*)final_hash;
|
||||
@@ -1596,17 +1547,14 @@ void blake256_8way_final_rounds_le( void *final_hash, const void *midstate,
|
||||
ROUND256_8WAY_3;
|
||||
}
|
||||
|
||||
const __m256i shuf_bswap32 =
|
||||
mm256_set2_64( 0x0c0d0e0f08090a0b, 0x0405060700010203 );
|
||||
|
||||
H[0] = _mm256_shuffle_epi8( mm256_xor3( V8, V0, h[0] ), shuf_bswap32 );
|
||||
H[1] = _mm256_shuffle_epi8( mm256_xor3( V9, V1, h[1] ), shuf_bswap32 );
|
||||
H[2] = _mm256_shuffle_epi8( mm256_xor3( VA, V2, h[2] ), shuf_bswap32 );
|
||||
H[3] = _mm256_shuffle_epi8( mm256_xor3( VB, V3, h[3] ), shuf_bswap32 );
|
||||
H[4] = _mm256_shuffle_epi8( mm256_xor3( VC, V4, h[4] ), shuf_bswap32 );
|
||||
H[5] = _mm256_shuffle_epi8( mm256_xor3( VD, V5, h[5] ), shuf_bswap32 );
|
||||
H[6] = _mm256_shuffle_epi8( mm256_xor3( VE, V6, h[6] ), shuf_bswap32 );
|
||||
H[7] = _mm256_shuffle_epi8( mm256_xor3( VF, V7, h[7] ), shuf_bswap32 );
|
||||
H[0] = mm256_bswap_32( mm256_xor3( V8, V0, h[0] ) );
|
||||
H[1] = mm256_bswap_32( mm256_xor3( V9, V1, h[1] ) );
|
||||
H[2] = mm256_bswap_32( mm256_xor3( VA, V2, h[2] ) );
|
||||
H[3] = mm256_bswap_32( mm256_xor3( VB, V3, h[3] ) );
|
||||
H[4] = mm256_bswap_32( mm256_xor3( VC, V4, h[4] ) );
|
||||
H[5] = mm256_bswap_32( mm256_xor3( VD, V5, h[5] ) );
|
||||
H[6] = mm256_bswap_32( mm256_xor3( VE, V6, h[6] ) );
|
||||
H[7] = mm256_bswap_32( mm256_xor3( VF, V7, h[7] ) );
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -1933,8 +1881,6 @@ do { \
|
||||
__m512i M8, M9, MA, MB, MC, MD, ME, MF; \
|
||||
__m512i V0, V1, V2, V3, V4, V5, V6, V7; \
|
||||
__m512i V8, V9, VA, VB, VC, VD, VE, VF; \
|
||||
const __m512i shuf_bswap32 = mm512_bcast_m128( v128_set64( \
|
||||
0x0c0d0e0f08090a0b, 0x0405060700010203 ) ); \
|
||||
V0 = H0; \
|
||||
V1 = H1; \
|
||||
V2 = H2; \
|
||||
@@ -1951,22 +1897,22 @@ do { \
|
||||
VD = v512_32( T0 ^ 0x299F31D0 ); \
|
||||
VE = v512_32( T1 ^ 0x082EFA98 ); \
|
||||
VF = v512_32( T1 ^ 0xEC4E6C89 ); \
|
||||
M0 = _mm512_shuffle_epi8( * buf , shuf_bswap32 ); \
|
||||
M1 = _mm512_shuffle_epi8( *(buf+ 1), shuf_bswap32 ); \
|
||||
M2 = _mm512_shuffle_epi8( *(buf+ 2), shuf_bswap32 ); \
|
||||
M3 = _mm512_shuffle_epi8( *(buf+ 3), shuf_bswap32 ); \
|
||||
M4 = _mm512_shuffle_epi8( *(buf+ 4), shuf_bswap32 ); \
|
||||
M5 = _mm512_shuffle_epi8( *(buf+ 5), shuf_bswap32 ); \
|
||||
M6 = _mm512_shuffle_epi8( *(buf+ 6), shuf_bswap32 ); \
|
||||
M7 = _mm512_shuffle_epi8( *(buf+ 7), shuf_bswap32 ); \
|
||||
M8 = _mm512_shuffle_epi8( *(buf+ 8), shuf_bswap32 ); \
|
||||
M9 = _mm512_shuffle_epi8( *(buf+ 9), shuf_bswap32 ); \
|
||||
MA = _mm512_shuffle_epi8( *(buf+10), shuf_bswap32 ); \
|
||||
MB = _mm512_shuffle_epi8( *(buf+11), shuf_bswap32 ); \
|
||||
MC = _mm512_shuffle_epi8( *(buf+12), shuf_bswap32 ); \
|
||||
MD = _mm512_shuffle_epi8( *(buf+13), shuf_bswap32 ); \
|
||||
ME = _mm512_shuffle_epi8( *(buf+14), shuf_bswap32 ); \
|
||||
MF = _mm512_shuffle_epi8( *(buf+15), shuf_bswap32 ); \
|
||||
M0 = mm512_bswap_32( * buf ); \
|
||||
M1 = mm512_bswap_32( *(buf+ 1) ); \
|
||||
M2 = mm512_bswap_32( *(buf+ 2) ); \
|
||||
M3 = mm512_bswap_32( *(buf+ 3) ); \
|
||||
M4 = mm512_bswap_32( *(buf+ 4) ); \
|
||||
M5 = mm512_bswap_32( *(buf+ 5) ); \
|
||||
M6 = mm512_bswap_32( *(buf+ 6) ); \
|
||||
M7 = mm512_bswap_32( *(buf+ 7) ); \
|
||||
M8 = mm512_bswap_32( *(buf+ 8) ); \
|
||||
M9 = mm512_bswap_32( *(buf+ 9) ); \
|
||||
MA = mm512_bswap_32( *(buf+10) ); \
|
||||
MB = mm512_bswap_32( *(buf+11) ); \
|
||||
MC = mm512_bswap_32( *(buf+12) ); \
|
||||
MD = mm512_bswap_32( *(buf+13) ); \
|
||||
ME = mm512_bswap_32( *(buf+14) ); \
|
||||
MF = mm512_bswap_32( *(buf+15) ); \
|
||||
ROUND_S_16WAY(0); \
|
||||
ROUND_S_16WAY(1); \
|
||||
ROUND_S_16WAY(2); \
|
||||
@@ -2063,7 +2009,7 @@ do { \
|
||||
// is constant for every nonce and only needs to be run once per job. The
|
||||
// second part is run for each nonce using the precalculated midstate and the
|
||||
// hash from the first block.
|
||||
void blake256_16way_round0_prehash_le( void *midstate, const void *midhash,
|
||||
void blake256_16x32_round0_prehash_le( void *midstate, const void *midhash,
|
||||
void *data )
|
||||
{
|
||||
__m512i *M = (__m512i*)data;
|
||||
@@ -2157,7 +2103,7 @@ void blake256_16way_round0_prehash_le( void *midstate, const void *midhash,
|
||||
}
|
||||
|
||||
// Default is 14 rounds, blakecoin & vanilla are 8.
|
||||
void blake256_16way_final_rounds_le( void *final_hash, const void *midstate,
|
||||
void blake256_16x32_final_rounds_le( void *final_hash, const void *midstate,
|
||||
const void *midhash, const void *data, const int rounds )
|
||||
{
|
||||
__m512i *H = (__m512i*)final_hash;
|
||||
@@ -2274,27 +2220,23 @@ void blake256_16way_final_rounds_le( void *final_hash, const void *midstate,
|
||||
}
|
||||
|
||||
// Byte swap final hash
|
||||
const __m512i shuf_bswap32 = mm512_bcast_m128( v128_set64(
|
||||
0x0c0d0e0f08090a0b, 0x0405060700010203 ) );
|
||||
H[0] = _mm512_shuffle_epi8( mm512_xor3( V8, V0, h[0] ), shuf_bswap32 );
|
||||
H[1] = _mm512_shuffle_epi8( mm512_xor3( V9, V1, h[1] ), shuf_bswap32 );
|
||||
H[2] = _mm512_shuffle_epi8( mm512_xor3( VA, V2, h[2] ), shuf_bswap32 );
|
||||
H[3] = _mm512_shuffle_epi8( mm512_xor3( VB, V3, h[3] ), shuf_bswap32 );
|
||||
H[4] = _mm512_shuffle_epi8( mm512_xor3( VC, V4, h[4] ), shuf_bswap32 );
|
||||
H[5] = _mm512_shuffle_epi8( mm512_xor3( VD, V5, h[5] ), shuf_bswap32 );
|
||||
H[6] = _mm512_shuffle_epi8( mm512_xor3( VE, V6, h[6] ), shuf_bswap32 );
|
||||
H[7] = _mm512_shuffle_epi8( mm512_xor3( VF, V7, h[7] ), shuf_bswap32 );
|
||||
H[0] = mm512_bswap_32( mm512_xor3( V8, V0, h[0] ) );
|
||||
H[1] = mm512_bswap_32( mm512_xor3( V9, V1, h[1] ) );
|
||||
H[2] = mm512_bswap_32( mm512_xor3( VA, V2, h[2] ) );
|
||||
H[3] = mm512_bswap_32( mm512_xor3( VB, V3, h[3] ) );
|
||||
H[4] = mm512_bswap_32( mm512_xor3( VC, V4, h[4] ) );
|
||||
H[5] = mm512_bswap_32( mm512_xor3( VD, V5, h[5] ) );
|
||||
H[6] = mm512_bswap_32( mm512_xor3( VE, V6, h[6] ) );
|
||||
H[7] = mm512_bswap_32( mm512_xor3( VF, V7, h[7] ) );
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
// Blake-256 4 way
|
||||
|
||||
static const uint32_t salt_zero_4x32_small[4] = { 0, 0, 0, 0 };
|
||||
|
||||
static void
|
||||
blake32_4x32_init( blake_4x32_small_context *ctx, const uint32_t *iv,
|
||||
const uint32_t *salt, int rounds )
|
||||
int rounds )
|
||||
{
|
||||
casti_v128( ctx->H, 0 ) = v128_64( 0x6A09E6676A09E667 );
|
||||
casti_v128( ctx->H, 1 ) = v128_64( 0xBB67AE85BB67AE85 );
|
||||
@@ -2404,11 +2346,10 @@ blake32_4x32_close( blake_4x32_small_context *ctx, unsigned ub, unsigned n,
|
||||
|
||||
// Blake-256 8 way
|
||||
|
||||
static const uint32_t salt_zero_8way_small[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
|
||||
|
||||
static void
|
||||
blake32_8way_init( blake_8way_small_context *sc, const uint32_t *iv,
|
||||
const uint32_t *salt, int rounds )
|
||||
blake32_8way_init( blake256_8x32_context *sc, const uint32_t *iv,
|
||||
int rounds )
|
||||
{
|
||||
casti_m256i( sc->H, 0 ) = v256_64( 0x6A09E6676A09E667 );
|
||||
casti_m256i( sc->H, 1 ) = v256_64( 0xBB67AE85BB67AE85 );
|
||||
@@ -2424,7 +2365,7 @@ blake32_8way_init( blake_8way_small_context *sc, const uint32_t *iv,
|
||||
}
|
||||
|
||||
static void
|
||||
blake32_8way( blake_8way_small_context *sc, const void *data, size_t len )
|
||||
blake32_8way( blake256_8x32_context *sc, const void *data, size_t len )
|
||||
{
|
||||
__m256i *vdata = (__m256i*)data;
|
||||
__m256i *buf;
|
||||
@@ -2466,7 +2407,7 @@ blake32_8way( blake_8way_small_context *sc, const void *data, size_t len )
|
||||
}
|
||||
|
||||
static void
|
||||
blake32_8way_close( blake_8way_small_context *sc, unsigned ub, unsigned n,
|
||||
blake32_8way_close( blake256_8x32_context *sc, unsigned ub, unsigned n,
|
||||
void *dst, size_t out_size_w32 )
|
||||
{
|
||||
__m256i buf[16];
|
||||
@@ -2520,7 +2461,7 @@ blake32_8way_close( blake_8way_small_context *sc, unsigned ub, unsigned n,
|
||||
}
|
||||
|
||||
static void
|
||||
blake32_8way_le( blake_8way_small_context *sc, const void *data, size_t len )
|
||||
blake32_8way_le( blake256_8x32_context *sc, const void *data, size_t len )
|
||||
{
|
||||
__m256i *vdata = (__m256i*)data;
|
||||
__m256i *buf;
|
||||
@@ -2562,7 +2503,7 @@ blake32_8way_le( blake_8way_small_context *sc, const void *data, size_t len )
|
||||
}
|
||||
|
||||
static void
|
||||
blake32_8way_close_le( blake_8way_small_context *sc, unsigned ub, unsigned n,
|
||||
blake32_8way_close_le( blake256_8x32_context *sc, unsigned ub, unsigned n,
|
||||
void *dst, size_t out_size_w32 )
|
||||
{
|
||||
__m256i buf[16];
|
||||
@@ -2622,8 +2563,8 @@ blake32_8way_close_le( blake_8way_small_context *sc, unsigned ub, unsigned n,
|
||||
//Blake-256 16 way AVX512
|
||||
|
||||
static void
|
||||
blake32_16way_init( blake_16way_small_context *sc, const uint32_t *iv,
|
||||
const uint32_t *salt, int rounds )
|
||||
blake32_16way_init( blake256_16x32_context *sc, const uint32_t *iv,
|
||||
int rounds )
|
||||
{
|
||||
casti_m512i( sc->H, 0 ) = v512_64( 0x6A09E6676A09E667 );
|
||||
casti_m512i( sc->H, 1 ) = v512_64( 0xBB67AE85BB67AE85 );
|
||||
@@ -2639,7 +2580,7 @@ blake32_16way_init( blake_16way_small_context *sc, const uint32_t *iv,
|
||||
}
|
||||
|
||||
static void
|
||||
blake32_16way( blake_16way_small_context *sc, const void *data, size_t len )
|
||||
blake32_16way( blake256_16x32_context *sc, const void *data, size_t len )
|
||||
{
|
||||
__m512i *vdata = (__m512i*)data;
|
||||
__m512i *buf;
|
||||
@@ -2679,7 +2620,7 @@ blake32_16way( blake_16way_small_context *sc, const void *data, size_t len )
|
||||
sc->ptr = ptr;
|
||||
}
|
||||
static void
|
||||
blake32_16way_close( blake_16way_small_context *sc, unsigned ub, unsigned n,
|
||||
blake32_16way_close( blake256_16x32_context *sc, unsigned ub, unsigned n,
|
||||
void *dst, size_t out_size_w32 )
|
||||
{
|
||||
__m512i buf[16];
|
||||
@@ -2733,7 +2674,7 @@ blake32_16way_close( blake_16way_small_context *sc, unsigned ub, unsigned n,
|
||||
}
|
||||
|
||||
static void
|
||||
blake32_16way_le( blake_16way_small_context *sc, const void *data, size_t len )
|
||||
blake32_16way_le( blake256_16x32_context *sc, const void *data, size_t len )
|
||||
{
|
||||
__m512i *vdata = (__m512i*)data;
|
||||
__m512i *buf;
|
||||
@@ -2776,7 +2717,7 @@ blake32_16way_le( blake_16way_small_context *sc, const void *data, size_t len )
|
||||
}
|
||||
|
||||
static void
|
||||
blake32_16way_close_le( blake_16way_small_context *sc, unsigned ub, unsigned n,
|
||||
blake32_16way_close_le( blake256_16x32_context *sc, unsigned ub, unsigned n,
|
||||
void *dst, size_t out_size_w32 )
|
||||
{
|
||||
__m512i buf[16];
|
||||
@@ -2827,65 +2768,65 @@ blake32_16way_close_le( blake_16way_small_context *sc, unsigned ub, unsigned n,
|
||||
}
|
||||
|
||||
void
|
||||
blake256_16way_init(void *cc)
|
||||
blake256_16x32_init(void *cc)
|
||||
{
|
||||
blake32_16way_init( cc, IV256, salt_zero_8way_small, 14 );
|
||||
blake32_16way_init( cc, IV256, 14 );
|
||||
}
|
||||
|
||||
void
|
||||
blake256_16way_update(void *cc, const void *data, size_t len)
|
||||
blake256_16x32_update(void *cc, const void *data, size_t len)
|
||||
{
|
||||
blake32_16way(cc, data, len);
|
||||
}
|
||||
|
||||
void
|
||||
blake256_16way_close(void *cc, void *dst)
|
||||
blake256_16x32_close(void *cc, void *dst)
|
||||
{
|
||||
blake32_16way_close(cc, 0, 0, dst, 8);
|
||||
}
|
||||
|
||||
void
|
||||
blake256_16way_update_le(void *cc, const void *data, size_t len)
|
||||
blake256_16x32_update_le(void *cc, const void *data, size_t len)
|
||||
{
|
||||
blake32_16way_le(cc, data, len);
|
||||
}
|
||||
|
||||
void
|
||||
blake256_16way_close_le(void *cc, void *dst)
|
||||
blake256_16x32_close_le(void *cc, void *dst)
|
||||
{
|
||||
blake32_16way_close_le(cc, 0, 0, dst, 8);
|
||||
}
|
||||
|
||||
void blake256r14_16way_init(void *cc)
|
||||
{
|
||||
blake32_16way_init( cc, IV256, salt_zero_8way_small, 14 );
|
||||
blake32_16way_init( cc, IV256, 14 );
|
||||
}
|
||||
|
||||
void
|
||||
blake256r14_16way_update(void *cc, const void *data, size_t len)
|
||||
blake256r14_16x32_update(void *cc, const void *data, size_t len)
|
||||
{
|
||||
blake32_16way(cc, data, len);
|
||||
}
|
||||
|
||||
void
|
||||
blake256r14_16way_close(void *cc, void *dst)
|
||||
blake256r14_16x32_close(void *cc, void *dst)
|
||||
{
|
||||
blake32_16way_close(cc, 0, 0, dst, 8);
|
||||
}
|
||||
|
||||
void blake256r8_16way_init(void *cc)
|
||||
{
|
||||
blake32_16way_init( cc, IV256, salt_zero_8way_small, 8 );
|
||||
blake32_16way_init( cc, IV256, 8 );
|
||||
}
|
||||
|
||||
void
|
||||
blake256r8_16way_update(void *cc, const void *data, size_t len)
|
||||
blake256r8_16x32_update(void *cc, const void *data, size_t len)
|
||||
{
|
||||
blake32_16way(cc, data, len);
|
||||
}
|
||||
|
||||
void
|
||||
blake256r8_16way_close(void *cc, void *dst)
|
||||
blake256r8_16x32_close(void *cc, void *dst)
|
||||
{
|
||||
blake32_16way_close(cc, 0, 0, dst, 8);
|
||||
}
|
||||
@@ -2898,7 +2839,7 @@ blake256r8_16way_close(void *cc, void *dst)
|
||||
void
|
||||
blake256_4x32_init(void *ctx)
|
||||
{
|
||||
blake32_4x32_init( ctx, IV256, salt_zero_4x32_small, 14 );
|
||||
blake32_4x32_init( ctx, IV256, 14 );
|
||||
}
|
||||
|
||||
void
|
||||
@@ -2918,31 +2859,31 @@ blake256_4x32_close(void *ctx, void *dst)
|
||||
// Blake-256 8 way
|
||||
|
||||
void
|
||||
blake256_8way_init(void *cc)
|
||||
blake256_8x32_init(void *cc)
|
||||
{
|
||||
blake32_8way_init( cc, IV256, salt_zero_8way_small, 14 );
|
||||
blake32_8way_init( cc, IV256, 14 );
|
||||
}
|
||||
|
||||
void
|
||||
blake256_8way_update(void *cc, const void *data, size_t len)
|
||||
blake256_8x32_update(void *cc, const void *data, size_t len)
|
||||
{
|
||||
blake32_8way(cc, data, len);
|
||||
}
|
||||
|
||||
void
|
||||
blake256_8way_close(void *cc, void *dst)
|
||||
blake256_8x32_close(void *cc, void *dst)
|
||||
{
|
||||
blake32_8way_close(cc, 0, 0, dst, 8);
|
||||
}
|
||||
|
||||
void
|
||||
blake256_8way_update_le(void *cc, const void *data, size_t len)
|
||||
blake256_8x32_update_le(void *cc, const void *data, size_t len)
|
||||
{
|
||||
blake32_8way_le(cc, data, len);
|
||||
}
|
||||
|
||||
void
|
||||
blake256_8way_close_le(void *cc, void *dst)
|
||||
blake256_8x32_close_le(void *cc, void *dst)
|
||||
{
|
||||
blake32_8way_close_le(cc, 0, 0, dst, 8);
|
||||
}
|
||||
@@ -2952,7 +2893,7 @@ blake256_8way_close_le(void *cc, void *dst)
|
||||
// 14 rounds Blake, Decred
|
||||
void blake256r14_4x32_init(void *cc)
|
||||
{
|
||||
blake32_4x32_init( cc, IV256, salt_zero_4x32_small, 14 );
|
||||
blake32_4x32_init( cc, IV256, 14 );
|
||||
}
|
||||
|
||||
void
|
||||
@@ -2969,19 +2910,19 @@ blake256r14_4x32_close(void *cc, void *dst)
|
||||
|
||||
#if defined(__AVX2__)
|
||||
|
||||
void blake256r14_8way_init(void *cc)
|
||||
void blake256r14_8x32_init(void *cc)
|
||||
{
|
||||
blake32_8way_init( cc, IV256, salt_zero_8way_small, 14 );
|
||||
blake32_8way_init( cc, IV256, 14 );
|
||||
}
|
||||
|
||||
void
|
||||
blake256r14_8way_update(void *cc, const void *data, size_t len)
|
||||
blake256r14_8x32_update(void *cc, const void *data, size_t len)
|
||||
{
|
||||
blake32_8way(cc, data, len);
|
||||
}
|
||||
|
||||
void
|
||||
blake256r14_8way_close(void *cc, void *dst)
|
||||
blake256r14_8x32_close(void *cc, void *dst)
|
||||
{
|
||||
blake32_8way_close(cc, 0, 0, dst, 8);
|
||||
}
|
||||
@@ -2991,7 +2932,7 @@ blake256r14_8way_close(void *cc, void *dst)
|
||||
// 8 rounds Blakecoin, Vanilla
|
||||
void blake256r8_4x32_init(void *cc)
|
||||
{
|
||||
blake32_4x32_init( cc, IV256, salt_zero_4x32_small, 8 );
|
||||
blake32_4x32_init( cc, IV256, 8 );
|
||||
}
|
||||
|
||||
void
|
||||
@@ -3008,19 +2949,19 @@ blake256r8_4x32_close(void *cc, void *dst)
|
||||
|
||||
#if defined (__AVX2__)
|
||||
|
||||
void blake256r8_8way_init(void *cc)
|
||||
void blake256r8_8x32_init(void *cc)
|
||||
{
|
||||
blake32_8way_init( cc, IV256, salt_zero_8way_small, 8 );
|
||||
blake32_8way_init( cc, IV256, 8 );
|
||||
}
|
||||
|
||||
void
|
||||
blake256r8_8way_update(void *cc, const void *data, size_t len)
|
||||
blake256r8_8x32_update(void *cc, const void *data, size_t len)
|
||||
{
|
||||
blake32_8way(cc, data, len);
|
||||
}
|
||||
|
||||
void
|
||||
blake256r8_8way_close(void *cc, void *dst)
|
||||
blake256r8_8x32_close(void *cc, void *dst)
|
||||
{
|
||||
blake32_8way_close(cc, 0, 0, dst, 8);
|
||||
}
|
||||
|
||||
@@ -29,13 +29,6 @@ typedef struct
|
||||
|
||||
void blake256_transform_le( uint32_t *H, const uint32_t *buf,
|
||||
const uint32_t T0, const uint32_t T1, int rounds );
|
||||
/*
|
||||
void blake256_init( blake256_context *sc );
|
||||
void blake256_update( blake256_context *sc, const void *data, size_t len );
|
||||
void blake256_close( blake256_context *sc, void *dst );
|
||||
void blake256_full( blake256_context *sc, void *dst, const void *data,
|
||||
size_t len );
|
||||
*/
|
||||
|
||||
//////////////////////////////////
|
||||
//
|
||||
@@ -55,6 +48,10 @@ typedef blake_4x32_small_context blake256_4x32_context;
|
||||
void blake256_4x32_init(void *ctx);
|
||||
void blake256_4x32_update(void *ctx, const void *data, size_t len);
|
||||
void blake256_4x32_close(void *ctx, void *dst);
|
||||
void blake256_4x32_round0_prehash_le( void *midstate, const void *midhash,
|
||||
void *data );
|
||||
void blake256_4x32_final_rounds_le( void *final_hash, const void *midstate,
|
||||
const void *midhash, const void *data, const int rounds );
|
||||
|
||||
// 14 rounds
|
||||
typedef blake_4x32_small_context blake256r14_4x32_context;
|
||||
@@ -68,29 +65,6 @@ void blake256r8_4x32_init(void *cc);
|
||||
void blake256r8_4x32_update(void *cc, const void *data, size_t len);
|
||||
void blake256r8_4x32_close(void *cc, void *dst);
|
||||
|
||||
void blake256_4x32_round0_prehash_le( void *midstate, const void *midhash,
|
||||
void *data );
|
||||
void blake256_4x32_final_rounds_le( void *final_hash, const void *midstate,
|
||||
const void *midhash, const void *data, const int rounds );
|
||||
|
||||
#define blake_4way_small_context blake256_4x32_context
|
||||
#define blake256_4way_context blake256_4x32_context
|
||||
#define blake256_4way_init blake256_4x32_init
|
||||
#define blake256_4way_update blake256_4x32_update
|
||||
#define blake256_4way_close blake256_4x32_close
|
||||
#define blake256_4way_update_le blake256_4x32_update_le
|
||||
#define blake256_4way_close_le blake256_4x32_close_le
|
||||
#define blake256_4way_round0_prehash_le blake256_4x32_round0_prehash_le
|
||||
#define blake256_4way_final_rounds_le blake256_4x32_final_rounds_le
|
||||
#define blake256r14_4way_context blake256r14_4x32_context
|
||||
#define blake256r14_4way_init blake256r14_4x32_init
|
||||
#define blake256r14_4way_update blake256r14_4x32_update
|
||||
#define blake256r14_4way_close blake256r14_4x32_close
|
||||
#define blake256r8_4way_context blake256r14_4x32_context
|
||||
#define blake256r8_4way_init blake256r14_4x32_init
|
||||
#define blake256r8_4way_update blake256r14_4x32_update
|
||||
#define blake256r8_4way_close blake256r14_4x32_close
|
||||
|
||||
#ifdef __AVX2__
|
||||
|
||||
//////////////////////////////
|
||||
@@ -107,45 +81,28 @@ typedef struct
|
||||
} blake_8way_small_context;
|
||||
|
||||
// Default 14 rounds
|
||||
typedef blake_8way_small_context blake256_8way_context;
|
||||
void blake256_8way_init(void *cc);
|
||||
void blake256_8way_update(void *cc, const void *data, size_t len);
|
||||
void blake256_8way_close(void *cc, void *dst);
|
||||
void blake256_8way_update_le(void *cc, const void *data, size_t len);
|
||||
void blake256_8way_close_le(void *cc, void *dst);
|
||||
void blake256_8way_round0_prehash_le( void *midstate, const void *midhash,
|
||||
typedef blake_8way_small_context blake256_8x32_context;
|
||||
void blake256_8x32_init(void *cc);
|
||||
void blake256_8x32_update(void *cc, const void *data, size_t len);
|
||||
void blake256_8x32_close(void *cc, void *dst);
|
||||
void blake256_8x32_update_le(void *cc, const void *data, size_t len);
|
||||
void blake256_8x32_close_le(void *cc, void *dst);
|
||||
void blake256_8x32_round0_prehash_le( void *midstate, const void *midhash,
|
||||
void *data );
|
||||
void blake256_8way_final_rounds_le( void *final_hash, const void *midstate,
|
||||
void blake256_8x32_final_rounds_le( void *final_hash, const void *midstate,
|
||||
const void *midhash, const void *data, const int rounds );
|
||||
|
||||
// 14 rounds, blake, decred
|
||||
typedef blake_8way_small_context blake256r14_8way_context;
|
||||
void blake256r14_8way_init(void *cc);
|
||||
void blake256r14_8way_update(void *cc, const void *data, size_t len);
|
||||
void blake256r14_8way_close(void *cc, void *dst);
|
||||
typedef blake_8way_small_context blake256r14_8x32_context;
|
||||
void blake256r14_8x32_init(void *cc);
|
||||
void blake256r14_8x32_update(void *cc, const void *data, size_t len);
|
||||
void blake256r14_8x32_close(void *cc, void *dst);
|
||||
|
||||
// 8 rounds, blakecoin, vanilla
|
||||
typedef blake_8way_small_context blake256r8_8way_context;
|
||||
void blake256r8_8way_init(void *cc);
|
||||
void blake256r8_8way_update(void *cc, const void *data, size_t len);
|
||||
void blake256r8_8way_close(void *cc, void *dst);
|
||||
|
||||
#define blake_8x32_small_context blake256_8way_context
|
||||
#define blake_8x32_init blake256_8way_init
|
||||
#define blake_8x32_update blake256_8way_update
|
||||
#define blake_8x32_close blake256_8way_close
|
||||
#define blake_8x32_update_le blake256_8way_update_le
|
||||
#define blake_8x32_close_le blake256_8way_close_le
|
||||
#define blake_8x32_round0_prehash_le blake256_8way_round0_prehash
|
||||
#define blake_8x32_final_rounds_le blake256_8way_final_rounds_le
|
||||
#define blake256r14_8x32_context blake256r14_8way_context
|
||||
#define blake256r14_8x32_init blake256r14_8way_init
|
||||
#define blake256r14_8x32_update blake256r14_8way_update
|
||||
#define blake256r14_8x32_close blake256r14_8way_close
|
||||
#define blake256r8_8x32_context blake256r14_8way_context
|
||||
#define blake256r8_8x32_init blake256r14_8way_init
|
||||
#define blake256r8_8x32_update blake256r14_8way_update
|
||||
#define blake256r8_8x32_close blake256r14_8way_close
|
||||
typedef blake_8way_small_context blake256r8_8x32_context;
|
||||
void blake256r8_8x32_init(void *cc);
|
||||
void blake256r8_8x32_update(void *cc, const void *data, size_t len);
|
||||
void blake256r8_8x32_close(void *cc, void *dst);
|
||||
|
||||
#if defined(SIMD512)
|
||||
|
||||
@@ -163,46 +120,29 @@ typedef struct
|
||||
} blake_16way_small_context __attribute__ ((aligned (128)));
|
||||
|
||||
// Default 14 rounds
|
||||
typedef blake_16way_small_context blake256_16way_context;
|
||||
void blake256_16way_init(void *cc);
|
||||
void blake256_16way_update(void *cc, const void *data, size_t len);
|
||||
void blake256_16way_close(void *cc, void *dst);
|
||||
typedef blake_16way_small_context blake256_16x32_context;
|
||||
void blake256_16x32_init(void *cc);
|
||||
void blake256_16x32_update(void *cc, const void *data, size_t len);
|
||||
void blake256_16x32_close(void *cc, void *dst);
|
||||
// Expects data in little endian order, no byte swap needed
|
||||
void blake256_16way_update_le(void *cc, const void *data, size_t len);
|
||||
void blake256_16way_close_le(void *cc, void *dst);
|
||||
void blake256_16way_round0_prehash_le( void *midstate, const void *midhash,
|
||||
void blake256_16x32_update_le(void *cc, const void *data, size_t len);
|
||||
void blake256_16x32_close_le(void *cc, void *dst);
|
||||
void blake256_16x32_round0_prehash_le( void *midstate, const void *midhash,
|
||||
void *data );
|
||||
void blake256_16way_final_rounds_le( void *final_hash, const void *midstate,
|
||||
void blake256_16x32_final_rounds_le( void *final_hash, const void *midstate,
|
||||
const void *midhash, const void *data, const int rounds );
|
||||
|
||||
// 14 rounds, blake, decred
|
||||
typedef blake_16way_small_context blake256r14_16way_context;
|
||||
void blake256r14_16way_init(void *cc);
|
||||
void blake256r14_16way_update(void *cc, const void *data, size_t len);
|
||||
void blake256r14_16way_close(void *cc, void *dst);
|
||||
typedef blake_16way_small_context blake256r14_16x32_context;
|
||||
void blake256r14_16x32_init(void *cc);
|
||||
void blake256r14_16x32_update(void *cc, const void *data, size_t len);
|
||||
void blake256r14_16x32_close(void *cc, void *dst);
|
||||
|
||||
// 8 rounds, blakecoin, vanilla
|
||||
typedef blake_16way_small_context blake256r8_16way_context;
|
||||
void blake256r8_16way_init(void *cc);
|
||||
void blake256r8_16way_update(void *cc, const void *data, size_t len);
|
||||
void blake256r8_16way_close(void *cc, void *dst);
|
||||
|
||||
#define blake_16x32_small_context blake256_16way_context
|
||||
#define blake_16x32_init blake256_16way_init
|
||||
#define blake_16x32_update blake256_16way_update
|
||||
#define blake_16x32_close blake256_16way_close
|
||||
#define blake_16x32_update_le blake256_16way_update_le
|
||||
#define blake_16x32_close_le blake256_16way_close_le
|
||||
#define blake_16x32_round0_prehash_le blake256_16way_round0_prehash
|
||||
#define blake_16x32_final_rounds_le blake256_16way_final_rounds_le
|
||||
#define blake256r14_16x32_context blake256r14_16way_context
|
||||
#define blake256r14_16x32_init blake256r14_16way_init
|
||||
#define blake256r14_16x32_update blake256r14_16way_update
|
||||
#define blake256r14_16x32_close blake256r14_16way_close
|
||||
#define blake256r8_16x32_context blake256r8_16way_context
|
||||
#define blake256r8_16x32_init blake256r8_16way_init
|
||||
#define blake256r8_16x32_update blake256r8_16way_update
|
||||
#define blake256r8_16x32_close blake256r8_16way_close
|
||||
typedef blake_16way_small_context blake256r8_16x32_context;
|
||||
void blake256r8_16x32_init(void *cc);
|
||||
void blake256r8_16x32_update(void *cc, const void *data, size_t len);
|
||||
void blake256r8_16x32_close(void *cc, void *dst);
|
||||
|
||||
#endif // AVX512
|
||||
#endif // AVX2
|
||||
|
||||
@@ -14,7 +14,6 @@
|
||||
#define ALIGN(x) __attribute__((aligned(x)))
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(SIMD512)
|
||||
|
||||
typedef struct ALIGN( 64 ) {
|
||||
@@ -30,11 +29,6 @@ void blake2b_8x64_update( blake2b_8x64_ctx *ctx, const void *input,
|
||||
size_t inlen );
|
||||
void blake2b_8x64_final( blake2b_8x64_ctx *ctx, void *out );
|
||||
|
||||
#define blake2b_8way_ctx blake2b_8x64_ctx
|
||||
#define blake2b_8way_init blake2b_8x64_init
|
||||
#define blake2b_8way_update blake2b_8x64_update
|
||||
#define blake2b_8way_final blake2b_8x64_final
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(__AVX2__)
|
||||
@@ -53,11 +47,6 @@ void blake2b_4x64_update( blake2b_4x64_ctx *ctx, const void *input,
|
||||
size_t inlen );
|
||||
void blake2b_4x64_final( blake2b_4x64_ctx *ctx, void *out );
|
||||
|
||||
#define blake2b_4way_ctx blake2b_4x64_ctx
|
||||
#define blake2b_4way_init blake2b_4x64_init
|
||||
#define blake2b_4way_update blake2b_4x64_update
|
||||
#define blake2b_4way_final blake2b_4x64_final
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
@@ -17,7 +17,7 @@ int scanhash_blake2b_8way( struct work *work, uint32_t max_nonce,
|
||||
uint32_t hash[8*8] __attribute__ ((aligned (128)));;
|
||||
uint32_t vdata[20*8] __attribute__ ((aligned (64)));;
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
||||
blake2b_8way_ctx ctx __attribute__ ((aligned (64)));
|
||||
blake2b_8x64_ctx ctx __attribute__ ((aligned (64)));
|
||||
uint32_t *hash7 = &(hash[49]); // 3*16+1
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
@@ -35,9 +35,9 @@ int scanhash_blake2b_8way( struct work *work, uint32_t max_nonce,
|
||||
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
|
||||
n+3, 0, n+2, 0, n+1, 0, n , 0 ) ), *noncev );
|
||||
|
||||
blake2b_8way_init( &ctx );
|
||||
blake2b_8way_update( &ctx, vdata, 80 );
|
||||
blake2b_8way_final( &ctx, hash );
|
||||
blake2b_8x64_init( &ctx );
|
||||
blake2b_8x64_update( &ctx, vdata, 80 );
|
||||
blake2b_8x64_final( &ctx, hash );
|
||||
|
||||
for ( int lane = 0; lane < 8; lane++ )
|
||||
if ( hash7[ lane<<1 ] <= Htarg )
|
||||
@@ -61,10 +61,10 @@ int scanhash_blake2b_8way( struct work *work, uint32_t max_nonce,
|
||||
// Function not used, code inlined.
|
||||
void blake2b_4way_hash(void *output, const void *input)
|
||||
{
|
||||
blake2b_4way_ctx ctx;
|
||||
blake2b_4way_init( &ctx );
|
||||
blake2b_4way_update( &ctx, input, 80 );
|
||||
blake2b_4way_final( &ctx, output );
|
||||
blake2b_4x64_ctx ctx;
|
||||
blake2b_4x64_init( &ctx );
|
||||
blake2b_4x64_update( &ctx, input, 80 );
|
||||
blake2b_4x64_final( &ctx, output );
|
||||
}
|
||||
|
||||
int scanhash_blake2b_4way( struct work *work, uint32_t max_nonce,
|
||||
@@ -73,7 +73,7 @@ int scanhash_blake2b_4way( struct work *work, uint32_t max_nonce,
|
||||
uint32_t hash[8*4] __attribute__ ((aligned (64)));;
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (32)));;
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||
blake2b_4way_ctx ctx __attribute__ ((aligned (32)));
|
||||
blake2b_4x64_ctx ctx __attribute__ ((aligned (32)));
|
||||
uint32_t *hash7 = &(hash[25]); // 3*8+1
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
@@ -90,9 +90,9 @@ int scanhash_blake2b_4way( struct work *work, uint32_t max_nonce,
|
||||
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
|
||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
|
||||
|
||||
blake2b_4way_init( &ctx );
|
||||
blake2b_4way_update( &ctx, vdata, 80 );
|
||||
blake2b_4way_final( &ctx, hash );
|
||||
blake2b_4x64_init( &ctx );
|
||||
blake2b_4x64_update( &ctx, vdata, 80 );
|
||||
blake2b_4x64_final( &ctx, hash );
|
||||
|
||||
for ( int lane = 0; lane < 4; lane++ )
|
||||
if ( hash7[ lane<<1 ] <= Htarg )
|
||||
|
||||
@@ -61,6 +61,11 @@ int blake2s_4way_final( blake2s_4way_state *S, void *out, uint8_t outlen );
|
||||
int blake2s_4way_full_blocks( blake2s_4way_state *S, void *out,
|
||||
const void *input, uint64_t inlen );
|
||||
|
||||
#define blake2s_4x32_state blake2s_4way_state
|
||||
#define blake2s_4x32_init blake2s_4way_init
|
||||
#define blake2s_4x32_update blake2s_4way_update
|
||||
#define blake2s_4x32_final blake2s_4way_final
|
||||
#define blake2s_4x32_full_blocks blake2s_4way_full_blocks
|
||||
|
||||
#if defined(__AVX2__)
|
||||
|
||||
@@ -81,6 +86,12 @@ int blake2s_8way_final( blake2s_8way_state *S, void *out, uint8_t outlen );
|
||||
int blake2s_8way_full_blocks( blake2s_8way_state *S, void *out,
|
||||
const void *input, uint64_t inlen );
|
||||
|
||||
#define blake2s_8x32_state blake2s_8way_state
|
||||
#define blake2s_8x32_init blake2s_8way_init
|
||||
#define blake2s_8x32_update blake2s_8way_update
|
||||
#define blake2s_8x32_final blake2s_8way_final
|
||||
#define blake2s_8x32_full_blocks blake2s_8way_full_blocks
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(SIMD512)
|
||||
@@ -100,6 +111,11 @@ int blake2s_16way_update( blake2s_16way_state *S, const void *in,
|
||||
uint64_t inlen );
|
||||
int blake2s_16way_final( blake2s_16way_state *S, void *out, uint8_t outlen );
|
||||
|
||||
#define blake2s_16x32_state blake2s_16way_state
|
||||
#define blake2s_16x32_init blake2s_16way_init
|
||||
#define blake2s_16x32_update blake2s_16way_update
|
||||
#define blake2s_16x32_final blake2s_16way_final
|
||||
|
||||
#endif
|
||||
|
||||
#if 0
|
||||
|
||||
@@ -617,24 +617,22 @@ void blake512_full( blake512_context *sc, void *dst, const void *data,
|
||||
VD = v512_64( CB5 ^ T0 ); \
|
||||
VE = v512_64( CB6 ^ T1 ); \
|
||||
VF = v512_64( CB7 ^ T1 ); \
|
||||
const __m512i shuf_bswap64 = mm512_bcast_m128( v128_set64( \
|
||||
0x08090a0b0c0d0e0f, 0x0001020304050607 ) ); \
|
||||
M0 = _mm512_shuffle_epi8( *(buf+ 0), shuf_bswap64 ); \
|
||||
M1 = _mm512_shuffle_epi8( *(buf+ 1), shuf_bswap64 ); \
|
||||
M2 = _mm512_shuffle_epi8( *(buf+ 2), shuf_bswap64 ); \
|
||||
M3 = _mm512_shuffle_epi8( *(buf+ 3), shuf_bswap64 ); \
|
||||
M4 = _mm512_shuffle_epi8( *(buf+ 4), shuf_bswap64 ); \
|
||||
M5 = _mm512_shuffle_epi8( *(buf+ 5), shuf_bswap64 ); \
|
||||
M6 = _mm512_shuffle_epi8( *(buf+ 6), shuf_bswap64 ); \
|
||||
M7 = _mm512_shuffle_epi8( *(buf+ 7), shuf_bswap64 ); \
|
||||
M8 = _mm512_shuffle_epi8( *(buf+ 8), shuf_bswap64 ); \
|
||||
M9 = _mm512_shuffle_epi8( *(buf+ 9), shuf_bswap64 ); \
|
||||
MA = _mm512_shuffle_epi8( *(buf+10), shuf_bswap64 ); \
|
||||
MB = _mm512_shuffle_epi8( *(buf+11), shuf_bswap64 ); \
|
||||
MC = _mm512_shuffle_epi8( *(buf+12), shuf_bswap64 ); \
|
||||
MD = _mm512_shuffle_epi8( *(buf+13), shuf_bswap64 ); \
|
||||
ME = _mm512_shuffle_epi8( *(buf+14), shuf_bswap64 ); \
|
||||
MF = _mm512_shuffle_epi8( *(buf+15), shuf_bswap64 ); \
|
||||
M0 = mm512_bswap_64( *(buf+ 0) ); \
|
||||
M1 = mm512_bswap_64( *(buf+ 1) ); \
|
||||
M2 = mm512_bswap_64( *(buf+ 2) ); \
|
||||
M3 = mm512_bswap_64( *(buf+ 3) ); \
|
||||
M4 = mm512_bswap_64( *(buf+ 4) ); \
|
||||
M5 = mm512_bswap_64( *(buf+ 5) ); \
|
||||
M6 = mm512_bswap_64( *(buf+ 6) ); \
|
||||
M7 = mm512_bswap_64( *(buf+ 7) ); \
|
||||
M8 = mm512_bswap_64( *(buf+ 8) ); \
|
||||
M9 = mm512_bswap_64( *(buf+ 9) ); \
|
||||
MA = mm512_bswap_64( *(buf+10) ); \
|
||||
MB = mm512_bswap_64( *(buf+11) ); \
|
||||
MC = mm512_bswap_64( *(buf+12) ); \
|
||||
MD = mm512_bswap_64( *(buf+13) ); \
|
||||
ME = mm512_bswap_64( *(buf+14) ); \
|
||||
MF = mm512_bswap_64( *(buf+15) ); \
|
||||
ROUND_B_8WAY(0); \
|
||||
ROUND_B_8WAY(1); \
|
||||
ROUND_B_8WAY(2); \
|
||||
@@ -661,7 +659,7 @@ void blake512_full( blake512_context *sc, void *dst, const void *data,
|
||||
H7 = mm512_xor3( VF, V7, H7 ); \
|
||||
}
|
||||
|
||||
void blake512_8way_compress( blake_8way_big_context *sc )
|
||||
void blake512_8x64_compress( blake_8x64_big_context *sc )
|
||||
{
|
||||
__m512i M0, M1, M2, M3, M4, M5, M6, M7;
|
||||
__m512i M8, M9, MA, MB, MC, MD, ME, MF;
|
||||
@@ -685,25 +683,22 @@ void blake512_8way_compress( blake_8way_big_context *sc )
|
||||
VE = v512_64( CB6 ^ sc->T1 );
|
||||
VF = v512_64( CB7 ^ sc->T1 );
|
||||
|
||||
const __m512i shuf_bswap64 = mm512_bcast_m128( v128_set64(
|
||||
0x08090a0b0c0d0e0f, 0x0001020304050607 ) );
|
||||
|
||||
M0 = _mm512_shuffle_epi8( sc->buf[ 0], shuf_bswap64 );
|
||||
M1 = _mm512_shuffle_epi8( sc->buf[ 1], shuf_bswap64 );
|
||||
M2 = _mm512_shuffle_epi8( sc->buf[ 2], shuf_bswap64 );
|
||||
M3 = _mm512_shuffle_epi8( sc->buf[ 3], shuf_bswap64 );
|
||||
M4 = _mm512_shuffle_epi8( sc->buf[ 4], shuf_bswap64 );
|
||||
M5 = _mm512_shuffle_epi8( sc->buf[ 5], shuf_bswap64 );
|
||||
M6 = _mm512_shuffle_epi8( sc->buf[ 6], shuf_bswap64 );
|
||||
M7 = _mm512_shuffle_epi8( sc->buf[ 7], shuf_bswap64 );
|
||||
M8 = _mm512_shuffle_epi8( sc->buf[ 8], shuf_bswap64 );
|
||||
M9 = _mm512_shuffle_epi8( sc->buf[ 9], shuf_bswap64 );
|
||||
MA = _mm512_shuffle_epi8( sc->buf[10], shuf_bswap64 );
|
||||
MB = _mm512_shuffle_epi8( sc->buf[11], shuf_bswap64 );
|
||||
MC = _mm512_shuffle_epi8( sc->buf[12], shuf_bswap64 );
|
||||
MD = _mm512_shuffle_epi8( sc->buf[13], shuf_bswap64 );
|
||||
ME = _mm512_shuffle_epi8( sc->buf[14], shuf_bswap64 );
|
||||
MF = _mm512_shuffle_epi8( sc->buf[15], shuf_bswap64 );
|
||||
M0 = mm512_bswap_64( sc->buf[ 0] );
|
||||
M1 = mm512_bswap_64( sc->buf[ 1] );
|
||||
M2 = mm512_bswap_64( sc->buf[ 2] );
|
||||
M3 = mm512_bswap_64( sc->buf[ 3] );
|
||||
M4 = mm512_bswap_64( sc->buf[ 4] );
|
||||
M5 = mm512_bswap_64( sc->buf[ 5] );
|
||||
M6 = mm512_bswap_64( sc->buf[ 6] );
|
||||
M7 = mm512_bswap_64( sc->buf[ 7] );
|
||||
M8 = mm512_bswap_64( sc->buf[ 8] );
|
||||
M9 = mm512_bswap_64( sc->buf[ 9] );
|
||||
MA = mm512_bswap_64( sc->buf[10] );
|
||||
MB = mm512_bswap_64( sc->buf[11] );
|
||||
MC = mm512_bswap_64( sc->buf[12] );
|
||||
MD = mm512_bswap_64( sc->buf[13] );
|
||||
ME = mm512_bswap_64( sc->buf[14] );
|
||||
MF = mm512_bswap_64( sc->buf[15] );
|
||||
|
||||
ROUND_B_8WAY(0);
|
||||
ROUND_B_8WAY(1);
|
||||
@@ -733,7 +728,7 @@ void blake512_8way_compress( blake_8way_big_context *sc )
|
||||
}
|
||||
|
||||
// won't be used after prehash implemented
|
||||
void blake512_8way_compress_le( blake_8x64_big_context *sc )
|
||||
void blake512_8x64_compress_le( blake_8x64_big_context *sc )
|
||||
{
|
||||
__m512i M0, M1, M2, M3, M4, M5, M6, M7;
|
||||
__m512i M8, M9, MA, MB, MC, MD, ME, MF;
|
||||
@@ -1177,7 +1172,7 @@ void blake512_8x64_full( blake_8x64_big_context *sc, void * dst,
|
||||
{
|
||||
if ( ( sc->T0 = sc->T0 + 1024 ) < 1024 )
|
||||
sc->T1 = sc->T1 + 1;
|
||||
blake512_8way_compress( sc );
|
||||
blake512_8x64_compress( sc );
|
||||
sc->ptr = 0;
|
||||
}
|
||||
|
||||
@@ -1213,7 +1208,7 @@ void blake512_8x64_full( blake_8x64_big_context *sc, void * dst,
|
||||
if ( ( sc->T0 = sc->T0 + 1024 ) < 1024 )
|
||||
sc->T1 = sc->T1 + 1;
|
||||
|
||||
blake512_8way_compress( sc );
|
||||
blake512_8x64_compress( sc );
|
||||
|
||||
mm512_block_bswap_64( (__m512i*)dst, sc->H );
|
||||
}
|
||||
@@ -1244,7 +1239,7 @@ void blake512_8x64_full_le( blake_8x64_big_context *sc, void * dst,
|
||||
{
|
||||
if ( ( sc->T0 = sc->T0 + 1024 ) < 1024 )
|
||||
sc->T1 = sc->T1 + 1;
|
||||
blake512_8way_compress_le( sc );
|
||||
blake512_8x64_compress_le( sc );
|
||||
sc->ptr = 0;
|
||||
}
|
||||
|
||||
@@ -1280,7 +1275,7 @@ void blake512_8x64_full_le( blake_8x64_big_context *sc, void * dst,
|
||||
if ( ( sc->T0 = sc->T0 + 1024 ) < 1024 )
|
||||
sc->T1 = sc->T1 + 1;
|
||||
|
||||
blake512_8way_compress_le( sc );
|
||||
blake512_8x64_compress_le( sc );
|
||||
|
||||
mm512_block_bswap_64( (__m512i*)dst, sc->H );
|
||||
}
|
||||
@@ -1355,24 +1350,22 @@ blake512_8x64_close(void *cc, void *dst)
|
||||
VD = v256_64( CB5 ^ T0 ); \
|
||||
VE = v256_64( CB6 ^ T1 ); \
|
||||
VF = v256_64( CB7 ^ T1 ); \
|
||||
const __m256i shuf_bswap64 = mm256_bcast_m128( v128_set64( \
|
||||
0x08090a0b0c0d0e0f, 0x0001020304050607 ) ); \
|
||||
M0 = _mm256_shuffle_epi8( *(buf+ 0), shuf_bswap64 ); \
|
||||
M1 = _mm256_shuffle_epi8( *(buf+ 1), shuf_bswap64 ); \
|
||||
M2 = _mm256_shuffle_epi8( *(buf+ 2), shuf_bswap64 ); \
|
||||
M3 = _mm256_shuffle_epi8( *(buf+ 3), shuf_bswap64 ); \
|
||||
M4 = _mm256_shuffle_epi8( *(buf+ 4), shuf_bswap64 ); \
|
||||
M5 = _mm256_shuffle_epi8( *(buf+ 5), shuf_bswap64 ); \
|
||||
M6 = _mm256_shuffle_epi8( *(buf+ 6), shuf_bswap64 ); \
|
||||
M7 = _mm256_shuffle_epi8( *(buf+ 7), shuf_bswap64 ); \
|
||||
M8 = _mm256_shuffle_epi8( *(buf+ 8), shuf_bswap64 ); \
|
||||
M9 = _mm256_shuffle_epi8( *(buf+ 9), shuf_bswap64 ); \
|
||||
MA = _mm256_shuffle_epi8( *(buf+10), shuf_bswap64 ); \
|
||||
MB = _mm256_shuffle_epi8( *(buf+11), shuf_bswap64 ); \
|
||||
MC = _mm256_shuffle_epi8( *(buf+12), shuf_bswap64 ); \
|
||||
MD = _mm256_shuffle_epi8( *(buf+13), shuf_bswap64 ); \
|
||||
ME = _mm256_shuffle_epi8( *(buf+14), shuf_bswap64 ); \
|
||||
MF = _mm256_shuffle_epi8( *(buf+15), shuf_bswap64 ); \
|
||||
M0 = mm256_bswap_64( *(buf+ 0) ); \
|
||||
M1 = mm256_bswap_64( *(buf+ 1) ); \
|
||||
M2 = mm256_bswap_64( *(buf+ 2) ); \
|
||||
M3 = mm256_bswap_64( *(buf+ 3) ); \
|
||||
M4 = mm256_bswap_64( *(buf+ 4) ); \
|
||||
M5 = mm256_bswap_64( *(buf+ 5) ); \
|
||||
M6 = mm256_bswap_64( *(buf+ 6) ); \
|
||||
M7 = mm256_bswap_64( *(buf+ 7) ); \
|
||||
M8 = mm256_bswap_64( *(buf+ 8) ); \
|
||||
M9 = mm256_bswap_64( *(buf+ 9) ); \
|
||||
MA = mm256_bswap_64( *(buf+10) ); \
|
||||
MB = mm256_bswap_64( *(buf+11) ); \
|
||||
MC = mm256_bswap_64( *(buf+12) ); \
|
||||
MD = mm256_bswap_64( *(buf+13) ); \
|
||||
ME = mm256_bswap_64( *(buf+14) ); \
|
||||
MF = mm256_bswap_64( *(buf+15) ); \
|
||||
ROUND_B_4WAY(0); \
|
||||
ROUND_B_4WAY(1); \
|
||||
ROUND_B_4WAY(2); \
|
||||
@@ -1400,7 +1393,7 @@ blake512_8x64_close(void *cc, void *dst)
|
||||
}
|
||||
|
||||
|
||||
void blake512_4way_compress( blake_4x64_big_context *sc )
|
||||
void blake512_4x64_compress( blake_4x64_big_context *sc )
|
||||
{
|
||||
__m256i M0, M1, M2, M3, M4, M5, M6, M7;
|
||||
__m256i M8, M9, MA, MB, MC, MD, ME, MF;
|
||||
@@ -1423,25 +1416,23 @@ void blake512_4way_compress( blake_4x64_big_context *sc )
|
||||
VD = v256_64( CB5 ^ sc->T0 );
|
||||
VE = v256_64( CB6 ^ sc->T1 );
|
||||
VF = v256_64( CB7 ^ sc->T1 );
|
||||
const __m256i shuf_bswap64 = mm256_bcast_m128( v128_set64(
|
||||
0x08090a0b0c0d0e0f, 0x0001020304050607 ) );
|
||||
|
||||
M0 = _mm256_shuffle_epi8( sc->buf[ 0], shuf_bswap64 );
|
||||
M1 = _mm256_shuffle_epi8( sc->buf[ 1], shuf_bswap64 );
|
||||
M2 = _mm256_shuffle_epi8( sc->buf[ 2], shuf_bswap64 );
|
||||
M3 = _mm256_shuffle_epi8( sc->buf[ 3], shuf_bswap64 );
|
||||
M4 = _mm256_shuffle_epi8( sc->buf[ 4], shuf_bswap64 );
|
||||
M5 = _mm256_shuffle_epi8( sc->buf[ 5], shuf_bswap64 );
|
||||
M6 = _mm256_shuffle_epi8( sc->buf[ 6], shuf_bswap64 );
|
||||
M7 = _mm256_shuffle_epi8( sc->buf[ 7], shuf_bswap64 );
|
||||
M8 = _mm256_shuffle_epi8( sc->buf[ 8], shuf_bswap64 );
|
||||
M9 = _mm256_shuffle_epi8( sc->buf[ 9], shuf_bswap64 );
|
||||
MA = _mm256_shuffle_epi8( sc->buf[10], shuf_bswap64 );
|
||||
MB = _mm256_shuffle_epi8( sc->buf[11], shuf_bswap64 );
|
||||
MC = _mm256_shuffle_epi8( sc->buf[12], shuf_bswap64 );
|
||||
MD = _mm256_shuffle_epi8( sc->buf[13], shuf_bswap64 );
|
||||
ME = _mm256_shuffle_epi8( sc->buf[14], shuf_bswap64 );
|
||||
MF = _mm256_shuffle_epi8( sc->buf[15], shuf_bswap64 );
|
||||
M0 = mm256_bswap_64( sc->buf[ 0] );
|
||||
M1 = mm256_bswap_64( sc->buf[ 1] );
|
||||
M2 = mm256_bswap_64( sc->buf[ 2] );
|
||||
M3 = mm256_bswap_64( sc->buf[ 3] );
|
||||
M4 = mm256_bswap_64( sc->buf[ 4] );
|
||||
M5 = mm256_bswap_64( sc->buf[ 5] );
|
||||
M6 = mm256_bswap_64( sc->buf[ 6] );
|
||||
M7 = mm256_bswap_64( sc->buf[ 7] );
|
||||
M8 = mm256_bswap_64( sc->buf[ 8] );
|
||||
M9 = mm256_bswap_64( sc->buf[ 9] );
|
||||
MA = mm256_bswap_64( sc->buf[10] );
|
||||
MB = mm256_bswap_64( sc->buf[11] );
|
||||
MC = mm256_bswap_64( sc->buf[12] );
|
||||
MD = mm256_bswap_64( sc->buf[13] );
|
||||
ME = mm256_bswap_64( sc->buf[14] );
|
||||
MF = mm256_bswap_64( sc->buf[15] );
|
||||
|
||||
ROUND_B_4WAY(0);
|
||||
ROUND_B_4WAY(1);
|
||||
@@ -1470,7 +1461,7 @@ void blake512_4way_compress( blake_4x64_big_context *sc )
|
||||
sc->H[7] = mm256_xor3( VF, V7, sc->H[7] );
|
||||
}
|
||||
|
||||
void blake512_4x64_prehash_le( blake_4x64_big_context *sc, __m256i *midstate,
|
||||
void blake512_4x64_prehash_le( blake512_4x64_context *sc, __m256i *midstate,
|
||||
const void *data )
|
||||
{
|
||||
__m256i V0, V1, V2, V3, V4, V5, V6, V7;
|
||||
@@ -1562,7 +1553,7 @@ void blake512_4x64_prehash_le( blake_4x64_big_context *sc, __m256i *midstate,
|
||||
midstate[15] = VF;
|
||||
}
|
||||
|
||||
void blake512_4x64_final_le( blake_4x64_big_context *sc, void *hash,
|
||||
void blake512_4x64_final_le( blake512_4x64_context *sc, void *hash,
|
||||
const __m256i nonce, const __m256i *midstate )
|
||||
{
|
||||
__m256i M0, M1, M2, M3, M4, M5, M6, M7;
|
||||
@@ -1685,7 +1676,7 @@ void blake512_4x64_final_le( blake_4x64_big_context *sc, void *hash,
|
||||
}
|
||||
|
||||
|
||||
void blake512_4x64_init( blake_4x64_big_context *sc )
|
||||
void blake512_4x64_init( blake512_4x64_context *sc )
|
||||
{
|
||||
casti_m256i( sc->H, 0 ) = v256_64( 0x6A09E667F3BCC908 );
|
||||
casti_m256i( sc->H, 1 ) = v256_64( 0xBB67AE8584CAA73B );
|
||||
@@ -1798,7 +1789,7 @@ blake64_4way_close( blake_4x64_big_context *sc, void *dst )
|
||||
}
|
||||
|
||||
// init, update & close
|
||||
void blake512_4x64_full( blake_4x64_big_context *sc, void * dst,
|
||||
void blake512_4x64_full( blake512_4x64_context *sc, void * dst,
|
||||
const void *data, size_t len )
|
||||
{
|
||||
|
||||
@@ -1824,7 +1815,7 @@ void blake512_4x64_full( blake_4x64_big_context *sc, void * dst,
|
||||
{
|
||||
if ( ( sc->T0 = sc->T0 + 1024 ) < 1024 )
|
||||
sc->T1 = sc->T1 + 1;
|
||||
blake512_4way_compress( sc );
|
||||
blake512_4x64_compress( sc );
|
||||
sc->ptr = 0;
|
||||
}
|
||||
|
||||
@@ -1859,7 +1850,7 @@ void blake512_4x64_full( blake_4x64_big_context *sc, void * dst,
|
||||
if ( ( sc->T0 = sc->T0 + 1024 ) < 1024 )
|
||||
sc->T1 = sc->T1 + 1;
|
||||
|
||||
blake512_4way_compress( sc );
|
||||
blake512_4x64_compress( sc );
|
||||
|
||||
mm256_block_bswap_64( (__m256i*)dst, sc->H );
|
||||
}
|
||||
@@ -1934,29 +1925,6 @@ void blake512_2x64_compress( blake_2x64_big_context *sc )
|
||||
VE = v128_64( CB6 ^ sc->T1 );
|
||||
VF = v128_64( CB7 ^ sc->T1 );
|
||||
|
||||
#if defined(__SSSE3__)
|
||||
|
||||
const v128u64_t shuf_bswap64 = v128_set64(
|
||||
0x08090a0b0c0d0e0f, 0x0001020304050607 );
|
||||
M0 = v128_shuffle8( sc->buf[ 0], shuf_bswap64 );
|
||||
M1 = v128_shuffle8( sc->buf[ 1], shuf_bswap64 );
|
||||
M2 = v128_shuffle8( sc->buf[ 2], shuf_bswap64 );
|
||||
M3 = v128_shuffle8( sc->buf[ 3], shuf_bswap64 );
|
||||
M4 = v128_shuffle8( sc->buf[ 4], shuf_bswap64 );
|
||||
M5 = v128_shuffle8( sc->buf[ 5], shuf_bswap64 );
|
||||
M6 = v128_shuffle8( sc->buf[ 6], shuf_bswap64 );
|
||||
M7 = v128_shuffle8( sc->buf[ 7], shuf_bswap64 );
|
||||
M8 = v128_shuffle8( sc->buf[ 8], shuf_bswap64 );
|
||||
M9 = v128_shuffle8( sc->buf[ 9], shuf_bswap64 );
|
||||
MA = v128_shuffle8( sc->buf[10], shuf_bswap64 );
|
||||
MB = v128_shuffle8( sc->buf[11], shuf_bswap64 );
|
||||
MC = v128_shuffle8( sc->buf[12], shuf_bswap64 );
|
||||
MD = v128_shuffle8( sc->buf[13], shuf_bswap64 );
|
||||
ME = v128_shuffle8( sc->buf[14], shuf_bswap64 );
|
||||
MF = v128_shuffle8( sc->buf[15], shuf_bswap64 );
|
||||
|
||||
#else // SSE2 & NEON
|
||||
|
||||
M0 = v128_bswap64( sc->buf[ 0] );
|
||||
M1 = v128_bswap64( sc->buf[ 1] );
|
||||
M2 = v128_bswap64( sc->buf[ 2] );
|
||||
@@ -1974,8 +1942,6 @@ void blake512_2x64_compress( blake_2x64_big_context *sc )
|
||||
ME = v128_bswap64( sc->buf[14] );
|
||||
MF = v128_bswap64( sc->buf[15] );
|
||||
|
||||
#endif
|
||||
|
||||
ROUND_B_2X64(0);
|
||||
ROUND_B_2X64(1);
|
||||
ROUND_B_2X64(2);
|
||||
|
||||
@@ -54,10 +54,10 @@ int scanhash_blakecoin_16way( struct work *work, uint32_t max_nonce,
|
||||
n+ 7, n+ 6, n+ 5, n+ 4, n+ 3, n+ 2, n +1, n );
|
||||
|
||||
// Partially prehash second block without touching nonces in block_buf[3].
|
||||
blake256_16way_round0_prehash_le( midstate_vars, block0_hash, block_buf );
|
||||
blake256_16x32_round0_prehash_le( midstate_vars, block0_hash, block_buf );
|
||||
|
||||
do {
|
||||
blake256_16way_final_rounds_le( hash32, midstate_vars, block0_hash,
|
||||
blake256_16x32_final_rounds_le( hash32, midstate_vars, block0_hash,
|
||||
block_buf, rounds );
|
||||
for ( int lane = 0; lane < 16; lane++ )
|
||||
if ( unlikely( hash32_d7[ lane ] <= targ32_d7 ) )
|
||||
@@ -123,10 +123,10 @@ int scanhash_blakecoin_8way( struct work *work, uint32_t max_nonce,
|
||||
block_buf[3] = _mm256_set_epi32( n+7, n+6, n+5, n+4, n+3, n+2, n+1, n );
|
||||
|
||||
// Partially prehash second block without touching nonces in block_buf[3].
|
||||
blake256_8way_round0_prehash_le( midstate_vars, block0_hash, block_buf );
|
||||
blake256_8x32_round0_prehash_le( midstate_vars, block0_hash, block_buf );
|
||||
|
||||
do {
|
||||
blake256_8way_final_rounds_le( hash32, midstate_vars, block0_hash,
|
||||
blake256_8x32_final_rounds_le( hash32, midstate_vars, block0_hash,
|
||||
block_buf, rounds );
|
||||
for ( int lane = 0; lane < 8; lane++ )
|
||||
if ( unlikely( hash32_d7[ lane ] <= targ32_d7 ) )
|
||||
@@ -148,16 +148,16 @@ int scanhash_blakecoin_8way( struct work *work, uint32_t max_nonce,
|
||||
|
||||
#elif defined (BLAKECOIN_4WAY)
|
||||
|
||||
blake256r8_4way_context blakecoin_4w_ctx;
|
||||
blake256r8_4x32_context blakecoin_4w_ctx;
|
||||
|
||||
void blakecoin_4way_hash(void *state, const void *input)
|
||||
{
|
||||
uint32_t vhash[8*4] __attribute__ ((aligned (64)));
|
||||
blake256r8_4way_context ctx;
|
||||
blake256r8_4x32_context ctx;
|
||||
|
||||
memcpy( &ctx, &blakecoin_4w_ctx, sizeof ctx );
|
||||
blake256r8_4way_update( &ctx, input + (64<<2), 16 );
|
||||
blake256r8_4way_close( &ctx, vhash );
|
||||
blake256r8_4x32_update( &ctx, input + (64<<2), 16 );
|
||||
blake256r8_4x32_close( &ctx, vhash );
|
||||
|
||||
dintrlv_4x32( state, state+32, state+64, state+96, vhash, 256 );
|
||||
}
|
||||
@@ -178,8 +178,8 @@ int scanhash_blakecoin_4way( struct work *work, uint32_t max_nonce,
|
||||
HTarget = 0x7f;
|
||||
|
||||
v128_bswap32_intrlv80_4x32( vdata, pdata );
|
||||
blake256r8_4way_init( &blakecoin_4w_ctx );
|
||||
blake256r8_4way_update( &blakecoin_4w_ctx, vdata, 64 );
|
||||
blake256r8_4x32_init( &blakecoin_4w_ctx );
|
||||
blake256r8_4x32_update( &blakecoin_4w_ctx, vdata, 64 );
|
||||
|
||||
do {
|
||||
*noncev = v128_bswap32( _mm_set_epi32( n+3, n+2, n+1, n ) );
|
||||
|
||||
@@ -16,28 +16,27 @@ extern void pentablakehash_4way( void *output, const void *input )
|
||||
uint64_t hash2[8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash3[8] __attribute__ ((aligned (64)));
|
||||
uint64_t vhash[8*4] __attribute__ ((aligned (64)));
|
||||
blake512_4way_context ctx;
|
||||
blake512_4x64_context ctx;
|
||||
|
||||
blake512_4x64_init( &ctx );
|
||||
blake512_4x64_update( &ctx, input, 80 );
|
||||
blake512_4x64_close( &ctx, vhash );
|
||||
|
||||
blake512_4way_init( &ctx );
|
||||
blake512_4way_update( &ctx, input, 80 );
|
||||
blake512_4way_close( &ctx, vhash );
|
||||
blake512_4x64_init( &ctx );
|
||||
blake512_4x64_update( &ctx, vhash, 64 );
|
||||
blake512_4x64_close( &ctx, vhash );
|
||||
|
||||
blake512_4way_init( &ctx );
|
||||
blake512_4way_update( &ctx, vhash, 64 );
|
||||
blake512_4way_close( &ctx, vhash );
|
||||
blake512_4x64_init( &ctx );
|
||||
blake512_4x64_update( &ctx, vhash, 64 );
|
||||
blake512_4x64_close( &ctx, vhash );
|
||||
|
||||
blake512_4way_init( &ctx );
|
||||
blake512_4way_update( &ctx, vhash, 64 );
|
||||
blake512_4way_close( &ctx, vhash );
|
||||
blake512_4x64_init( &ctx );
|
||||
blake512_4x64_update( &ctx, vhash, 64 );
|
||||
blake512_4x64_close( &ctx, vhash );
|
||||
|
||||
blake512_4way_init( &ctx );
|
||||
blake512_4way_update( &ctx, vhash, 64 );
|
||||
blake512_4way_close( &ctx, vhash );
|
||||
|
||||
blake512_4way_init( &ctx );
|
||||
blake512_4way_update( &ctx, vhash, 64 );
|
||||
blake512_4way_close( &ctx, vhash );
|
||||
blake512_4x64_init( &ctx );
|
||||
blake512_4x64_update( &ctx, vhash, 64 );
|
||||
blake512_4x64_close( &ctx, vhash );
|
||||
|
||||
memcpy( output, hash0, 32 );
|
||||
memcpy( output+32, hash1, 32 );
|
||||
|
||||
@@ -227,7 +227,7 @@ int blake2s_compress( blake2s_state *S, const uint8_t block[64] )
|
||||
v[14] = S->f[0] ^ blake2s_IV[6];
|
||||
v[15] = S->f[1] ^ blake2s_IV[7];
|
||||
|
||||
#if defined(__SSE2__)
|
||||
#if defined(__SSE2__) || defined(__ARM_NEON)
|
||||
|
||||
v128_t *V = (v128_t*)v;
|
||||
|
||||
@@ -263,19 +263,6 @@ int blake2s_compress( blake2s_state *S, const uint8_t block[64] )
|
||||
V[3] = v128_swap64( V[3] ); \
|
||||
V[2] = v128_shufll32( V[2] )
|
||||
|
||||
BLAKE2S_ROUND(0);
|
||||
BLAKE2S_ROUND(1);
|
||||
BLAKE2S_ROUND(2);
|
||||
BLAKE2S_ROUND(3);
|
||||
BLAKE2S_ROUND(4);
|
||||
BLAKE2S_ROUND(5);
|
||||
BLAKE2S_ROUND(6);
|
||||
BLAKE2S_ROUND(7);
|
||||
BLAKE2S_ROUND(8);
|
||||
BLAKE2S_ROUND(9);
|
||||
|
||||
#undef BLAKE2S_ROUND
|
||||
|
||||
#else
|
||||
|
||||
#define G(r,i,a,b,c,d) \
|
||||
@@ -290,7 +277,7 @@ int blake2s_compress( blake2s_state *S, const uint8_t block[64] )
|
||||
b = SPH_ROTR32(b ^ c, 7); \
|
||||
} while(0)
|
||||
|
||||
#define ROUND(r) \
|
||||
#define BLAKE2S_ROUND(r) \
|
||||
do { \
|
||||
G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \
|
||||
G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \
|
||||
@@ -302,24 +289,25 @@ int blake2s_compress( blake2s_state *S, const uint8_t block[64] )
|
||||
G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \
|
||||
} while(0)
|
||||
|
||||
ROUND( 0 );
|
||||
ROUND( 1 );
|
||||
ROUND( 2 );
|
||||
ROUND( 3 );
|
||||
ROUND( 4 );
|
||||
ROUND( 5 );
|
||||
ROUND( 6 );
|
||||
ROUND( 7 );
|
||||
ROUND( 8 );
|
||||
ROUND( 9 );
|
||||
|
||||
#endif
|
||||
|
||||
BLAKE2S_ROUND(0);
|
||||
BLAKE2S_ROUND(1);
|
||||
BLAKE2S_ROUND(2);
|
||||
BLAKE2S_ROUND(3);
|
||||
BLAKE2S_ROUND(4);
|
||||
BLAKE2S_ROUND(5);
|
||||
BLAKE2S_ROUND(6);
|
||||
BLAKE2S_ROUND(7);
|
||||
BLAKE2S_ROUND(8);
|
||||
BLAKE2S_ROUND(9);
|
||||
|
||||
|
||||
for( size_t i = 0; i < 8; ++i )
|
||||
S->h[i] = S->h[i] ^ v[i] ^ v[i + 8];
|
||||
|
||||
#undef G
|
||||
#undef ROUND
|
||||
#undef BLAKE2S_ROUND
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
@@ -39,16 +39,14 @@
|
||||
#include <stddef.h>
|
||||
#include "simd-utils.h"
|
||||
|
||||
#define SPH_SIZE_bmw256 256
|
||||
|
||||
#define SPH_SIZE_bmw512 512
|
||||
|
||||
// BMW-256 4 way 32
|
||||
|
||||
#if defined(__SSE2__) || defined(__ARM_NEON)
|
||||
|
||||
typedef struct
|
||||
{
|
||||
v128_t buf[64];
|
||||
v128_t H[16];
|
||||
v128u32_t buf[64];
|
||||
v128u32_t H[16];
|
||||
size_t ptr;
|
||||
uint32_t bit_count; // assume bit_count fits in 32 bits
|
||||
} bmw_4way_small_context;
|
||||
@@ -58,13 +56,19 @@ typedef bmw_4way_small_context bmw256_4way_context;
|
||||
void bmw256_4way_init( bmw256_4way_context *ctx );
|
||||
|
||||
void bmw256_4way_update(void *cc, const void *data, size_t len);
|
||||
#define bmw256_4way bmw256_4way_update
|
||||
|
||||
void bmw256_4way_close(void *cc, void *dst);
|
||||
|
||||
void bmw256_4way_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
#define bmw256_4x32_context bmw256_4way_context
|
||||
#define bmw256_4x32_init bmw256_4way_init
|
||||
#define bmw256_4x32_update bmw256_4way_update
|
||||
#define bmw256_4x32_close bmw256_4way_close
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(__AVX2__)
|
||||
|
||||
// BMW-256 8 way 32
|
||||
@@ -85,6 +89,11 @@ void bmw256_8way_update( bmw256_8way_context *ctx, const void *data,
|
||||
#define bmw256_8way bmw256_8way_update
|
||||
void bmw256_8way_close( bmw256_8way_context *ctx, void *dst );
|
||||
|
||||
#define bmw256_8x32_context bmw256_8way_context
|
||||
#define bmw256_8x32_init bmw256_8way_init
|
||||
#define bmw256_8x32_update bmw256_8way_update
|
||||
#define bmw256_8x32_close bmw256_8way_close
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(SIMD512)
|
||||
@@ -106,6 +115,11 @@ void bmw256_16way_update( bmw256_16way_context *ctx, const void *data,
|
||||
size_t len );
|
||||
void bmw256_16way_close( bmw256_16way_context *ctx, void *dst );
|
||||
|
||||
#define bmw256_16x32_context bmw256_16way_context
|
||||
#define bmw256_16x32_init bmw256_16way_init
|
||||
#define bmw256_16x32_update bmw256_16way_update
|
||||
#define bmw256_16x32_close bmw256_16way_close
|
||||
|
||||
#endif
|
||||
|
||||
// BMW-512 2 way 64
|
||||
|
||||
@@ -45,7 +45,7 @@ extern "C"{
|
||||
|
||||
#define LPAR (
|
||||
|
||||
#if defined(__SSE2__)
|
||||
#if defined(__SSE2__) || defined(__ARM_NEON)
|
||||
|
||||
// BMW-256 4 way 32
|
||||
/*
|
||||
@@ -284,9 +284,9 @@ static const uint32_t IV256[] = {
|
||||
v128_xor( M[13], H[13] ) ) )
|
||||
|
||||
|
||||
void compress_small( const v128u64_t *M, const v128u64_t H[16], v128u64_t dH[16] )
|
||||
void compress_small( const v128u32_t *M, const v128u32_t H[16], v128u32_t dH[16] )
|
||||
{
|
||||
v128u64_t qt[32], xl, xh; \
|
||||
v128u32_t qt[32], xl, xh; \
|
||||
|
||||
qt[ 0] = v128_add32( ss0( Ws0 ), H[ 1] );
|
||||
qt[ 1] = v128_add32( ss1( Ws1 ), H[ 2] );
|
||||
@@ -428,49 +428,25 @@ static const uint32_t final_s[16][4] =
|
||||
{ 0xaaaaaaae, 0xaaaaaaae, 0xaaaaaaae, 0xaaaaaaae },
|
||||
{ 0xaaaaaaaf, 0xaaaaaaaf, 0xaaaaaaaf, 0xaaaaaaaf }
|
||||
};
|
||||
/*
|
||||
static const v128u64_t final_s[16] =
|
||||
{
|
||||
{ 0xaaaaaaa0aaaaaaa0, 0xaaaaaaa0aaaaaaa0 },
|
||||
{ 0xaaaaaaa1aaaaaaa1, 0xaaaaaaa1aaaaaaa1 },
|
||||
{ 0xaaaaaaa2aaaaaaa2, 0xaaaaaaa2aaaaaaa2 },
|
||||
{ 0xaaaaaaa3aaaaaaa3, 0xaaaaaaa3aaaaaaa3 },
|
||||
{ 0xaaaaaaa4aaaaaaa4, 0xaaaaaaa4aaaaaaa4 },
|
||||
{ 0xaaaaaaa5aaaaaaa5, 0xaaaaaaa5aaaaaaa5 },
|
||||
{ 0xaaaaaaa6aaaaaaa6, 0xaaaaaaa6aaaaaaa6 },
|
||||
{ 0xaaaaaaa7aaaaaaa7, 0xaaaaaaa7aaaaaaa7 },
|
||||
{ 0xaaaaaaa8aaaaaaa8, 0xaaaaaaa8aaaaaaa8 },
|
||||
{ 0xaaaaaaa9aaaaaaa9, 0xaaaaaaa9aaaaaaa9 },
|
||||
{ 0xaaaaaaaaaaaaaaaa, 0xaaaaaaaaaaaaaaaa },
|
||||
{ 0xaaaaaaabaaaaaaab, 0xaaaaaaabaaaaaaab },
|
||||
{ 0xaaaaaaacaaaaaaac, 0xaaaaaaacaaaaaaac },
|
||||
{ 0xaaaaaaadaaaaaaad, 0xaaaaaaadaaaaaaad },
|
||||
{ 0xaaaaaaaeaaaaaaae, 0xaaaaaaaeaaaaaaae },
|
||||
{ 0xaaaaaaafaaaaaaaf, 0xaaaaaaafaaaaaaaf }
|
||||
};
|
||||
*/
|
||||
|
||||
void bmw256_4way_init( bmw256_4way_context *ctx )
|
||||
{
|
||||
ctx->H[ 0] = v128_64( 0x4041424340414243 );
|
||||
ctx->H[ 1] = v128_64( 0x4445464744454647 );
|
||||
ctx->H[ 2] = v128_64( 0x48494A4B48494A4B );
|
||||
ctx->H[ 3] = v128_64( 0x4C4D4E4F4C4D4E4F );
|
||||
ctx->H[ 4] = v128_64( 0x5051525350515253 );
|
||||
ctx->H[ 5] = v128_64( 0x5455565754555657 );
|
||||
ctx->H[ 6] = v128_64( 0x58595A5B58595A5B );
|
||||
ctx->H[ 7] = v128_64( 0x5C5D5E5F5C5D5E5F );
|
||||
ctx->H[ 8] = v128_64( 0x6061626360616263 );
|
||||
ctx->H[ 9] = v128_64( 0x6465666764656667 );
|
||||
ctx->H[10] = v128_64( 0x68696A6B68696A6B );
|
||||
ctx->H[11] = v128_64( 0x6C6D6E6F6C6D6E6F );
|
||||
ctx->H[12] = v128_64( 0x7071727370717273 );
|
||||
ctx->H[13] = v128_64( 0x7475767774757677 );
|
||||
ctx->H[14] = v128_64( 0x78797A7B78797A7B );
|
||||
ctx->H[15] = v128_64( 0x7C7D7E7F7C7D7E7F );
|
||||
|
||||
|
||||
// for ( int i = 0; i < 16; i++ )
|
||||
// sc->H[i] = v128_32( iv[i] );
|
||||
ctx->H[ 0] = v128_32( 0x40414243 );
|
||||
ctx->H[ 1] = v128_32( 0x44454647 );
|
||||
ctx->H[ 2] = v128_32( 0x48494A4B );
|
||||
ctx->H[ 3] = v128_32( 0x4C4D4E4F );
|
||||
ctx->H[ 4] = v128_32( 0x50515253 );
|
||||
ctx->H[ 5] = v128_32( 0x54555657 );
|
||||
ctx->H[ 6] = v128_32( 0x58595A5B );
|
||||
ctx->H[ 7] = v128_32( 0x5C5D5E5F );
|
||||
ctx->H[ 8] = v128_32( 0x60616263 );
|
||||
ctx->H[ 9] = v128_32( 0x64656667 );
|
||||
ctx->H[10] = v128_32( 0x68696A6B );
|
||||
ctx->H[11] = v128_32( 0x6C6D6E6F );
|
||||
ctx->H[12] = v128_32( 0x70717273 );
|
||||
ctx->H[13] = v128_32( 0x74757677 );
|
||||
ctx->H[14] = v128_32( 0x78797A7B );
|
||||
ctx->H[15] = v128_32( 0x7C7D7E7F );
|
||||
ctx->ptr = 0;
|
||||
ctx->bit_count = 0;
|
||||
}
|
||||
@@ -478,10 +454,10 @@ void bmw256_4way_init( bmw256_4way_context *ctx )
|
||||
static void
|
||||
bmw32_4way(bmw_4way_small_context *sc, const void *data, size_t len)
|
||||
{
|
||||
v128u64_t *vdata = (v128u64_t*)data;
|
||||
v128u64_t *buf;
|
||||
v128u64_t htmp[16];
|
||||
v128u64_t *h1, *h2;
|
||||
v128u32_t *vdata = (v128u32_t*)data;
|
||||
v128u32_t *buf;
|
||||
v128u32_t htmp[16];
|
||||
v128u32_t *h1, *h2;
|
||||
size_t ptr;
|
||||
const int buf_size = 64; // bytes of one lane, compatible with len
|
||||
|
||||
@@ -503,7 +479,7 @@ bmw32_4way(bmw_4way_small_context *sc, const void *data, size_t len)
|
||||
ptr += clen;
|
||||
if ( ptr == buf_size )
|
||||
{
|
||||
v128u64_t *ht;
|
||||
v128u32_t *ht;
|
||||
compress_small( buf, h1, h2 );
|
||||
ht = h1;
|
||||
h1 = h2;
|
||||
@@ -521,14 +497,14 @@ static void
|
||||
bmw32_4way_close(bmw_4way_small_context *sc, unsigned ub, unsigned n,
|
||||
void *dst, size_t out_size_w32)
|
||||
{
|
||||
v128u64_t *buf;
|
||||
v128u64_t h1[16], h2[16], *h;
|
||||
v128u32_t *buf;
|
||||
v128u32_t h1[16], h2[16], *h;
|
||||
size_t ptr, u, v;
|
||||
const int buf_size = 64; // bytes of one lane, compatible with len
|
||||
|
||||
buf = sc->buf;
|
||||
ptr = sc->ptr;
|
||||
buf[ ptr>>2 ] = v128_64( 0x0000008000000080 );
|
||||
buf[ ptr>>2 ] = v128_32( 0x00000080 );
|
||||
ptr += 4;
|
||||
h = sc->H;
|
||||
|
||||
@@ -548,7 +524,7 @@ bmw32_4way_close(bmw_4way_small_context *sc, unsigned ub, unsigned n,
|
||||
for ( u = 0; u < 16; u ++ )
|
||||
buf[u] = h2[u];
|
||||
|
||||
compress_small( buf, (v128u64_t*)final_s, h1 );
|
||||
compress_small( buf, (v128u32_t*)final_s, h1 );
|
||||
|
||||
for (u = 0, v = 16 - out_size_w32; u < out_size_w32; u ++, v ++)
|
||||
casti_v128( dst, u ) = h1[v];
|
||||
|
||||
@@ -39,7 +39,7 @@ static void transform( cubehashParam *sp )
|
||||
|
||||
#elif defined(__AVX2__)
|
||||
|
||||
register __m256i x0, x1, x2, x3, y0, y1;
|
||||
register __m256i x0, x1, x2, x3, t0;
|
||||
|
||||
x0 = _mm256_load_si256( (__m256i*)sp->x );
|
||||
x1 = _mm256_load_si256( (__m256i*)sp->x + 1 );
|
||||
@@ -50,10 +50,10 @@ static void transform( cubehashParam *sp )
|
||||
{
|
||||
x2 = _mm256_add_epi32( x0, x2 );
|
||||
x3 = _mm256_add_epi32( x1, x3 );
|
||||
y0 = mm256_rol_32( x1, 7 );
|
||||
y1 = mm256_rol_32( x0, 7 );
|
||||
x0 = _mm256_xor_si256( y0, x2 );
|
||||
x1 = _mm256_xor_si256( y1, x3 );
|
||||
t0 = mm256_rol_32( x1, 7 );
|
||||
x1 = mm256_rol_32( x0, 7 );
|
||||
x0 = _mm256_xor_si256( t0, x2 );
|
||||
x1 = _mm256_xor_si256( x1, x3 );
|
||||
x2 = mm256_swap128_64( x2 );
|
||||
x3 = mm256_swap128_64( x3 );
|
||||
x2 = _mm256_add_epi32( x0, x2 );
|
||||
@@ -75,7 +75,7 @@ static void transform( cubehashParam *sp )
|
||||
|
||||
#else // AVX, SSE2, NEON
|
||||
|
||||
v128_t x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3;
|
||||
v128_t x0, x1, x2, x3, x4, x5, x6, x7, t0, t1;
|
||||
|
||||
x0 = casti_v128( sp->x, 0 );
|
||||
x1 = casti_v128( sp->x, 1 );
|
||||
@@ -92,16 +92,12 @@ static void transform( cubehashParam *sp )
|
||||
x5 = v128_add32( x1, x5 );
|
||||
x6 = v128_add32( x2, x6 );
|
||||
x7 = v128_add32( x3, x7 );
|
||||
y0 = x2;
|
||||
y1 = x3;
|
||||
y2 = x0;
|
||||
y3 = x1;
|
||||
x0 = v128_rol32( y0, 7 );
|
||||
x1 = v128_rol32( y1, 7 );
|
||||
x2 = v128_rol32( y2, 7 );
|
||||
x3 = v128_rol32( y3, 7 );
|
||||
x0 = v128_xor( x0, x4 );
|
||||
x1 = v128_xor( x1, x5 );
|
||||
t0 = v128_rol32( x2, 7 );
|
||||
t1 = v128_rol32( x3, 7 );
|
||||
x2 = v128_rol32( x0, 7 );
|
||||
x3 = v128_rol32( x1, 7 );
|
||||
x0 = v128_xor( t0, x4 );
|
||||
x1 = v128_xor( t1, x5 );
|
||||
x2 = v128_xor( x2, x6 );
|
||||
x3 = v128_xor( x3, x7 );
|
||||
x4 = v128_swap64( x4 );
|
||||
@@ -112,19 +108,15 @@ static void transform( cubehashParam *sp )
|
||||
x5 = v128_add32( x1, x5 );
|
||||
x6 = v128_add32( x2, x6 );
|
||||
x7 = v128_add32( x3, x7 );
|
||||
y0 = x1;
|
||||
y1 = x0;
|
||||
y2 = x3;
|
||||
y3 = x2;
|
||||
x0 = v128_rol32( y0, 11 );
|
||||
x1 = v128_rol32( y1, 11 );
|
||||
x2 = v128_rol32( y2, 11 );
|
||||
x3 = v128_rol32( y3, 11 );
|
||||
x0 = v128_xor( x0, x4 );
|
||||
x1 = v128_xor( x1, x5 );
|
||||
x2 = v128_xor( x2, x6 );
|
||||
x3 = v128_xor( x3, x7 );
|
||||
x4 = v128_swap64_32( x4 );
|
||||
t0 = v128_rol32( x1, 11 );
|
||||
x1 = v128_rol32( x0, 11 );
|
||||
t1 = v128_rol32( x3, 11 );
|
||||
x3 = v128_rol32( x2, 11 );
|
||||
x0 = v128_xor( t0, x4 );
|
||||
x1 = v128_xor( x1, x5 );
|
||||
x2 = v128_xor( t1, x6 );
|
||||
x3 = v128_xor( x3, x7 );
|
||||
x4 = v128_swap64_32( x4 );
|
||||
x5 = v128_swap64_32( x5 );
|
||||
x6 = v128_swap64_32( x6 );
|
||||
x7 = v128_swap64_32( x7 );
|
||||
|
||||
@@ -17,7 +17,7 @@ typedef struct {
|
||||
#else
|
||||
hashState_groestl groestl;
|
||||
#endif
|
||||
sha256_8way_context sha;
|
||||
sha256_8x32_context sha;
|
||||
} myrgr_8way_ctx_holder;
|
||||
|
||||
myrgr_8way_ctx_holder myrgr_8way_ctx;
|
||||
@@ -29,7 +29,7 @@ void init_myrgr_8way_ctx()
|
||||
#else
|
||||
init_groestl( &myrgr_8way_ctx.groestl, 64 );
|
||||
#endif
|
||||
sha256_8way_init( &myrgr_8way_ctx.sha );
|
||||
sha256_8x32_init( &myrgr_8way_ctx.sha );
|
||||
}
|
||||
|
||||
void myriad_8way_hash( void *output, const void *input )
|
||||
@@ -96,8 +96,8 @@ void myriad_8way_hash( void *output, const void *input )
|
||||
intrlv_8x32_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5,
|
||||
hash6, hash7 );
|
||||
|
||||
sha256_8way_update( &ctx.sha, vhash, 64 );
|
||||
sha256_8way_close( &ctx.sha, output );
|
||||
sha256_8x32_update( &ctx.sha, vhash, 64 );
|
||||
sha256_8x32_close( &ctx.sha, output );
|
||||
}
|
||||
|
||||
int scanhash_myriad_8way( struct work *work, uint32_t max_nonce,
|
||||
@@ -156,7 +156,7 @@ int scanhash_myriad_8way( struct work *work, uint32_t max_nonce,
|
||||
|
||||
typedef struct {
|
||||
hashState_groestl groestl;
|
||||
sha256_4way_context sha;
|
||||
sha256_4x32_context sha;
|
||||
} myrgr_4way_ctx_holder;
|
||||
|
||||
myrgr_4way_ctx_holder myrgr_4way_ctx;
|
||||
@@ -164,7 +164,7 @@ myrgr_4way_ctx_holder myrgr_4way_ctx;
|
||||
void init_myrgr_4way_ctx()
|
||||
{
|
||||
init_groestl (&myrgr_4way_ctx.groestl, 64 );
|
||||
sha256_4way_init( &myrgr_4way_ctx.sha );
|
||||
sha256_4x32_init( &myrgr_4way_ctx.sha );
|
||||
}
|
||||
|
||||
void myriad_4way_hash( void *output, const void *input )
|
||||
@@ -189,8 +189,8 @@ void myriad_4way_hash( void *output, const void *input )
|
||||
|
||||
intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
|
||||
sha256_4way_update( &ctx.sha, vhash, 64 );
|
||||
sha256_4way_close( &ctx.sha, output );
|
||||
sha256_4x32_update( &ctx.sha, vhash, 64 );
|
||||
sha256_4x32_close( &ctx.sha, output );
|
||||
}
|
||||
|
||||
int scanhash_myriad_4way( struct work *work, uint32_t max_nonce,
|
||||
|
||||
@@ -1059,7 +1059,7 @@ void hamsi_8way_big( hamsi_8way_big_context *sc, __m512i *buf, size_t num )
|
||||
WRITE_STATE_BIG8( sc );
|
||||
}
|
||||
|
||||
void hamsi_8way_big_final( hamsi_8way_big_context *sc, __m512i *buf )
|
||||
void hamsi_8way_big_final( hamsi512_8x64_context *sc, __m512i *buf )
|
||||
{
|
||||
__m512i m0, m1, m2, m3, m4, m5, m6, m7;
|
||||
|
||||
@@ -1071,7 +1071,7 @@ void hamsi_8way_big_final( hamsi_8way_big_context *sc, __m512i *buf )
|
||||
WRITE_STATE_BIG8( sc );
|
||||
}
|
||||
|
||||
void hamsi512_8way_init( hamsi_8way_big_context *sc )
|
||||
void hamsi512_8x64_init( hamsi512_8x64_context *sc )
|
||||
{
|
||||
sc->partial_len = 0;
|
||||
sc->count_high = sc->count_low = 0;
|
||||
@@ -1087,7 +1087,7 @@ void hamsi512_8way_init( hamsi_8way_big_context *sc )
|
||||
sc->h[7] = v512_64( iv[7] );
|
||||
}
|
||||
|
||||
void hamsi512_8way_update( hamsi_8way_big_context *sc, const void *data,
|
||||
void hamsi512_8x64_update( hamsi512_8x64_context *sc, const void *data,
|
||||
size_t len )
|
||||
{
|
||||
__m512i *vdata = (__m512i*)data;
|
||||
@@ -1099,7 +1099,7 @@ void hamsi512_8way_update( hamsi_8way_big_context *sc, const void *data,
|
||||
sc->partial_len = len;
|
||||
}
|
||||
|
||||
void hamsi512_8way_close( hamsi_8way_big_context *sc, void *dst )
|
||||
void hamsi512_8x64_close( hamsi512_8x64_context *sc, void *dst )
|
||||
{
|
||||
__m512i pad[1];
|
||||
uint32_t ch, cl;
|
||||
@@ -1944,7 +1944,7 @@ void hamsi512_8x32_full( hamsi512_8x32_context *sc, void * dst,
|
||||
|
||||
////////////
|
||||
|
||||
void hamsi_big( hamsi_4way_big_context *sc, __m256i *buf, size_t num )
|
||||
void hamsi_big( hamsi512_4x64_context *sc, __m256i *buf, size_t num )
|
||||
{
|
||||
DECL_STATE_BIG
|
||||
uint32_t tmp;
|
||||
@@ -1968,7 +1968,7 @@ void hamsi_big( hamsi_4way_big_context *sc, __m256i *buf, size_t num )
|
||||
WRITE_STATE_BIG( sc );
|
||||
}
|
||||
|
||||
void hamsi_big_final( hamsi_4way_big_context *sc, __m256i *buf )
|
||||
void hamsi_big_final( hamsi512_4x64_context *sc, __m256i *buf )
|
||||
{
|
||||
__m256i m0, m1, m2, m3, m4, m5, m6, m7;
|
||||
DECL_STATE_BIG
|
||||
@@ -1979,7 +1979,7 @@ void hamsi_big_final( hamsi_4way_big_context *sc, __m256i *buf )
|
||||
WRITE_STATE_BIG( sc );
|
||||
}
|
||||
|
||||
void hamsi512_4way_init( hamsi_4way_big_context *sc )
|
||||
void hamsi512_4x64_init( hamsi512_4x64_context *sc )
|
||||
{
|
||||
sc->partial_len = 0;
|
||||
sc->count_high = sc->count_low = 0;
|
||||
@@ -1994,7 +1994,7 @@ void hamsi512_4way_init( hamsi_4way_big_context *sc )
|
||||
sc->h[7] = v256_64( iv[7] );
|
||||
}
|
||||
|
||||
void hamsi512_4way_update( hamsi_4way_big_context *sc, const void *data,
|
||||
void hamsi512_4x64_update( hamsi512_4x64_context *sc, const void *data,
|
||||
size_t len )
|
||||
{
|
||||
__m256i *vdata = (__m256i*)data;
|
||||
@@ -2006,7 +2006,7 @@ void hamsi512_4way_update( hamsi_4way_big_context *sc, const void *data,
|
||||
sc->partial_len = len;
|
||||
}
|
||||
|
||||
void hamsi512_4way_close( hamsi_4way_big_context *sc, void *dst )
|
||||
void hamsi512_4x64_close( hamsi512_4x64_context *sc, void *dst )
|
||||
{
|
||||
__m256i pad[1];
|
||||
uint32_t ch, cl;
|
||||
|
||||
@@ -72,17 +72,17 @@ typedef struct
|
||||
size_t partial_len;
|
||||
uint32_t count_high, count_low;
|
||||
} hamsi_4way_big_context;
|
||||
typedef hamsi_4way_big_context hamsi512_4way_context;
|
||||
typedef hamsi_4way_big_context hamsi512_4x64_context;
|
||||
|
||||
void hamsi512_4way_init( hamsi512_4way_context *sc );
|
||||
void hamsi512_4way_update( hamsi512_4way_context *sc, const void *data,
|
||||
void hamsi512_4x64_init( hamsi512_4x64_context *sc );
|
||||
void hamsi512_4x64_update( hamsi512_4x64_context *sc, const void *data,
|
||||
size_t len );
|
||||
void hamsi512_4way_close( hamsi512_4way_context *sc, void *dst );
|
||||
void hamsi512_4x64_close( hamsi512_4x64_context *sc, void *dst );
|
||||
|
||||
#define hamsi512_4x64_context hamsi512_4way_context
|
||||
#define hamsi512_4x64_init hamsi512_4way_init
|
||||
#define hamsi512_4x64_update hamsi512_4way_update
|
||||
#define hamsi512_4x64_close hamsi512_4way_close
|
||||
#define hamsi512_4way_context hamsi512_4x64_context
|
||||
#define hamsi512_4way_init hamsi512_4x64_init
|
||||
#define hamsi512_4way_update hamsi512_4x64_update
|
||||
#define hamsi512_4way_close hamsi512_4x64_close
|
||||
|
||||
// Hamsi-512 8x32
|
||||
|
||||
@@ -115,17 +115,17 @@ typedef struct
|
||||
size_t partial_len;
|
||||
uint32_t count_high, count_low;
|
||||
} hamsi_8way_big_context;
|
||||
typedef hamsi_8way_big_context hamsi512_8way_context;
|
||||
typedef hamsi_8way_big_context hamsi512_8x64_context;
|
||||
|
||||
void hamsi512_8way_init( hamsi512_8way_context *sc );
|
||||
void hamsi512_8way_update( hamsi512_8way_context *sc, const void *data,
|
||||
void hamsi512_8x64_init( hamsi512_8x64_context *sc );
|
||||
void hamsi512_8x64_update( hamsi512_8x64_context *sc, const void *data,
|
||||
size_t len );
|
||||
void hamsi512_8way_close( hamsi512_8way_context *sc, void *dst );
|
||||
void hamsi512_8x64_close( hamsi512_8x64_context *sc, void *dst );
|
||||
|
||||
#define hamsi512_8x64_context hamsi512_8way_context
|
||||
#define hamsi512_8x64_init hamsi512_8way_init
|
||||
#define hamsi512_8x64_update hamsi512_8way_update
|
||||
#define hamsi512_8x64_close hamsi512_8way_close
|
||||
#define hamsi512_8way_context hamsi512_8x64_context
|
||||
#define hamsi512_8way_init hamsi512_8x64_init
|
||||
#define hamsi512_8way_update hamsi512_8x64_update
|
||||
#define hamsi512_8way_close hamsi512_8x64_close
|
||||
|
||||
// Hamsi-512 16x32
|
||||
|
||||
|
||||
@@ -82,12 +82,15 @@ typedef struct {
|
||||
typedef haval_4way_context haval256_5_4way_context;
|
||||
|
||||
void haval256_5_4way_init( void *cc );
|
||||
|
||||
void haval256_5_4way_update( void *cc, const void *data, size_t len );
|
||||
//#define haval256_5_4way haval256_5_4way_update
|
||||
|
||||
void haval256_5_4way_close( void *cc, void *dst );
|
||||
|
||||
#define haval256_4x32_context haval256_5_4way_context
|
||||
#define haval256_4x32_init haval256_5_4way_init
|
||||
#define haval256_4x32_update haval256_5_4way_update
|
||||
#define haval256_4x32_close haval256_5_4way_close
|
||||
|
||||
#if defined(__AVX2__)
|
||||
|
||||
typedef struct {
|
||||
@@ -100,11 +103,14 @@ typedef struct {
|
||||
typedef haval_8way_context haval256_5_8way_context;
|
||||
|
||||
void haval256_5_8way_init( void *cc );
|
||||
|
||||
void haval256_5_8way_update( void *cc, const void *data, size_t len );
|
||||
|
||||
void haval256_5_8way_close( void *cc, void *dst );
|
||||
|
||||
#define haval256_8x32_context haval256_5_8way_context
|
||||
#define haval256_8x32_init haval256_5_8way_init
|
||||
#define haval256_8x32_update haval256_5_8way_update
|
||||
#define haval256_8x32_close haval256_5_8way_close
|
||||
|
||||
#endif // AVX2
|
||||
|
||||
#if defined(SIMD512)
|
||||
@@ -119,11 +125,14 @@ typedef struct {
|
||||
typedef haval_16way_context haval256_5_16way_context;
|
||||
|
||||
void haval256_5_16way_init( void *cc );
|
||||
|
||||
void haval256_5_16way_update( void *cc, const void *data, size_t len );
|
||||
|
||||
void haval256_5_16way_close( void *cc, void *dst );
|
||||
|
||||
#define haval256_16x32_context haval256_5_16way_context
|
||||
#define haval256_16x32_init haval256_5_16way_init
|
||||
#define haval256_16x32_update haval256_5_16way_update
|
||||
#define haval256_16x32_close haval256_5_16way_close
|
||||
|
||||
#endif // AVX512
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
||||
@@ -190,7 +190,7 @@ static void keccak64_8way_close( keccak64_ctx_m512i *kc, void *dst,
|
||||
memcpy_512( dst, kc->w, m512_len );
|
||||
}
|
||||
|
||||
void keccak256_8way_init( void *kc )
|
||||
void keccak256_8x64_init( void *kc )
|
||||
{
|
||||
keccak64_8way_init( kc, 256 );
|
||||
}
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
void sha3d_hash_8way(void *state, const void *input)
|
||||
{
|
||||
uint32_t buffer[16*8] __attribute__ ((aligned (128)));
|
||||
keccak256_8way_context ctx;
|
||||
keccak256_8x64_context ctx;
|
||||
|
||||
keccak256_8x64_init( &ctx );
|
||||
keccak256_8x64_update( &ctx, input, 80 );
|
||||
@@ -69,7 +69,7 @@ int scanhash_sha3d_8way( struct work *work, uint32_t max_nonce,
|
||||
void sha3d_hash_4way(void *state, const void *input)
|
||||
{
|
||||
uint32_t buffer[16*4] __attribute__ ((aligned (64)));
|
||||
keccak256_4way_context ctx;
|
||||
keccak256_4x64_context ctx;
|
||||
|
||||
keccak256_4x64_init( &ctx );
|
||||
keccak256_4x64_update( &ctx, input, 80 );
|
||||
|
||||
@@ -273,8 +273,6 @@ void finalization512_4way( luffa_4way_context *state, uint32 *b )
|
||||
uint32_t hash[8*4] __attribute((aligned(128)));
|
||||
__m512i* chainv = state->chainv;
|
||||
__m512i t[2];
|
||||
const __m512i shuff_bswap32 = mm512_bcast_m128( _mm_set_epi64x(
|
||||
0x0c0d0e0f08090a0b, 0x0405060700010203 ) );
|
||||
|
||||
/*---- blank round with m=0 ----*/
|
||||
rnd512_4way( state, NULL );
|
||||
@@ -289,10 +287,8 @@ void finalization512_4way( luffa_4way_context *state, uint32 *b )
|
||||
_mm512_store_si512( (__m512i*)&hash[ 0], t[0] );
|
||||
_mm512_store_si512( (__m512i*)&hash[16], t[1] );
|
||||
|
||||
casti_m512i( b,0 ) = _mm512_shuffle_epi8(
|
||||
casti_m512i( hash,0 ), shuff_bswap32 );
|
||||
casti_m512i( b,1 ) = _mm512_shuffle_epi8(
|
||||
casti_m512i( hash,1 ), shuff_bswap32 );
|
||||
casti_m512i( b,0 ) = mm512_bswap_32( casti_m512i( hash,0 ) );
|
||||
casti_m512i( b,1 ) = mm512_bswap_32( casti_m512i( hash,1 ) );
|
||||
|
||||
rnd512_4way( state, NULL );
|
||||
|
||||
@@ -306,10 +302,8 @@ void finalization512_4way( luffa_4way_context *state, uint32 *b )
|
||||
_mm512_store_si512( (__m512i*)&hash[ 0], t[0] );
|
||||
_mm512_store_si512( (__m512i*)&hash[16], t[1] );
|
||||
|
||||
casti_m512i( b,2 ) = _mm512_shuffle_epi8(
|
||||
casti_m512i( hash,0 ), shuff_bswap32 );
|
||||
casti_m512i( b,3 ) = _mm512_shuffle_epi8(
|
||||
casti_m512i( hash,1 ), shuff_bswap32 );
|
||||
casti_m512i( b,2 ) = mm512_bswap_32( casti_m512i( hash,0 ) );
|
||||
casti_m512i( b,3 ) = mm512_bswap_32( casti_m512i( hash,1 ) );
|
||||
}
|
||||
|
||||
int luffa_4way_init( luffa_4way_context *state, int hashbitlen )
|
||||
@@ -349,16 +343,14 @@ int luffa_4way_update( luffa_4way_context *state, const void *data,
|
||||
__m512i msg[2];
|
||||
int i;
|
||||
int blocks = (int)len >> 5;
|
||||
const __m512i shuff_bswap32 = mm512_bcast_m128( _mm_set_epi64x(
|
||||
0x0c0d0e0f08090a0b, 0x0405060700010203 ) );
|
||||
|
||||
state->rembytes = (int)len & 0x1F;
|
||||
|
||||
// full blocks
|
||||
for ( i = 0; i < blocks; i++, vdata+=2 )
|
||||
{
|
||||
msg[0] = _mm512_shuffle_epi8( vdata[ 0 ], shuff_bswap32 );
|
||||
msg[1] = _mm512_shuffle_epi8( vdata[ 1 ], shuff_bswap32 );
|
||||
msg[0] = mm512_bswap_32( vdata[ 0 ] );
|
||||
msg[1] = mm512_bswap_32( vdata[ 1 ] );
|
||||
rnd512_4way( state, msg );
|
||||
}
|
||||
|
||||
@@ -367,7 +359,7 @@ int luffa_4way_update( luffa_4way_context *state, const void *data,
|
||||
if ( state->rembytes )
|
||||
{
|
||||
// remaining data bytes
|
||||
buffer[0] = _mm512_shuffle_epi8( vdata[0], shuff_bswap32 );
|
||||
buffer[0] = mm512_bswap_32( vdata[0] );
|
||||
buffer[1] = mm512_bcast128lo_64( 0x0000000080000000 );
|
||||
}
|
||||
return 0;
|
||||
@@ -434,16 +426,14 @@ int luffa512_4way_full( luffa_4way_context *state, void *output,
|
||||
__m512i msg[2];
|
||||
int i;
|
||||
const int blocks = (int)( inlen >> 5 );
|
||||
const __m512i shuff_bswap32 = mm512_bcast_m128( _mm_set_epi64x(
|
||||
0x0c0d0e0f08090a0b, 0x0405060700010203 ) );
|
||||
|
||||
state->rembytes = inlen & 0x1F;
|
||||
|
||||
// full blocks
|
||||
for ( i = 0; i < blocks; i++, vdata+=2 )
|
||||
{
|
||||
msg[0] = _mm512_shuffle_epi8( vdata[ 0 ], shuff_bswap32 );
|
||||
msg[1] = _mm512_shuffle_epi8( vdata[ 1 ], shuff_bswap32 );
|
||||
msg[0] = mm512_bswap_32( vdata[ 0 ] );
|
||||
msg[1] = mm512_bswap_32( vdata[ 1 ] );
|
||||
rnd512_4way( state, msg );
|
||||
}
|
||||
|
||||
@@ -451,7 +441,7 @@ int luffa512_4way_full( luffa_4way_context *state, void *output,
|
||||
if ( state->rembytes )
|
||||
{
|
||||
// padding of partial block
|
||||
msg[0] = _mm512_shuffle_epi8( vdata[ 0 ], shuff_bswap32 );
|
||||
msg[0] = mm512_bswap_32( vdata[ 0 ] );
|
||||
msg[1] = mm512_bcast128lo_64( 0x0000000080000000 );
|
||||
rnd512_4way( state, msg );
|
||||
}
|
||||
@@ -479,16 +469,14 @@ int luffa_4way_update_close( luffa_4way_context *state,
|
||||
__m512i msg[2];
|
||||
int i;
|
||||
const int blocks = (int)( inlen >> 5 );
|
||||
const __m512i shuff_bswap32 = mm512_bcast_m128( _mm_set_epi64x(
|
||||
0x0c0d0e0f08090a0b, 0x0405060700010203 ) );
|
||||
|
||||
state->rembytes = inlen & 0x1F;
|
||||
|
||||
// full blocks
|
||||
for ( i = 0; i < blocks; i++, vdata+=2 )
|
||||
{
|
||||
msg[0] = _mm512_shuffle_epi8( vdata[ 0 ], shuff_bswap32 );
|
||||
msg[1] = _mm512_shuffle_epi8( vdata[ 1 ], shuff_bswap32 );
|
||||
msg[0] = mm512_bswap_32( vdata[ 0 ] );
|
||||
msg[1] = mm512_bswap_32( vdata[ 1 ] );
|
||||
rnd512_4way( state, msg );
|
||||
}
|
||||
|
||||
@@ -496,7 +484,7 @@ int luffa_4way_update_close( luffa_4way_context *state,
|
||||
if ( state->rembytes )
|
||||
{
|
||||
// padding of partial block
|
||||
msg[0] = _mm512_shuffle_epi8( vdata[ 0 ], shuff_bswap32 );
|
||||
msg[0] = mm512_bswap_32( vdata[ 0 ] );
|
||||
msg[1] = mm512_bcast128lo_64( 0x0000000080000000 );
|
||||
rnd512_4way( state, msg );
|
||||
}
|
||||
@@ -775,8 +763,6 @@ void finalization512_2way( luffa_2way_context *state, uint32 *b )
|
||||
uint32 hash[8*2] __attribute((aligned(64)));
|
||||
__m256i* chainv = state->chainv;
|
||||
__m256i t0, t1;
|
||||
const __m256i shuff_bswap32 = mm256_set2_64( 0x0c0d0e0f08090a0b,
|
||||
0x0405060700010203 );
|
||||
/*---- blank round with m=0 ----*/
|
||||
rnd512_2way( state, NULL );
|
||||
|
||||
@@ -791,10 +777,8 @@ void finalization512_2way( luffa_2way_context *state, uint32 *b )
|
||||
_mm256_store_si256( (__m256i*)&hash[0], t0 );
|
||||
_mm256_store_si256( (__m256i*)&hash[8], t1 );
|
||||
|
||||
casti_m256i( b, 0 ) = _mm256_shuffle_epi8(
|
||||
casti_m256i( hash, 0 ), shuff_bswap32 );
|
||||
casti_m256i( b, 1 ) = _mm256_shuffle_epi8(
|
||||
casti_m256i( hash, 1 ), shuff_bswap32 );
|
||||
casti_m256i( b, 0 ) = mm256_bswap_32( casti_m256i( hash, 0 ) );
|
||||
casti_m256i( b, 1 ) = mm256_bswap_32( casti_m256i( hash, 1 ) );
|
||||
|
||||
rnd512_2way( state, NULL );
|
||||
|
||||
@@ -809,10 +793,8 @@ void finalization512_2way( luffa_2way_context *state, uint32 *b )
|
||||
_mm256_store_si256( (__m256i*)&hash[0], t0 );
|
||||
_mm256_store_si256( (__m256i*)&hash[8], t1 );
|
||||
|
||||
casti_m256i( b, 2 ) = _mm256_shuffle_epi8(
|
||||
casti_m256i( hash, 0 ), shuff_bswap32 );
|
||||
casti_m256i( b, 3 ) = _mm256_shuffle_epi8(
|
||||
casti_m256i( hash, 1 ), shuff_bswap32 );
|
||||
casti_m256i( b, 2 ) = mm256_bswap_32( casti_m256i( hash, 0 ) );
|
||||
casti_m256i( b, 3 ) = mm256_bswap_32( casti_m256i( hash, 1 ) );
|
||||
}
|
||||
|
||||
int luffa_2way_init( luffa_2way_context *state, int hashbitlen )
|
||||
@@ -847,15 +829,13 @@ int luffa_2way_update( luffa_2way_context *state, const void *data,
|
||||
__m256i msg[2];
|
||||
int i;
|
||||
int blocks = (int)len >> 5;
|
||||
const __m256i shuff_bswap32 = mm256_set2_64( 0x0c0d0e0f08090a0b,
|
||||
0x0405060700010203 );
|
||||
state-> rembytes = (int)len & 0x1F;
|
||||
|
||||
// full blocks
|
||||
for ( i = 0; i < blocks; i++, vdata+=2 )
|
||||
{
|
||||
msg[0] = _mm256_shuffle_epi8( vdata[ 0 ], shuff_bswap32 );
|
||||
msg[1] = _mm256_shuffle_epi8( vdata[ 1 ], shuff_bswap32 );
|
||||
msg[0] = mm256_bswap_32( vdata[ 0 ] );
|
||||
msg[1] = mm256_bswap_32( vdata[ 1 ] );
|
||||
rnd512_2way( state, msg );
|
||||
}
|
||||
|
||||
@@ -864,7 +844,7 @@ int luffa_2way_update( luffa_2way_context *state, const void *data,
|
||||
if ( state->rembytes )
|
||||
{
|
||||
// remaining data bytes
|
||||
buffer[0] = _mm256_shuffle_epi8( vdata[0], shuff_bswap32 );
|
||||
buffer[0] = mm256_bswap_32( vdata[0] );
|
||||
buffer[1] = mm256_bcast128lo_64( 0x0000000080000000 );
|
||||
}
|
||||
return 0;
|
||||
@@ -916,16 +896,14 @@ int luffa512_2way_full( luffa_2way_context *state, void *output,
|
||||
__m256i msg[2];
|
||||
int i;
|
||||
const int blocks = (int)( inlen >> 5 );
|
||||
const __m256i shuff_bswap32 = mm256_set2_64( 0x0c0d0e0f08090a0b,
|
||||
0x0405060700010203 );
|
||||
|
||||
state->rembytes = inlen & 0x1F;
|
||||
|
||||
// full blocks
|
||||
for ( i = 0; i < blocks; i++, vdata+=2 )
|
||||
{
|
||||
msg[0] = _mm256_shuffle_epi8( vdata[ 0 ], shuff_bswap32 );
|
||||
msg[1] = _mm256_shuffle_epi8( vdata[ 1 ], shuff_bswap32 );
|
||||
msg[0] = mm256_bswap_32( vdata[ 0 ] );
|
||||
msg[1] = mm256_bswap_32( vdata[ 1 ] );
|
||||
rnd512_2way( state, msg );
|
||||
}
|
||||
|
||||
@@ -933,7 +911,7 @@ int luffa512_2way_full( luffa_2way_context *state, void *output,
|
||||
if ( state->rembytes )
|
||||
{
|
||||
// padding of partial block
|
||||
msg[0] = _mm256_shuffle_epi8( vdata[ 0 ], shuff_bswap32 );
|
||||
msg[0] = mm256_bswap_32( vdata[ 0 ] );
|
||||
msg[1] = mm256_bcast128lo_64( 0x0000000080000000 );
|
||||
rnd512_2way( state, msg );
|
||||
}
|
||||
@@ -961,16 +939,14 @@ int luffa_2way_update_close( luffa_2way_context *state,
|
||||
__m256i msg[2];
|
||||
int i;
|
||||
const int blocks = (int)( inlen >> 5 );
|
||||
const __m256i shuff_bswap32 = mm256_set2_64( 0x0c0d0e0f08090a0b,
|
||||
0x0405060700010203 );
|
||||
|
||||
state->rembytes = inlen & 0x1F;
|
||||
|
||||
// full blocks
|
||||
for ( i = 0; i < blocks; i++, vdata+=2 )
|
||||
{
|
||||
msg[0] = _mm256_shuffle_epi8( vdata[ 0 ], shuff_bswap32 );
|
||||
msg[1] = _mm256_shuffle_epi8( vdata[ 1 ], shuff_bswap32 );
|
||||
msg[0] = mm256_bswap_32( vdata[ 0 ] );
|
||||
msg[1] = mm256_bswap_32( vdata[ 1 ] );
|
||||
rnd512_2way( state, msg );
|
||||
}
|
||||
|
||||
@@ -978,7 +954,7 @@ int luffa_2way_update_close( luffa_2way_context *state,
|
||||
if ( state->rembytes )
|
||||
{
|
||||
// padding of partial block
|
||||
msg[0] = _mm256_shuffle_epi8( vdata[ 0 ], shuff_bswap32 );
|
||||
msg[0] = mm256_bswap_32( vdata[ 0 ] );
|
||||
msg[1] = mm256_bcast128lo_64( 0x0000000080000000 );
|
||||
rnd512_2way( state, msg );
|
||||
}
|
||||
|
||||
@@ -26,9 +26,9 @@
|
||||
#if defined (ALLIUM_16WAY)
|
||||
|
||||
typedef union {
|
||||
keccak256_8way_context keccak;
|
||||
keccak256_8x64_context keccak;
|
||||
cube_4way_2buf_context cube;
|
||||
skein256_8way_context skein;
|
||||
skein256_8x64_context skein;
|
||||
#if defined(__VAES__)
|
||||
groestl256_4way_context groestl;
|
||||
#else
|
||||
@@ -60,7 +60,7 @@ static void allium_16way_hash( void *state, const void *midstate_vars,
|
||||
uint32_t hash15[8] __attribute__ ((aligned (32)));
|
||||
allium_16way_ctx_holder ctx __attribute__ ((aligned (64)));
|
||||
|
||||
blake256_16way_final_rounds_le( vhash, midstate_vars, midhash, block, 14 );
|
||||
blake256_16x32_final_rounds_le( vhash, midstate_vars, midhash, block, 14 );
|
||||
|
||||
dintrlv_16x32( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
|
||||
hash8, hash9, hash10, hash11, hash12, hash13, hash14, hash15,
|
||||
@@ -70,12 +70,12 @@ static void allium_16way_hash( void *state, const void *midstate_vars,
|
||||
intrlv_8x64( vhashB, hash8, hash9, hash10, hash11, hash12, hash13, hash14,
|
||||
hash15, 256 );
|
||||
|
||||
keccak256_8way_init( &ctx.keccak );
|
||||
keccak256_8way_update( &ctx.keccak, vhashA, 32 );
|
||||
keccak256_8way_close( &ctx.keccak, vhashA);
|
||||
keccak256_8way_init( &ctx.keccak );
|
||||
keccak256_8way_update( &ctx.keccak, vhashB, 32 );
|
||||
keccak256_8way_close( &ctx.keccak, vhashB);
|
||||
keccak256_8x64_init( &ctx.keccak );
|
||||
keccak256_8x64_update( &ctx.keccak, vhashA, 32 );
|
||||
keccak256_8x64_close( &ctx.keccak, vhashA);
|
||||
keccak256_8x64_init( &ctx.keccak );
|
||||
keccak256_8x64_update( &ctx.keccak, vhashB, 32 );
|
||||
keccak256_8x64_close( &ctx.keccak, vhashB);
|
||||
|
||||
dintrlv_8x64( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
|
||||
vhashA, 256 );
|
||||
@@ -153,12 +153,12 @@ static void allium_16way_hash( void *state, const void *midstate_vars,
|
||||
intrlv_8x64( vhashB, hash8, hash9, hash10, hash11, hash12, hash13, hash14,
|
||||
hash15, 256 );
|
||||
|
||||
skein256_8way_init( &ctx.skein );
|
||||
skein256_8way_update( &ctx.skein, vhashA, 32 );
|
||||
skein256_8way_close( &ctx.skein, vhashA );
|
||||
skein256_8way_init( &ctx.skein );
|
||||
skein256_8way_update( &ctx.skein, vhashB, 32 );
|
||||
skein256_8way_close( &ctx.skein, vhashB );
|
||||
skein256_8x64_init( &ctx.skein );
|
||||
skein256_8x64_update( &ctx.skein, vhashA, 32 );
|
||||
skein256_8x64_close( &ctx.skein, vhashA );
|
||||
skein256_8x64_init( &ctx.skein );
|
||||
skein256_8x64_update( &ctx.skein, vhashB, 32 );
|
||||
skein256_8x64_close( &ctx.skein, vhashB );
|
||||
|
||||
dintrlv_8x64( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
|
||||
vhashA, 256 );
|
||||
@@ -251,7 +251,7 @@ int scanhash_allium_16way( struct work *work, uint32_t max_nonce,
|
||||
n+ 7, n+ 6, n+ 5, n+ 4, n+ 3, n+ 2, n+ 1, n );
|
||||
|
||||
// Partially prehash second block without touching nonces in block_buf[3].
|
||||
blake256_16way_round0_prehash_le( midstate_vars, block0_hash, block_buf );
|
||||
blake256_16x32_round0_prehash_le( midstate_vars, block0_hash, block_buf );
|
||||
|
||||
do {
|
||||
allium_16way_hash( hash, midstate_vars, block0_hash, block_buf );
|
||||
@@ -273,9 +273,9 @@ int scanhash_allium_16way( struct work *work, uint32_t max_nonce,
|
||||
#elif defined (ALLIUM_8WAY)
|
||||
|
||||
typedef union {
|
||||
keccak256_4way_context keccak;
|
||||
keccak256_4x64_context keccak;
|
||||
cube_2way_context cube;
|
||||
skein256_4way_context skein;
|
||||
skein256_4x64_context skein;
|
||||
#if defined(__VAES__)
|
||||
groestl256_2way_context groestl;
|
||||
#else
|
||||
@@ -298,19 +298,19 @@ static void allium_8way_hash( void *hash, const void *midstate_vars,
|
||||
uint64_t *hash7 = (uint64_t*)hash+28;
|
||||
allium_8way_ctx_holder ctx __attribute__ ((aligned (64)));
|
||||
|
||||
blake256_8way_final_rounds_le( vhashA, midstate_vars, midhash, block, 14 );
|
||||
blake256_8x32_final_rounds_le( vhashA, midstate_vars, midhash, block, 14 );
|
||||
|
||||
dintrlv_8x32( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
|
||||
vhashA, 256 );
|
||||
intrlv_4x64( vhashA, hash0, hash1, hash2, hash3, 256 );
|
||||
intrlv_4x64( vhashB, hash4, hash5, hash6, hash7, 256 );
|
||||
|
||||
keccak256_4way_init( &ctx.keccak );
|
||||
keccak256_4way_update( &ctx.keccak, vhashA, 32 );
|
||||
keccak256_4way_close( &ctx.keccak, vhashA );
|
||||
keccak256_4way_init( &ctx.keccak );
|
||||
keccak256_4way_update( &ctx.keccak, vhashB, 32 );
|
||||
keccak256_4way_close( &ctx.keccak, vhashB );
|
||||
keccak256_4x64_init( &ctx.keccak );
|
||||
keccak256_4x64_update( &ctx.keccak, vhashA, 32 );
|
||||
keccak256_4x64_close( &ctx.keccak, vhashA );
|
||||
keccak256_4x64_init( &ctx.keccak );
|
||||
keccak256_4x64_update( &ctx.keccak, vhashB, 32 );
|
||||
keccak256_4x64_close( &ctx.keccak, vhashB );
|
||||
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhashA, 256 );
|
||||
dintrlv_4x64( hash4, hash5, hash6, hash7, vhashB, 256 );
|
||||
@@ -350,12 +350,12 @@ static void allium_8way_hash( void *hash, const void *midstate_vars,
|
||||
intrlv_4x64( vhashA, hash0, hash1, hash2, hash3, 256 );
|
||||
intrlv_4x64( vhashB, hash4, hash5, hash6, hash7, 256 );
|
||||
|
||||
skein256_4way_init( &ctx.skein );
|
||||
skein256_4way_update( &ctx.skein, vhashA, 32 );
|
||||
skein256_4way_close( &ctx.skein, vhashA );
|
||||
skein256_4way_init( &ctx.skein );
|
||||
skein256_4way_update( &ctx.skein, vhashB, 32 );
|
||||
skein256_4way_close( &ctx.skein, vhashB );
|
||||
skein256_4x64_init( &ctx.skein );
|
||||
skein256_4x64_update( &ctx.skein, vhashA, 32 );
|
||||
skein256_4x64_close( &ctx.skein, vhashA );
|
||||
skein256_4x64_init( &ctx.skein );
|
||||
skein256_4x64_update( &ctx.skein, vhashB, 32 );
|
||||
skein256_4x64_close( &ctx.skein, vhashB );
|
||||
|
||||
#if defined(__VAES__)
|
||||
|
||||
@@ -433,7 +433,7 @@ int scanhash_allium_8way( struct work *work, uint32_t max_nonce,
|
||||
n+ 3, n+ 2, n+ 1, n );
|
||||
|
||||
// Partialy prehash second block without touching nonces
|
||||
blake256_8way_round0_prehash_le( midstate_vars, block0_hash, block_buf );
|
||||
blake256_8x32_round0_prehash_le( midstate_vars, block0_hash, block_buf );
|
||||
|
||||
do {
|
||||
allium_8way_hash( hash, midstate_vars, block0_hash, block_buf );
|
||||
@@ -483,7 +483,7 @@ static void allium_4way_hash( void *hash, const void *midstate_vars,
|
||||
uint64_t *hash3 = (uint64_t*)hash+12;
|
||||
allium_4way_ctx_holder ctx __attribute__ ((aligned (64)));
|
||||
|
||||
blake256_4way_final_rounds_le( vhashA, midstate_vars, midhash, block, 14 );
|
||||
blake256_4x32_final_rounds_le( vhashA, midstate_vars, midhash, block, 14 );
|
||||
dintrlv_4x32( hash0, hash1, hash2, hash3, vhashA, 256 );
|
||||
|
||||
intrlv_2x64( vhashA, hash0, hash1, 256 );
|
||||
@@ -588,7 +588,7 @@ int scanhash_allium_4way( struct work *work, uint32_t max_nonce,
|
||||
block_buf[15] = v128_32( 640 );
|
||||
|
||||
// Partialy prehash second block without touching nonces
|
||||
blake256_4way_round0_prehash_le( midstate_vars, block0_hash, block_buf );
|
||||
blake256_4x32_round0_prehash_le( midstate_vars, block0_hash, block_buf );
|
||||
|
||||
do {
|
||||
allium_4way_hash( hash, midstate_vars, block0_hash, block_buf );
|
||||
@@ -616,7 +616,6 @@ int scanhash_allium_4way( struct work *work, uint32_t max_nonce,
|
||||
//
|
||||
// 1 way
|
||||
|
||||
|
||||
typedef struct
|
||||
{
|
||||
blake256_context blake;
|
||||
|
||||
@@ -14,12 +14,12 @@ bool lyra2h_4way_thread_init()
|
||||
return ( lyra2h_4way_matrix = mm_malloc( LYRA2H_MATRIX_SIZE, 64 ) );
|
||||
}
|
||||
|
||||
static __thread blake256_4way_context l2h_4way_blake_mid;
|
||||
static __thread blake256_4x32_context l2h_4way_blake_mid;
|
||||
|
||||
void lyra2h_4way_midstate( const void* input )
|
||||
{
|
||||
blake256_4way_init( &l2h_4way_blake_mid );
|
||||
blake256_4way_update( &l2h_4way_blake_mid, input, 64 );
|
||||
blake256_4x32_init( &l2h_4way_blake_mid );
|
||||
blake256_4x32_update( &l2h_4way_blake_mid, input, 64 );
|
||||
}
|
||||
|
||||
void lyra2h_4way_hash( void *state, const void *input )
|
||||
@@ -29,11 +29,11 @@ void lyra2h_4way_hash( void *state, const void *input )
|
||||
uint32_t hash2[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash3[8] __attribute__ ((aligned (64)));
|
||||
uint32_t vhash[8*4] __attribute__ ((aligned (64)));
|
||||
blake256_4way_context ctx_blake __attribute__ ((aligned (64)));
|
||||
blake256_4x32_context ctx_blake __attribute__ ((aligned (64)));
|
||||
|
||||
memcpy( &ctx_blake, &l2h_4way_blake_mid, sizeof l2h_4way_blake_mid );
|
||||
blake256_4way_update( &ctx_blake, input + (64*4), 16 );
|
||||
blake256_4way_close( &ctx_blake, vhash );
|
||||
blake256_4x32_update( &ctx_blake, input + (64*4), 16 );
|
||||
blake256_4x32_close( &ctx_blake, vhash );
|
||||
|
||||
dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 256 );
|
||||
|
||||
|
||||
@@ -7,25 +7,24 @@
|
||||
#include "algo/cubehash/cubehash_sse2.h"
|
||||
#include "algo/cubehash/cube-hash-2way.h"
|
||||
|
||||
|
||||
#if defined (LYRA2REV2_16WAY)
|
||||
|
||||
typedef struct {
|
||||
blake256_16way_context blake;
|
||||
keccak256_8way_context keccak;
|
||||
blake256_16x32_context blake;
|
||||
keccak256_8x64_context keccak;
|
||||
cubehashParam cube;
|
||||
skein256_8way_context skein;
|
||||
bmw256_16way_context bmw;
|
||||
skein256_8x64_context skein;
|
||||
bmw256_16x32_context bmw;
|
||||
} lyra2v2_16way_ctx_holder __attribute__ ((aligned (64)));
|
||||
|
||||
static lyra2v2_16way_ctx_holder l2v2_16way_ctx;
|
||||
|
||||
bool init_lyra2rev2_16way_ctx()
|
||||
{
|
||||
keccak256_8way_init( &l2v2_16way_ctx.keccak );
|
||||
keccak256_8x64_init( &l2v2_16way_ctx.keccak );
|
||||
cubehashInit( &l2v2_16way_ctx.cube, 256, 16, 32 );
|
||||
skein256_8way_init( &l2v2_16way_ctx.skein );
|
||||
bmw256_16way_init( &l2v2_16way_ctx.bmw );
|
||||
skein256_8x64_init( &l2v2_16way_ctx.skein );
|
||||
bmw256_16x32_init( &l2v2_16way_ctx.bmw );
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -51,8 +50,8 @@ void lyra2rev2_16way_hash( void *state, const void *input )
|
||||
lyra2v2_16way_ctx_holder ctx __attribute__ ((aligned (64)));
|
||||
memcpy( &ctx, &l2v2_16way_ctx, sizeof(l2v2_16way_ctx) );
|
||||
|
||||
blake256_16way_update( &ctx.blake, input + (64<<4), 16 );
|
||||
blake256_16way_close( &ctx.blake, vhash );
|
||||
blake256_16x32_update( &ctx.blake, input + (64<<4), 16 );
|
||||
blake256_16x32_close( &ctx.blake, vhash );
|
||||
|
||||
dintrlv_16x32( hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7,
|
||||
@@ -62,17 +61,17 @@ void lyra2rev2_16way_hash( void *state, const void *input )
|
||||
intrlv_8x64( vhash, hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7, 256 );
|
||||
|
||||
keccak256_8way_update( &ctx.keccak, vhash, 32 );
|
||||
keccak256_8way_close( &ctx.keccak, vhash );
|
||||
keccak256_8x64_update( &ctx.keccak, vhash, 32 );
|
||||
keccak256_8x64_close( &ctx.keccak, vhash );
|
||||
|
||||
dintrlv_8x64( hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7, vhash, 256 );
|
||||
intrlv_8x64( vhash, hash8, hash9, hash10, hash11,
|
||||
hash12, hash13, hash14, hash15, 256 );
|
||||
|
||||
keccak256_8way_init( &ctx.keccak );
|
||||
keccak256_8way_update( &ctx.keccak, vhash, 32 );
|
||||
keccak256_8way_close( &ctx.keccak, vhash );
|
||||
keccak256_8x64_init( &ctx.keccak );
|
||||
keccak256_8x64_update( &ctx.keccak, vhash, 32 );
|
||||
keccak256_8x64_close( &ctx.keccak, vhash );
|
||||
|
||||
dintrlv_8x64( hash8, hash9, hash10, hash11,
|
||||
hash12, hash13, hash14, hash15, vhash, 256 );
|
||||
@@ -122,21 +121,20 @@ void lyra2rev2_16way_hash( void *state, const void *input )
|
||||
|
||||
intrlv_8x64( vhash, hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7, 256 );
|
||||
skein256_8way_update( &ctx.skein, vhash, 32 );
|
||||
skein256_8way_close( &ctx.skein, vhash );
|
||||
skein256_8x64_update( &ctx.skein, vhash, 32 );
|
||||
skein256_8x64_close( &ctx.skein, vhash );
|
||||
|
||||
dintrlv_8x64( hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7, vhash, 256 );
|
||||
intrlv_8x64( vhash, hash8, hash9, hash10, hash11, hash12,
|
||||
hash13, hash14, hash15, 256 );
|
||||
|
||||
skein256_8way_init( &ctx.skein );
|
||||
skein256_8way_update( &ctx.skein, vhash, 32 );
|
||||
skein256_8way_close( &ctx.skein, vhash );
|
||||
skein256_8x64_init( &ctx.skein );
|
||||
skein256_8x64_update( &ctx.skein, vhash, 32 );
|
||||
skein256_8x64_close( &ctx.skein, vhash );
|
||||
|
||||
dintrlv_8x64( hash8, hash9, hash10, hash11,
|
||||
hash12, hash13, hash14, hash15, vhash, 256 );
|
||||
|
||||
|
||||
cubehash_full( &ctx.cube, (byte*) hash0, 256, (const byte*) hash0, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash1, 256, (const byte*) hash1, 32 );
|
||||
@@ -160,8 +158,8 @@ void lyra2rev2_16way_hash( void *state, const void *input )
|
||||
hash8, hash9, hash10, hash11,
|
||||
hash12, hash13, hash14, hash15, 256 );
|
||||
|
||||
bmw256_16way_update( &ctx.bmw, vhash, 32 );
|
||||
bmw256_16way_close( &ctx.bmw, state );
|
||||
bmw256_16x32_update( &ctx.bmw, vhash, 32 );
|
||||
bmw256_16x32_close( &ctx.bmw, state );
|
||||
}
|
||||
|
||||
int scanhash_lyra2rev2_16way( struct work *work, const uint32_t max_nonce,
|
||||
@@ -186,8 +184,8 @@ int scanhash_lyra2rev2_16way( struct work *work, const uint32_t max_nonce,
|
||||
mm512_bswap32_intrlv80_16x32( vdata, pdata );
|
||||
*noncev = _mm512_set_epi32( n+15, n+14, n+13, n+12, n+11, n+10, n+ 9, n+ 8,
|
||||
n+ 7, n+ 6, n+ 5, n+ 4, n+ 3, n+ 2, n+ 1, n );
|
||||
blake256_16way_init( &l2v2_16way_ctx.blake );
|
||||
blake256_16way_update( &l2v2_16way_ctx.blake, vdata, 64 );
|
||||
blake256_16x32_init( &l2v2_16way_ctx.blake );
|
||||
blake256_16x32_update( &l2v2_16way_ctx.blake, vdata, 64 );
|
||||
|
||||
do
|
||||
{
|
||||
@@ -214,21 +212,21 @@ int scanhash_lyra2rev2_16way( struct work *work, const uint32_t max_nonce,
|
||||
#elif defined (LYRA2REV2_8WAY)
|
||||
|
||||
typedef struct {
|
||||
blake256_8way_context blake;
|
||||
keccak256_4way_context keccak;
|
||||
blake256_8x32_context blake;
|
||||
keccak256_4x64_context keccak;
|
||||
cubehashParam cube;
|
||||
skein256_4way_context skein;
|
||||
bmw256_8way_context bmw;
|
||||
skein256_4x64_context skein;
|
||||
bmw256_8x32_context bmw;
|
||||
} lyra2v2_8way_ctx_holder __attribute__ ((aligned (64)));
|
||||
|
||||
static lyra2v2_8way_ctx_holder l2v2_8way_ctx;
|
||||
|
||||
bool init_lyra2rev2_8way_ctx()
|
||||
{
|
||||
keccak256_4way_init( &l2v2_8way_ctx.keccak );
|
||||
keccak256_4x64_init( &l2v2_8way_ctx.keccak );
|
||||
cubehashInit( &l2v2_8way_ctx.cube, 256, 16, 32 );
|
||||
skein256_4way_init( &l2v2_8way_ctx.skein );
|
||||
bmw256_8way_init( &l2v2_8way_ctx.bmw );
|
||||
skein256_4x64_init( &l2v2_8way_ctx.skein );
|
||||
bmw256_8x32_init( &l2v2_8way_ctx.bmw );
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -246,20 +244,20 @@ void lyra2rev2_8way_hash( void *state, const void *input )
|
||||
lyra2v2_8way_ctx_holder ctx __attribute__ ((aligned (64)));
|
||||
memcpy( &ctx, &l2v2_8way_ctx, sizeof(l2v2_8way_ctx) );
|
||||
|
||||
blake256_8way_update( &ctx.blake, input + (64<<3), 16 );
|
||||
blake256_8way_close( &ctx.blake, vhash );
|
||||
blake256_8x32_update( &ctx.blake, input + (64<<3), 16 );
|
||||
blake256_8x32_close( &ctx.blake, vhash );
|
||||
|
||||
dintrlv_8x32( hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7, vhash, 256 );
|
||||
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 256 );
|
||||
keccak256_4way_update( &ctx.keccak, vhash, 32 );
|
||||
keccak256_4way_close( &ctx.keccak, vhash );
|
||||
keccak256_4x64_update( &ctx.keccak, vhash, 32 );
|
||||
keccak256_4x64_close( &ctx.keccak, vhash );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 256 );
|
||||
intrlv_4x64( vhash, hash4, hash5, hash6, hash7, 256 );
|
||||
keccak256_4way_init( &ctx.keccak );
|
||||
keccak256_4way_update( &ctx.keccak, vhash, 32 );
|
||||
keccak256_4way_close( &ctx.keccak, vhash );
|
||||
keccak256_4x64_init( &ctx.keccak );
|
||||
keccak256_4x64_update( &ctx.keccak, vhash, 32 );
|
||||
keccak256_4x64_close( &ctx.keccak, vhash );
|
||||
dintrlv_4x64( hash4, hash5, hash6, hash7, vhash, 256 );
|
||||
|
||||
cubehash_full( &ctx.cube, (byte*) hash0, 256, (const byte*) hash0, 32 );
|
||||
@@ -282,13 +280,13 @@ void lyra2rev2_8way_hash( void *state, const void *input )
|
||||
LYRA2REV2( l2v2_wholeMatrix, hash7, 32, hash7, 32, hash7, 32, 1, 4, 4 );
|
||||
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 256 );
|
||||
skein256_4way_update( &ctx.skein, vhash, 32 );
|
||||
skein256_4way_close( &ctx.skein, vhash );
|
||||
skein256_4x64_update( &ctx.skein, vhash, 32 );
|
||||
skein256_4x64_close( &ctx.skein, vhash );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 256 );
|
||||
intrlv_4x64( vhash, hash4, hash5, hash6, hash7, 256 );
|
||||
skein256_4way_init( &ctx.skein );
|
||||
skein256_4way_update( &ctx.skein, vhash, 32 );
|
||||
skein256_4way_close( &ctx.skein, vhash );
|
||||
skein256_4x64_init( &ctx.skein );
|
||||
skein256_4x64_update( &ctx.skein, vhash, 32 );
|
||||
skein256_4x64_close( &ctx.skein, vhash );
|
||||
dintrlv_4x64( hash4, hash5, hash6, hash7, vhash, 256 );
|
||||
|
||||
cubehash_full( &ctx.cube, (byte*) hash0, 256, (const byte*) hash0, 32 );
|
||||
@@ -303,8 +301,8 @@ void lyra2rev2_8way_hash( void *state, const void *input )
|
||||
intrlv_8x32( vhash, hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7, 256 );
|
||||
|
||||
bmw256_8way_update( &ctx.bmw, vhash, 32 );
|
||||
bmw256_8way_close( &ctx.bmw, state );
|
||||
bmw256_8x32_update( &ctx.bmw, vhash, 32 );
|
||||
bmw256_8x32_close( &ctx.bmw, state );
|
||||
}
|
||||
|
||||
int scanhash_lyra2rev2_8way( struct work *work, const uint32_t max_nonce,
|
||||
@@ -328,8 +326,8 @@ int scanhash_lyra2rev2_8way( struct work *work, const uint32_t max_nonce,
|
||||
|
||||
mm256_bswap32_intrlv80_8x32( vdata, pdata );
|
||||
*noncev = _mm256_set_epi32( n+7, n+6, n+5, n+4, n+3, n+2, n+1, n );
|
||||
blake256_8way_init( &l2v2_8way_ctx.blake );
|
||||
blake256_8way_update( &l2v2_8way_ctx.blake, vdata, 64 );
|
||||
blake256_8x32_init( &l2v2_8way_ctx.blake );
|
||||
blake256_8x32_update( &l2v2_8way_ctx.blake, vdata, 64 );
|
||||
|
||||
do
|
||||
{
|
||||
@@ -356,21 +354,21 @@ int scanhash_lyra2rev2_8way( struct work *work, const uint32_t max_nonce,
|
||||
#elif defined (LYRA2REV2_4WAY)
|
||||
|
||||
typedef struct {
|
||||
blake256_4way_context blake;
|
||||
keccak256_4way_context keccak;
|
||||
blake256_4x32_context blake;
|
||||
keccak256_4x64_context keccak;
|
||||
cubehashParam cube;
|
||||
skein256_4way_context skein;
|
||||
bmw256_4way_context bmw;
|
||||
skein256_4x64_context skein;
|
||||
bmw256_4x32_context bmw;
|
||||
} lyra2v2_4way_ctx_holder;
|
||||
|
||||
static lyra2v2_4way_ctx_holder l2v2_4way_ctx;
|
||||
|
||||
bool init_lyra2rev2_4way_ctx()
|
||||
{
|
||||
keccak256_4way_init( &l2v2_4way_ctx.keccak );
|
||||
keccak256_4x64_init( &l2v2_4way_ctx.keccak );
|
||||
cubehashInit( &l2v2_4way_ctx.cube, 256, 16, 32 );
|
||||
skein256_4way_init( &l2v2_4way_ctx.skein );
|
||||
bmw256_4way_init( &l2v2_4way_ctx.bmw );
|
||||
skein256_4x64_init( &l2v2_4way_ctx.skein );
|
||||
bmw256_4x32_init( &l2v2_4way_ctx.bmw );
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -385,13 +383,13 @@ void lyra2rev2_4way_hash( void *state, const void *input )
|
||||
lyra2v2_4way_ctx_holder ctx __attribute__ ((aligned (64)));
|
||||
memcpy( &ctx, &l2v2_4way_ctx, sizeof(l2v2_4way_ctx) );
|
||||
|
||||
blake256_4way_update( &ctx.blake, input + (64<<2), 16 );
|
||||
blake256_4way_close( &ctx.blake, vhash );
|
||||
blake256_4x32_update( &ctx.blake, input + (64<<2), 16 );
|
||||
blake256_4x32_close( &ctx.blake, vhash );
|
||||
|
||||
rintrlv_4x32_4x64( vhash64, vhash, 256 );
|
||||
|
||||
keccak256_4way_update( &ctx.keccak, vhash64, 32 );
|
||||
keccak256_4way_close( &ctx.keccak, vhash64 );
|
||||
keccak256_4x64_update( &ctx.keccak, vhash64, 32 );
|
||||
keccak256_4x64_close( &ctx.keccak, vhash64 );
|
||||
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash64, 256 );
|
||||
|
||||
@@ -410,8 +408,8 @@ void lyra2rev2_4way_hash( void *state, const void *input )
|
||||
|
||||
intrlv_4x64( vhash64, hash0, hash1, hash2, hash3, 256 );
|
||||
|
||||
skein256_4way_update( &ctx.skein, vhash64, 32 );
|
||||
skein256_4way_close( &ctx.skein, vhash64 );
|
||||
skein256_4x64_update( &ctx.skein, vhash64, 32 );
|
||||
skein256_4x64_close( &ctx.skein, vhash64 );
|
||||
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash64, 256 );
|
||||
|
||||
@@ -426,8 +424,8 @@ void lyra2rev2_4way_hash( void *state, const void *input )
|
||||
|
||||
intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 256 );
|
||||
|
||||
bmw256_4way_update( &ctx.bmw, vhash, 32 );
|
||||
bmw256_4way_close( &ctx.bmw, state );
|
||||
bmw256_4x32_update( &ctx.bmw, vhash, 32 );
|
||||
bmw256_4x32_close( &ctx.bmw, state );
|
||||
}
|
||||
|
||||
int scanhash_lyra2rev2_4way( struct work *work, uint32_t max_nonce,
|
||||
@@ -451,8 +449,8 @@ int scanhash_lyra2rev2_4way( struct work *work, uint32_t max_nonce,
|
||||
|
||||
v128_bswap32_intrlv80_4x32( vdata, pdata );
|
||||
|
||||
blake256_4way_init( &l2v2_4way_ctx.blake );
|
||||
blake256_4way_update( &l2v2_4way_ctx.blake, vdata, 64 );
|
||||
blake256_4x32_init( &l2v2_4way_ctx.blake );
|
||||
blake256_4x32_update( &l2v2_4way_ctx.blake, vdata, 64 );
|
||||
|
||||
do
|
||||
{
|
||||
|
||||
@@ -9,18 +9,18 @@
|
||||
#if defined (LYRA2REV3_16WAY)
|
||||
|
||||
typedef struct {
|
||||
blake256_16way_context blake;
|
||||
blake256_16x32_context blake;
|
||||
cube_4way_context cube;
|
||||
bmw256_16way_context bmw;
|
||||
bmw256_16x32_context bmw;
|
||||
} lyra2v3_16way_ctx_holder;
|
||||
|
||||
static __thread lyra2v3_16way_ctx_holder l2v3_16way_ctx;
|
||||
|
||||
bool init_lyra2rev3_16way_ctx()
|
||||
{
|
||||
blake256_16way_init( &l2v3_16way_ctx.blake );
|
||||
blake256_16x32_init( &l2v3_16way_ctx.blake );
|
||||
cube_4way_init( &l2v3_16way_ctx.cube, 256, 16, 32 );
|
||||
bmw256_16way_init( &l2v3_16way_ctx.bmw );
|
||||
bmw256_16x32_init( &l2v3_16way_ctx.bmw );
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -46,8 +46,8 @@ void lyra2rev3_16way_hash( void *state, const void *input )
|
||||
lyra2v3_16way_ctx_holder ctx __attribute__ ((aligned (64)));
|
||||
memcpy( &ctx, &l2v3_16way_ctx, sizeof(l2v3_16way_ctx) );
|
||||
|
||||
blake256_16way_update( &ctx.blake, input + (64*16), 16 );
|
||||
blake256_16way_close( &ctx.blake, vhash );
|
||||
blake256_16x32_update( &ctx.blake, input + (64*16), 16 );
|
||||
blake256_16x32_close( &ctx.blake, vhash );
|
||||
|
||||
dintrlv_16x32( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
|
||||
hash8, hash9, hash10, hash11 ,hash12, hash13, hash14, hash15,
|
||||
@@ -120,8 +120,8 @@ void lyra2rev3_16way_hash( void *state, const void *input )
|
||||
hash7, hash8, hash9, hash10, hash11, hash12, hash13, hash14,
|
||||
hash15, 256 );
|
||||
|
||||
bmw256_16way_update( &ctx.bmw, vhash, 32 );
|
||||
bmw256_16way_close( &ctx.bmw, state );
|
||||
bmw256_16x32_update( &ctx.bmw, vhash, 32 );
|
||||
bmw256_16x32_close( &ctx.bmw, state );
|
||||
}
|
||||
|
||||
|
||||
@@ -145,8 +145,8 @@ int scanhash_lyra2rev3_16way( struct work *work, const uint32_t max_nonce,
|
||||
|
||||
mm512_bswap32_intrlv80_16x32( vdata, pdata );
|
||||
|
||||
blake256_16way_init( &l2v3_16way_ctx.blake );
|
||||
blake256_16way_update( &l2v3_16way_ctx.blake, vdata, 64 );
|
||||
blake256_16x32_init( &l2v3_16way_ctx.blake );
|
||||
blake256_16x32_update( &l2v3_16way_ctx.blake, vdata, 64 );
|
||||
|
||||
do
|
||||
{
|
||||
@@ -178,18 +178,18 @@ int scanhash_lyra2rev3_16way( struct work *work, const uint32_t max_nonce,
|
||||
#elif defined (LYRA2REV3_8WAY)
|
||||
|
||||
typedef struct {
|
||||
blake256_8way_context blake;
|
||||
blake256_8x32_context blake;
|
||||
cubehashParam cube;
|
||||
bmw256_8way_context bmw;
|
||||
bmw256_8x32_context bmw;
|
||||
} lyra2v3_8way_ctx_holder;
|
||||
|
||||
static __thread lyra2v3_8way_ctx_holder l2v3_8way_ctx;
|
||||
|
||||
bool init_lyra2rev3_8way_ctx()
|
||||
{
|
||||
blake256_8way_init( &l2v3_8way_ctx.blake );
|
||||
blake256_8x32_init( &l2v3_8way_ctx.blake );
|
||||
cubehashInit( &l2v3_8way_ctx.cube, 256, 16, 32 );
|
||||
bmw256_8way_init( &l2v3_8way_ctx.bmw );
|
||||
bmw256_8x32_init( &l2v3_8way_ctx.bmw );
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -207,8 +207,8 @@ void lyra2rev3_8way_hash( void *state, const void *input )
|
||||
lyra2v3_8way_ctx_holder ctx __attribute__ ((aligned (64)));
|
||||
memcpy( &ctx, &l2v3_8way_ctx, sizeof(l2v3_8way_ctx) );
|
||||
|
||||
blake256_8way_update( &ctx.blake, input + (64*8), 16 );
|
||||
blake256_8way_close( &ctx.blake, vhash );
|
||||
blake256_8x32_update( &ctx.blake, input + (64*8), 16 );
|
||||
blake256_8x32_close( &ctx.blake, vhash );
|
||||
|
||||
dintrlv_8x32( hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7, vhash, 256 );
|
||||
@@ -243,8 +243,8 @@ void lyra2rev3_8way_hash( void *state, const void *input )
|
||||
intrlv_8x32( vhash, hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7, 256 );
|
||||
|
||||
bmw256_8way_update( &ctx.bmw, vhash, 32 );
|
||||
bmw256_8way_close( &ctx.bmw, state );
|
||||
bmw256_8x32_update( &ctx.bmw, vhash, 32 );
|
||||
bmw256_8x32_close( &ctx.bmw, state );
|
||||
|
||||
}
|
||||
|
||||
@@ -269,8 +269,8 @@ int scanhash_lyra2rev3_8way( struct work *work, const uint32_t max_nonce,
|
||||
|
||||
mm256_bswap32_intrlv80_8x32( vdata, pdata );
|
||||
*noncev = _mm256_set_epi32( n+7, n+6, n+5, n+4, n+3, n+2, n+1, n );
|
||||
blake256_8way_init( &l2v3_8way_ctx.blake );
|
||||
blake256_8way_update( &l2v3_8way_ctx.blake, vdata, 64 );
|
||||
blake256_8x32_init( &l2v3_8way_ctx.blake );
|
||||
blake256_8x32_update( &l2v3_8way_ctx.blake, vdata, 64 );
|
||||
|
||||
do
|
||||
{
|
||||
@@ -300,19 +300,18 @@ int scanhash_lyra2rev3_8way( struct work *work, const uint32_t max_nonce,
|
||||
#if defined (LYRA2REV3_4WAY)
|
||||
|
||||
typedef struct {
|
||||
blake256_4way_context blake;
|
||||
blake256_4x32_context blake;
|
||||
cubehashParam cube;
|
||||
bmw256_4way_context bmw;
|
||||
bmw256_4x32_context bmw;
|
||||
} lyra2v3_4way_ctx_holder;
|
||||
|
||||
//static lyra2v3_4way_ctx_holder l2v3_4way_ctx;
|
||||
static __thread lyra2v3_4way_ctx_holder l2v3_4way_ctx;
|
||||
|
||||
bool init_lyra2rev3_4way_ctx()
|
||||
{
|
||||
blake256_4way_init( &l2v3_4way_ctx.blake );
|
||||
blake256_4x32_init( &l2v3_4way_ctx.blake );
|
||||
cubehashInit( &l2v3_4way_ctx.cube, 256, 16, 32 );
|
||||
bmw256_4way_init( &l2v3_4way_ctx.bmw );
|
||||
bmw256_4x32_init( &l2v3_4way_ctx.bmw );
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -326,8 +325,8 @@ void lyra2rev3_4way_hash( void *state, const void *input )
|
||||
lyra2v3_4way_ctx_holder ctx __attribute__ ((aligned (64)));
|
||||
memcpy( &ctx, &l2v3_4way_ctx, sizeof(l2v3_4way_ctx) );
|
||||
|
||||
blake256_4way_update( &ctx.blake, input + (64*4), 16 );
|
||||
blake256_4way_close( &ctx.blake, vhash );
|
||||
blake256_4x32_update( &ctx.blake, input + (64*4), 16 );
|
||||
blake256_4x32_close( &ctx.blake, vhash );
|
||||
dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 256 );
|
||||
|
||||
LYRA2REV3( l2v3_wholeMatrix, hash0, 32, hash0, 32, hash0, 32, 1, 4, 4 );
|
||||
@@ -349,8 +348,8 @@ void lyra2rev3_4way_hash( void *state, const void *input )
|
||||
LYRA2REV3( l2v3_wholeMatrix, hash3, 32, hash3, 32, hash3, 32, 1, 4, 4 );
|
||||
|
||||
intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 256 );
|
||||
bmw256_4way_update( &ctx.bmw, vhash, 32 );
|
||||
bmw256_4way_close( &ctx.bmw, state );
|
||||
bmw256_4x32_update( &ctx.bmw, vhash, 32 );
|
||||
bmw256_4x32_close( &ctx.bmw, state );
|
||||
}
|
||||
|
||||
int scanhash_lyra2rev3_4way( struct work *work, const uint32_t max_nonce,
|
||||
@@ -374,8 +373,8 @@ int scanhash_lyra2rev3_4way( struct work *work, const uint32_t max_nonce,
|
||||
v128_bswap32_intrlv80_4x32( vdata, pdata );
|
||||
*noncev = _mm_set_epi32( n+3, n+2, n+1, n );
|
||||
|
||||
blake256_4way_init( &l2v3_4way_ctx.blake );
|
||||
blake256_4way_update( &l2v3_4way_ctx.blake, vdata, 64 );
|
||||
blake256_4x32_init( &l2v3_4way_ctx.blake );
|
||||
blake256_4x32_update( &l2v3_4way_ctx.blake, vdata, 64 );
|
||||
|
||||
do
|
||||
{
|
||||
|
||||
@@ -45,7 +45,7 @@ static void lyra2z_16way_hash( void *state, const void *midstate_vars,
|
||||
uint32_t hash14[8] __attribute__ ((aligned (32)));
|
||||
uint32_t hash15[8] __attribute__ ((aligned (32)));
|
||||
|
||||
blake256_16way_final_rounds_le( vhash, midstate_vars, midhash, block, 14 );
|
||||
blake256_16x32_final_rounds_le( vhash, midstate_vars, midhash, block, 14 );
|
||||
|
||||
dintrlv_16x32( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
|
||||
hash8, hash9, hash10, hash11 ,hash12, hash13, hash14, hash15,
|
||||
@@ -139,7 +139,7 @@ int scanhash_lyra2z_16way( struct work *work, uint32_t max_nonce,
|
||||
n+ 7, n+ 6, n+ 5, n+ 4, n+ 3, n+ 2, n +1, n );
|
||||
|
||||
// Partialy prehash second block without touching nonces in block_buf[3].
|
||||
blake256_16way_round0_prehash_le( midstate_vars, block0_hash, block_buf );
|
||||
blake256_16x32_round0_prehash_le( midstate_vars, block0_hash, block_buf );
|
||||
|
||||
do {
|
||||
lyra2z_16way_hash( hash, midstate_vars, block0_hash, block_buf );
|
||||
@@ -180,7 +180,7 @@ static void lyra2z_8way_hash( void *state, const void *midstate_vars,
|
||||
uint32_t hash7[8] __attribute__ ((aligned (32)));
|
||||
uint32_t vhash[8*8] __attribute__ ((aligned (64)));
|
||||
|
||||
blake256_8way_final_rounds_le( vhash, midstate_vars, midhash, block, 14 );
|
||||
blake256_8x32_final_rounds_le( vhash, midstate_vars, midhash, block, 14 );
|
||||
|
||||
dintrlv_8x32( hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7, vhash, 256 );
|
||||
@@ -246,7 +246,7 @@ int scanhash_lyra2z_8way( struct work *work, uint32_t max_nonce,
|
||||
_mm256_set_epi32( n+ 7, n+ 6, n+ 5, n+ 4, n+ 3, n+ 2, n +1, n );
|
||||
|
||||
// Partialy prehash second block without touching nonces
|
||||
blake256_8way_round0_prehash_le( midstate_vars, block0_hash, block_buf );
|
||||
blake256_8x32_round0_prehash_le( midstate_vars, block0_hash, block_buf );
|
||||
|
||||
do {
|
||||
lyra2z_8way_hash( hash, midstate_vars, block0_hash, block_buf );
|
||||
@@ -279,12 +279,12 @@ bool lyra2z_4way_thread_init()
|
||||
return ( lyra2z_4way_matrix = mm_malloc( LYRA2Z_MATRIX_SIZE, 64 ) );
|
||||
}
|
||||
|
||||
static __thread blake256_4way_context l2z_4way_blake_mid;
|
||||
static __thread blake256_4x32_context l2z_4way_blake_mid;
|
||||
|
||||
void lyra2z_4way_midstate( const void* input )
|
||||
{
|
||||
blake256_4way_init( &l2z_4way_blake_mid );
|
||||
blake256_4way_update( &l2z_4way_blake_mid, input, 64 );
|
||||
blake256_4x32_init( &l2z_4way_blake_mid );
|
||||
blake256_4x32_update( &l2z_4way_blake_mid, input, 64 );
|
||||
}
|
||||
|
||||
void lyra2z_4way_hash( void *hash, const void *midstate_vars,
|
||||
@@ -295,15 +295,8 @@ void lyra2z_4way_hash( void *hash, const void *midstate_vars,
|
||||
uint32_t hash2[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash3[8] __attribute__ ((aligned (64)));
|
||||
uint32_t vhash[8*4] __attribute__ ((aligned (64)));
|
||||
// blake256_4way_context ctx_blake __attribute__ ((aligned (64)));
|
||||
|
||||
blake256_4way_final_rounds_le( vhash, midstate_vars, midhash, block, 14 );
|
||||
|
||||
/*
|
||||
memcpy( &ctx_blake, &l2z_4way_blake_mid, sizeof l2z_4way_blake_mid );
|
||||
blake256_4way_update( &ctx_blake, input + (64*4), 16 );
|
||||
blake256_4way_close( &ctx_blake, vhash );
|
||||
*/
|
||||
blake256_4x32_final_rounds_le( vhash, midstate_vars, midhash, block, 14 );
|
||||
|
||||
dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 256 );
|
||||
|
||||
@@ -357,7 +350,7 @@ int scanhash_lyra2z_4way( struct work *work, uint32_t max_nonce,
|
||||
block_buf[15] = v128_32( 640 );
|
||||
|
||||
// Partialy prehash second block without touching nonces
|
||||
blake256_4way_round0_prehash_le( midstate_vars, block0_hash, block_buf );
|
||||
blake256_4x32_round0_prehash_le( midstate_vars, block0_hash, block_buf );
|
||||
|
||||
do {
|
||||
lyra2z_4way_hash( hash, midstate_vars, block0_hash, block_buf );
|
||||
@@ -454,11 +447,9 @@ bool register_lyra2z_algo( algo_gate_t* gate )
|
||||
#if defined(LYRA2Z_16WAY)
|
||||
gate->miner_thread_init = (void*)&lyra2z_16way_thread_init;
|
||||
gate->scanhash = (void*)&scanhash_lyra2z_16way;
|
||||
// gate->hash = (void*)&lyra2z_16way_hash;
|
||||
#elif defined(LYRA2Z_8WAY)
|
||||
gate->miner_thread_init = (void*)&lyra2z_8way_thread_init;
|
||||
gate->scanhash = (void*)&scanhash_lyra2z_8way;
|
||||
// gate->hash = (void*)&lyra2z_8way_hash;
|
||||
#elif defined(LYRA2Z_4WAY)
|
||||
gate->miner_thread_init = (void*)&lyra2z_4way_thread_init;
|
||||
gate->scanhash = (void*)&scanhash_lyra2z_4way;
|
||||
|
||||
@@ -45,7 +45,7 @@ static const uint64_t blake2b_IV[8] =
|
||||
|
||||
#if defined(SIMD512)
|
||||
|
||||
#define G2W_4X64(a,b,c,d) \
|
||||
#define G2W(a,b,c,d) \
|
||||
a = _mm512_add_epi64( a, b ); \
|
||||
d = _mm512_ror_epi64( _mm512_xor_si512( d, a ), 32 ); \
|
||||
c = _mm512_add_epi64( c, d ); \
|
||||
@@ -56,27 +56,15 @@ static const uint64_t blake2b_IV[8] =
|
||||
b = _mm512_ror_epi64( _mm512_xor_si512( b, c ), 63 );
|
||||
|
||||
#define LYRA_ROUND_2WAY_AVX512( s0, s1, s2, s3 ) \
|
||||
G2W_4X64( s0, s1, s2, s3 ); \
|
||||
G2W( s0, s1, s2, s3 ); \
|
||||
s0 = mm512_shufll256_64( s0 ); \
|
||||
s3 = mm512_swap256_128( s3); \
|
||||
s3 = mm512_swap256_128( s3 ); \
|
||||
s2 = mm512_shuflr256_64( s2 ); \
|
||||
G2W_4X64( s0, s1, s2, s3 ); \
|
||||
G2W( s0, s1, s2, s3 ); \
|
||||
s0 = mm512_shuflr256_64( s0 ); \
|
||||
s3 = mm512_swap256_128( s3 ); \
|
||||
s2 = mm512_shufll256_64( s2 );
|
||||
|
||||
/*
|
||||
#define LYRA_ROUND_2WAY_AVX512( s0, s1, s2, s3 ) \
|
||||
G2W_4X64( s0, s1, s2, s3 ); \
|
||||
s3 = mm512_shufll256_64( s3 ); \
|
||||
s1 = mm512_shuflr256_64( s1); \
|
||||
s2 = mm512_swap256_128( s2 ); \
|
||||
G2W_4X64( s0, s1, s2, s3 ); \
|
||||
s3 = mm512_shuflr256_64( s3 ); \
|
||||
s1 = mm512_shufll256_64( s1 ); \
|
||||
s2 = mm512_swap256_128( s2 );
|
||||
*/
|
||||
|
||||
#define LYRA_12_ROUNDS_2WAY_AVX512( s0, s1, s2, s3 ) \
|
||||
LYRA_ROUND_2WAY_AVX512( s0, s1, s2, s3 ) \
|
||||
LYRA_ROUND_2WAY_AVX512( s0, s1, s2, s3 ) \
|
||||
@@ -95,7 +83,7 @@ static const uint64_t blake2b_IV[8] =
|
||||
|
||||
#if defined(__AVX2__)
|
||||
|
||||
#define G_4X64(a,b,c,d) \
|
||||
#define G_AVX2(a,b,c,d) \
|
||||
a = _mm256_add_epi64( a, b ); \
|
||||
d = mm256_ror_64( _mm256_xor_si256( d, a ), 32 ); \
|
||||
c = _mm256_add_epi64( c, d ); \
|
||||
@@ -107,27 +95,15 @@ static const uint64_t blake2b_IV[8] =
|
||||
|
||||
// Pivot about s1 instead of s0 reduces latency.
|
||||
#define LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \
|
||||
G_4X64( s0, s1, s2, s3 ); \
|
||||
G_AVX2( s0, s1, s2, s3 ); \
|
||||
s0 = mm256_shufll_64( s0 ); \
|
||||
s3 = mm256_swap_128( s3); \
|
||||
s3 = mm256_swap_128( s3 ); \
|
||||
s2 = mm256_shuflr_64( s2 ); \
|
||||
G_4X64( s0, s1, s2, s3 ); \
|
||||
G_AVX2( s0, s1, s2, s3 ); \
|
||||
s0 = mm256_shuflr_64( s0 ); \
|
||||
s3 = mm256_swap_128( s3 ); \
|
||||
s2 = mm256_shufll_64( s2 );
|
||||
|
||||
/*
|
||||
#define LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \
|
||||
G_4X64( s0, s1, s2, s3 ); \
|
||||
s3 = mm256_shufll_64( s3 ); \
|
||||
s1 = mm256_shuflr_64( s1); \
|
||||
s2 = mm256_swap_128( s2 ); \
|
||||
G_4X64( s0, s1, s2, s3 ); \
|
||||
s3 = mm256_shuflr_64( s3 ); \
|
||||
s1 = mm256_shufll_64( s1 ); \
|
||||
s2 = mm256_swap_128( s2 );
|
||||
*/
|
||||
|
||||
#define LYRA_12_ROUNDS_AVX2( s0, s1, s2, s3 ) \
|
||||
LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \
|
||||
LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \
|
||||
@@ -148,7 +124,7 @@ static const uint64_t blake2b_IV[8] =
|
||||
|
||||
// process 2 columns in parallel
|
||||
// returns void, all args updated
|
||||
#define G_2X64(a,b,c,d) \
|
||||
#define G_128(a,b,c,d) \
|
||||
a = v128_add64( a, b ); \
|
||||
d = v128_ror64xor( d, a, 32 ); \
|
||||
c = v128_add64( c, d ); \
|
||||
@@ -161,16 +137,16 @@ static const uint64_t blake2b_IV[8] =
|
||||
#define LYRA_ROUND_AVX(s0,s1,s2,s3,s4,s5,s6,s7) \
|
||||
{ \
|
||||
v128u64_t t; \
|
||||
G_2X64( s0, s2, s4, s6 ); \
|
||||
G_2X64( s1, s3, s5, s7 ); \
|
||||
G_128( s0, s2, s4, s6 ); \
|
||||
G_128( s1, s3, s5, s7 ); \
|
||||
t = v128_alignr64( s7, s6, 1 ); \
|
||||
s6 = v128_alignr64( s6, s7, 1 ); \
|
||||
s7 = t; \
|
||||
t = v128_alignr64( s2, s3, 1 ); \
|
||||
s2 = v128_alignr64( s3, s2, 1 ); \
|
||||
s3 = t; \
|
||||
G_2X64( s0, s2, s5, s6 ); \
|
||||
G_2X64( s1, s3, s4, s7 ); \
|
||||
G_128( s0, s2, s5, s6 ); \
|
||||
G_128( s1, s3, s4, s7 ); \
|
||||
t = v128_alignr64( s6, s7, 1 ); \
|
||||
s6 = v128_alignr64( s7, s6, 1 ); \
|
||||
s7 = t; \
|
||||
|
||||
@@ -18,11 +18,14 @@ typedef struct {
|
||||
} panama_4way_context __attribute__ ((aligned (64)));
|
||||
|
||||
void panama_4way_init( void *cc );
|
||||
|
||||
void panama_4way_update( void *cc, const void *data, size_t len );
|
||||
|
||||
void panama_4way_close( void *cc, void *dst );
|
||||
|
||||
#define panama_4x32_context panama_4way_context
|
||||
#define panama_4x32_init panama_4way_init
|
||||
#define panama_4x32_update panama_4way_update
|
||||
#define panama_4x32_close panama_4way_close
|
||||
|
||||
#if defined(__AVX2__)
|
||||
|
||||
typedef struct {
|
||||
@@ -34,10 +37,13 @@ typedef struct {
|
||||
} panama_8way_context __attribute__ ((aligned (128)));
|
||||
|
||||
void panama_8way_init( void *cc );
|
||||
|
||||
void panama_8way_update( void *cc, const void *data, size_t len );
|
||||
|
||||
void panama_8way_close( void *cc, void *dst );
|
||||
|
||||
#define panama_8x32_context panama_8way_context
|
||||
#define panama_8x32_init panama_8way_init
|
||||
#define panama_8x32_update panama_8way_update
|
||||
#define panama_8x32_close panama_8way_close
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
@@ -31,20 +31,20 @@
|
||||
|
||||
union _hmq1725_8way_context_overlay
|
||||
{
|
||||
blake512_8way_context blake;
|
||||
bmw512_8way_context bmw;
|
||||
skein512_8way_context skein;
|
||||
jh512_8way_context jh;
|
||||
keccak512_8way_context keccak;
|
||||
blake512_8x64_context blake;
|
||||
bmw512_8x64_context bmw;
|
||||
skein512_8x64_context skein;
|
||||
jh512_8x64_context jh;
|
||||
keccak512_8x64_context keccak;
|
||||
luffa_4way_context luffa;
|
||||
cube_4way_context cube;
|
||||
simd_4way_context simd;
|
||||
hamsi512_8way_context hamsi;
|
||||
hamsi512_8x64_context hamsi;
|
||||
hashState_fugue fugue;
|
||||
shabal512_8way_context shabal;
|
||||
shabal512_8x32_context shabal;
|
||||
sph_whirlpool_context whirlpool;
|
||||
sha512_8way_context sha512;
|
||||
haval256_5_8way_context haval;
|
||||
sha512_8x64_context sha512;
|
||||
haval256_8x32_context haval;
|
||||
#if defined(__VAES__)
|
||||
groestl512_4way_context groestl;
|
||||
shavite512_4way_context shavite;
|
||||
@@ -81,7 +81,7 @@ extern void hmq1725_8way_hash(void *state, const void *input)
|
||||
__m512i* vhB = (__m512i*)vhashB;
|
||||
__m512i* vhC = (__m512i*)vhashC;
|
||||
|
||||
bmw512_8way_full( &ctx.bmw, vhash, input, 80 );
|
||||
bmw512_8x64_full( &ctx.bmw, vhash, input, 80 );
|
||||
|
||||
dintrlv_8x64_512( hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7, vhash );
|
||||
@@ -141,26 +141,26 @@ extern void hmq1725_8way_hash(void *state, const void *input)
|
||||
|
||||
// B
|
||||
if ( likely( vh_mask & 0xff ) )
|
||||
skein512_8way_full( &ctx.skein, vhashB, vhash, 64 );
|
||||
skein512_8x64_full( &ctx.skein, vhashB, vhash, 64 );
|
||||
|
||||
mm512_blend_hash_8x64( vh, vhC, vhB, vh_mask );
|
||||
|
||||
jh512_8way_init( &ctx.jh );
|
||||
jh512_8way_update( &ctx.jh, vhash, 64 );
|
||||
jh512_8way_close( &ctx.jh, vhash );
|
||||
jh512_8x64_init( &ctx.jh );
|
||||
jh512_8x64_update( &ctx.jh, vhash, 64 );
|
||||
jh512_8x64_close( &ctx.jh, vhash );
|
||||
|
||||
keccak512_8way_init( &ctx.keccak );
|
||||
keccak512_8way_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_8way_close( &ctx.keccak, vhash );
|
||||
keccak512_8x64_init( &ctx.keccak );
|
||||
keccak512_8x64_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_8x64_close( &ctx.keccak, vhash );
|
||||
|
||||
vh_mask = _mm512_testn_epi64_mask( vh[0], vmask );
|
||||
|
||||
// A
|
||||
if ( ( vh_mask & 0xff ) != 0xff )
|
||||
blake512_8way_full( &ctx.blake, vhashA, vhash, 64 );
|
||||
blake512_8x64_full( &ctx.blake, vhashA, vhash, 64 );
|
||||
// B
|
||||
if ( vh_mask & 0xff )
|
||||
bmw512_8way_full( &ctx.bmw, vhashB, vhash, 64 );
|
||||
bmw512_8x64_full( &ctx.bmw, vhashB, vhash, 64 );
|
||||
|
||||
mm512_blend_hash_8x64( vh, vhA, vhB, vh_mask );
|
||||
rintrlv_8x64_4x128( vhashA, vhashB, vhash, 512 );
|
||||
@@ -176,16 +176,16 @@ extern void hmq1725_8way_hash(void *state, const void *input)
|
||||
|
||||
if ( likely( ( vh_mask & 0xff ) != 0xff ) )
|
||||
{
|
||||
keccak512_8way_init( &ctx.keccak );
|
||||
keccak512_8way_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_8way_close( &ctx.keccak, vhashA );
|
||||
keccak512_8x64_init( &ctx.keccak );
|
||||
keccak512_8x64_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_8x64_close( &ctx.keccak, vhashA );
|
||||
}
|
||||
|
||||
if ( likely( vh_mask & 0xff ) )
|
||||
{
|
||||
jh512_8way_init( &ctx.jh );
|
||||
jh512_8way_update( &ctx.jh, vhash, 64 );
|
||||
jh512_8way_close( &ctx.jh, vhashB );
|
||||
jh512_8x64_init( &ctx.jh );
|
||||
jh512_8x64_update( &ctx.jh, vhash, 64 );
|
||||
jh512_8x64_close( &ctx.jh, vhashB );
|
||||
}
|
||||
|
||||
mm512_blend_hash_8x64( vh, vhA, vhB, vh_mask );
|
||||
@@ -251,9 +251,9 @@ extern void hmq1725_8way_hash(void *state, const void *input)
|
||||
// B
|
||||
if ( likely( vh_mask & 0xff ) )
|
||||
{
|
||||
haval256_5_8way_init( &ctx.haval );
|
||||
haval256_5_8way_update( &ctx.haval, vhash, 64 );
|
||||
haval256_5_8way_close( &ctx.haval, vhash );
|
||||
haval256_8x32_init( &ctx.haval );
|
||||
haval256_8x32_update( &ctx.haval, vhash, 64 );
|
||||
haval256_8x32_close( &ctx.haval, vhash );
|
||||
memset( &vhash[8<<3], 0, 32<<3 );
|
||||
rintrlv_8x32_8x64( vhashB, vhash, 512 );
|
||||
}
|
||||
@@ -296,7 +296,7 @@ extern void hmq1725_8way_hash(void *state, const void *input)
|
||||
|
||||
#endif
|
||||
|
||||
blake512_8way_full( &ctx.blake, vhash, vhash, 64 );
|
||||
blake512_8x64_full( &ctx.blake, vhash, vhash, 64 );
|
||||
|
||||
vh_mask = _mm512_testn_epi64_mask( vh[0], vmask );
|
||||
|
||||
@@ -351,9 +351,9 @@ extern void hmq1725_8way_hash(void *state, const void *input)
|
||||
|
||||
mm512_blend_hash_8x64( vh, vhC, vhB, vh_mask );
|
||||
|
||||
hamsi512_8way_init( &ctx.hamsi );
|
||||
hamsi512_8way_update( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_8way_close( &ctx.hamsi, vhash );
|
||||
hamsi512_8x64_init( &ctx.hamsi );
|
||||
hamsi512_8x64_update( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_8x64_close( &ctx.hamsi, vhash );
|
||||
|
||||
dintrlv_8x64_512( hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7, vhash );
|
||||
@@ -429,9 +429,9 @@ extern void hmq1725_8way_hash(void *state, const void *input)
|
||||
|
||||
rintrlv_8x64_8x32( vhashA, vhash, 512 );
|
||||
|
||||
shabal512_8way_init( &ctx.shabal );
|
||||
shabal512_8way_update( &ctx.shabal, vhashA, 64 );
|
||||
shabal512_8way_close( &ctx.shabal, vhash );
|
||||
shabal512_8x32_init( &ctx.shabal );
|
||||
shabal512_8x32_update( &ctx.shabal, vhashA, 64 );
|
||||
shabal512_8x32_close( &ctx.shabal, vhash );
|
||||
|
||||
dintrlv_8x32_512( hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7, vhash );
|
||||
@@ -474,9 +474,9 @@ extern void hmq1725_8way_hash(void *state, const void *input)
|
||||
// B
|
||||
if ( likely( vh_mask & 0xff ) )
|
||||
{
|
||||
sha512_8way_init( &ctx.sha512 );
|
||||
sha512_8way_update( &ctx.sha512, vhash, 64 );
|
||||
sha512_8way_close( &ctx.sha512, vhashB );
|
||||
sha512_8x64_init( &ctx.sha512 );
|
||||
sha512_8x64_update( &ctx.sha512, vhash, 64 );
|
||||
sha512_8x64_close( &ctx.sha512, vhashB );
|
||||
}
|
||||
|
||||
mm512_blend_hash_8x64( vh, vhA, vhB, vh_mask );
|
||||
@@ -509,9 +509,9 @@ extern void hmq1725_8way_hash(void *state, const void *input)
|
||||
|
||||
#endif
|
||||
|
||||
sha512_8way_init( &ctx.sha512 );
|
||||
sha512_8way_update( &ctx.sha512, vhash, 64 );
|
||||
sha512_8way_close( &ctx.sha512, vhash );
|
||||
sha512_8x64_init( &ctx.sha512 );
|
||||
sha512_8x64_update( &ctx.sha512, vhash, 64 );
|
||||
sha512_8x64_close( &ctx.sha512, vhash );
|
||||
|
||||
vh_mask = _mm512_testn_epi64_mask( vh[0], vmask );
|
||||
dintrlv_8x64_512( hash0, hash1, hash2, hash3,
|
||||
@@ -522,9 +522,9 @@ extern void hmq1725_8way_hash(void *state, const void *input)
|
||||
{
|
||||
intrlv_8x32_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7 );
|
||||
haval256_5_8way_init( &ctx.haval );
|
||||
haval256_5_8way_update( &ctx.haval, vhash, 64 );
|
||||
haval256_5_8way_close( &ctx.haval, vhash );
|
||||
haval256_8x32_init( &ctx.haval );
|
||||
haval256_8x32_update( &ctx.haval, vhash, 64 );
|
||||
haval256_8x32_close( &ctx.haval, vhash );
|
||||
memset( &vhash[8<<3], 0, 32<<3 );
|
||||
rintrlv_8x32_8x64( vhashA, vhash, 512 );
|
||||
}
|
||||
@@ -551,9 +551,9 @@ extern void hmq1725_8way_hash(void *state, const void *input)
|
||||
hash7 );
|
||||
mm512_blend_hash_8x64( vh, vhA, vhB, vh_mask );
|
||||
|
||||
bmw512_8way_init( &ctx.bmw );
|
||||
bmw512_8way_update( &ctx.bmw, vhash, 64 );
|
||||
bmw512_8way_close( &ctx.bmw, state );
|
||||
bmw512_8x64_init( &ctx.bmw );
|
||||
bmw512_8x64_update( &ctx.bmw, vhash, 64 );
|
||||
bmw512_8x64_close( &ctx.bmw, state );
|
||||
}
|
||||
|
||||
int scanhash_hmq1725_8way( struct work *work, uint32_t max_nonce,
|
||||
@@ -605,12 +605,12 @@ int scanhash_hmq1725_8way( struct work *work, uint32_t max_nonce,
|
||||
|
||||
union _hmq1725_4way_context_overlay
|
||||
{
|
||||
blake512_4way_context blake;
|
||||
bmw512_4way_context bmw;
|
||||
blake512_4x64_context blake;
|
||||
bmw512_4x64_context bmw;
|
||||
hashState_groestl groestl;
|
||||
skein512_4way_context skein;
|
||||
jh512_4way_context jh;
|
||||
keccak512_4way_context keccak;
|
||||
skein512_4x64_context skein;
|
||||
jh512_4x64_context jh;
|
||||
keccak512_4x64_context keccak;
|
||||
hashState_luffa luffa;
|
||||
luffa_2way_context luffa2;
|
||||
cubehashParam cube;
|
||||
@@ -620,12 +620,12 @@ union _hmq1725_4way_context_overlay
|
||||
shavite512_2way_context shavite2;
|
||||
simd_2way_context simd_2way;
|
||||
hashState_echo echo;
|
||||
hamsi512_4way_context hamsi;
|
||||
hamsi512_4x64_context hamsi;
|
||||
hashState_fugue fugue;
|
||||
shabal512_4way_context shabal;
|
||||
shabal512_4x32_context shabal;
|
||||
sph_whirlpool_context whirlpool;
|
||||
sha512_4way_context sha512;
|
||||
haval256_5_4way_context haval;
|
||||
sha512_4x64_context sha512;
|
||||
haval256_4x32_context haval;
|
||||
#if defined(__VAES__)
|
||||
groestl512_2way_context groestl2;
|
||||
echo_2way_context echo2;
|
||||
@@ -652,9 +652,9 @@ extern void hmq1725_4way_hash(void *state, const void *input)
|
||||
__m256i* vhA = (__m256i*)vhashA;
|
||||
__m256i* vhB = (__m256i*)vhashB;
|
||||
|
||||
bmw512_4way_init( &ctx.bmw );
|
||||
bmw512_4way_update( &ctx.bmw, input, 80 );
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
bmw512_4x64_init( &ctx.bmw );
|
||||
bmw512_4x64_update( &ctx.bmw, input, 80 );
|
||||
bmw512_4x64_close( &ctx.bmw, vhash );
|
||||
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
@@ -686,17 +686,17 @@ extern void hmq1725_4way_hash(void *state, const void *input)
|
||||
// B
|
||||
|
||||
if ( h_mask & 0xffffffff )
|
||||
skein512_4way_full( &ctx.skein, vhashB, vhash, 64 );
|
||||
skein512_4x64_full( &ctx.skein, vhashB, vhash, 64 );
|
||||
|
||||
mm256_blend_hash_4x64( vh, vhA, vhB, vh_mask );
|
||||
|
||||
jh512_4way_init( &ctx.jh );
|
||||
jh512_4way_update( &ctx.jh, vhash, 64 );
|
||||
jh512_4way_close( &ctx.jh, vhash );
|
||||
jh512_4x64_init( &ctx.jh );
|
||||
jh512_4x64_update( &ctx.jh, vhash, 64 );
|
||||
jh512_4x64_close( &ctx.jh, vhash );
|
||||
|
||||
keccak512_4way_init( &ctx.keccak );
|
||||
keccak512_4way_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_close( &ctx.keccak, vhash );
|
||||
keccak512_4x64_init( &ctx.keccak );
|
||||
keccak512_4x64_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4x64_close( &ctx.keccak, vhash );
|
||||
|
||||
// second fork, A = blake parallel, B= bmw parallel.
|
||||
|
||||
@@ -704,13 +704,13 @@ extern void hmq1725_4way_hash(void *state, const void *input)
|
||||
h_mask = _mm256_movemask_epi8( vh_mask );
|
||||
|
||||
if ( ( h_mask & 0xffffffff ) != 0xffffffff )
|
||||
blake512_4way_full( &ctx.blake, vhashA, vhash, 64 );
|
||||
blake512_4x64_full( &ctx.blake, vhashA, vhash, 64 );
|
||||
|
||||
if ( h_mask & 0xffffffff )
|
||||
{
|
||||
bmw512_4way_init( &ctx.bmw );
|
||||
bmw512_4way_update( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_close( &ctx.bmw, vhashB );
|
||||
bmw512_4x64_init( &ctx.bmw );
|
||||
bmw512_4x64_update( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4x64_close( &ctx.bmw, vhashB );
|
||||
}
|
||||
|
||||
mm256_blend_hash_4x64( vh, vhA, vhB, vh_mask );
|
||||
@@ -733,16 +733,16 @@ extern void hmq1725_4way_hash(void *state, const void *input)
|
||||
|
||||
if ( ( h_mask & 0xffffffff ) != 0xffffffff )
|
||||
{
|
||||
keccak512_4way_init( &ctx.keccak );
|
||||
keccak512_4way_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_close( &ctx.keccak, vhashA );
|
||||
keccak512_4x64_init( &ctx.keccak );
|
||||
keccak512_4x64_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4x64_close( &ctx.keccak, vhashA );
|
||||
}
|
||||
|
||||
if ( h_mask & 0xffffffff )
|
||||
{
|
||||
jh512_4way_init( &ctx.jh );
|
||||
jh512_4way_update( &ctx.jh, vhash, 64 );
|
||||
jh512_4way_close( &ctx.jh, vhashB );
|
||||
jh512_4x64_init( &ctx.jh );
|
||||
jh512_4x64_update( &ctx.jh, vhash, 64 );
|
||||
jh512_4x64_close( &ctx.jh, vhashB );
|
||||
}
|
||||
|
||||
mm256_blend_hash_4x64( vh, vhA, vhB, vh_mask );
|
||||
@@ -778,9 +778,9 @@ extern void hmq1725_4way_hash(void *state, const void *input)
|
||||
// B
|
||||
if ( h_mask & 0xffffffff )
|
||||
{
|
||||
haval256_5_4way_init( &ctx.haval );
|
||||
haval256_5_4way_update( &ctx.haval, vhash, 64 );
|
||||
haval256_5_4way_close( &ctx.haval, vhash );
|
||||
haval256_4x32_init( &ctx.haval );
|
||||
haval256_4x32_update( &ctx.haval, vhash, 64 );
|
||||
haval256_4x32_close( &ctx.haval, vhash );
|
||||
memset( &vhash[8<<2], 0, 32<<2 );
|
||||
rintrlv_4x32_4x64( vhashB, vhash, 512 );
|
||||
}
|
||||
@@ -813,7 +813,7 @@ extern void hmq1725_4way_hash(void *state, const void *input)
|
||||
|
||||
#endif
|
||||
|
||||
blake512_4way_full( &ctx.blake, vhash, vhash, 64 );
|
||||
blake512_4x64_full( &ctx.blake, vhash, vhash, 64 );
|
||||
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
@@ -845,9 +845,9 @@ extern void hmq1725_4way_hash(void *state, const void *input)
|
||||
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
|
||||
hamsi512_4way_init( &ctx.hamsi );
|
||||
hamsi512_4way_update( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_4way_close( &ctx.hamsi, vhash );
|
||||
hamsi512_4x64_init( &ctx.hamsi );
|
||||
hamsi512_4x64_update( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_4x64_close( &ctx.hamsi, vhash );
|
||||
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
@@ -890,9 +890,9 @@ extern void hmq1725_4way_hash(void *state, const void *input)
|
||||
|
||||
intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
|
||||
shabal512_4way_init( &ctx.shabal );
|
||||
shabal512_4way_update( &ctx.shabal, vhash, 64 );
|
||||
shabal512_4way_close( &ctx.shabal, vhash );
|
||||
shabal512_4x32_init( &ctx.shabal );
|
||||
shabal512_4x32_update( &ctx.shabal, vhash, 64 );
|
||||
shabal512_4x32_close( &ctx.shabal, vhash );
|
||||
|
||||
dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
@@ -921,9 +921,9 @@ extern void hmq1725_4way_hash(void *state, const void *input)
|
||||
|
||||
if ( h_mask & 0xffffffff )
|
||||
{
|
||||
sha512_4way_init( &ctx.sha512 );
|
||||
sha512_4way_update( &ctx.sha512, vhash, 64 );
|
||||
sha512_4way_close( &ctx.sha512, vhashB );
|
||||
sha512_4x64_init( &ctx.sha512 );
|
||||
sha512_4x64_update( &ctx.sha512, vhash, 64 );
|
||||
sha512_4x64_close( &ctx.sha512, vhashB );
|
||||
}
|
||||
|
||||
mm256_blend_hash_4x64( vh, vhA, vhB, vh_mask );
|
||||
@@ -950,9 +950,9 @@ extern void hmq1725_4way_hash(void *state, const void *input)
|
||||
|
||||
#endif
|
||||
|
||||
sha512_4way_init( &ctx.sha512 );
|
||||
sha512_4way_update( &ctx.sha512, vhash, 64 );
|
||||
sha512_4way_close( &ctx.sha512, vhash );
|
||||
sha512_4x64_init( &ctx.sha512 );
|
||||
sha512_4x64_update( &ctx.sha512, vhash, 64 );
|
||||
sha512_4x64_close( &ctx.sha512, vhash );
|
||||
|
||||
// A = haval parallel, B = Whirlpool serial
|
||||
|
||||
@@ -964,9 +964,9 @@ extern void hmq1725_4way_hash(void *state, const void *input)
|
||||
|
||||
if ( ( h_mask & 0xffffffff ) != 0xffffffff )
|
||||
{
|
||||
haval256_5_4way_init( &ctx.haval );
|
||||
haval256_5_4way_update( &ctx.haval, vhash, 64 );
|
||||
haval256_5_4way_close( &ctx.haval, vhash );
|
||||
haval256_4x32_init( &ctx.haval );
|
||||
haval256_4x32_update( &ctx.haval, vhash, 64 );
|
||||
haval256_4x32_close( &ctx.haval, vhash );
|
||||
memset( &vhash[8<<2], 0, 32<<2 );
|
||||
rintrlv_4x32_4x64( vhashA, vhash, 512 );
|
||||
}
|
||||
@@ -984,9 +984,9 @@ extern void hmq1725_4way_hash(void *state, const void *input)
|
||||
|
||||
mm256_blend_hash_4x64( vh, vhA, vhB, vh_mask );
|
||||
|
||||
bmw512_4way_init( &ctx.bmw );
|
||||
bmw512_4way_update( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_close( &ctx.bmw, state );
|
||||
bmw512_4x64_init( &ctx.bmw );
|
||||
bmw512_4x64_update( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4x64_close( &ctx.bmw, state );
|
||||
}
|
||||
|
||||
int scanhash_hmq1725_4way( struct work *work, uint32_t max_nonce,
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
|
||||
#if defined(LBRY_16WAY)
|
||||
|
||||
static __thread sha256_16way_context sha256_16w_mid;
|
||||
static __thread sha256_16x32_context sha256_16w_mid;
|
||||
|
||||
void lbry_16way_hash( void* output, const void* input )
|
||||
{
|
||||
@@ -36,17 +36,17 @@ void lbry_16way_hash( void* output, const void* input )
|
||||
uint32_t _ALIGN(64) h13[32];
|
||||
uint32_t _ALIGN(64) h14[32];
|
||||
uint32_t _ALIGN(64) h15[32];
|
||||
sha256_16way_context ctx_sha256 __attribute__ ((aligned (64)));
|
||||
sha512_8way_context ctx_sha512;
|
||||
ripemd160_16way_context ctx_ripemd;
|
||||
sha256_16x32_context ctx_sha256 __attribute__ ((aligned (64)));
|
||||
sha512_8x64_context ctx_sha512;
|
||||
ripemd160_16x32_context ctx_ripemd;
|
||||
|
||||
memcpy( &ctx_sha256, &sha256_16w_mid, sizeof(ctx_sha256) );
|
||||
sha256_16way_update( &ctx_sha256, input + (LBRY_MIDSTATE<<4), LBRY_TAIL );
|
||||
sha256_16way_close( &ctx_sha256, vhashA );
|
||||
sha256_16x32_update( &ctx_sha256, input + (LBRY_MIDSTATE<<4), LBRY_TAIL );
|
||||
sha256_16x32_close( &ctx_sha256, vhashA );
|
||||
|
||||
sha256_16way_init( &ctx_sha256 );
|
||||
sha256_16way_update( &ctx_sha256, vhashA, 32 );
|
||||
sha256_16way_close( &ctx_sha256, vhashA );
|
||||
sha256_16x32_init( &ctx_sha256 );
|
||||
sha256_16x32_update( &ctx_sha256, vhashA, 32 );
|
||||
sha256_16x32_close( &ctx_sha256, vhashA );
|
||||
|
||||
// reinterleave to do sha512 4-way 64 bit twice.
|
||||
dintrlv_16x32( h0, h1, h2, h3, h4, h5, h6, h7,
|
||||
@@ -54,13 +54,13 @@ void lbry_16way_hash( void* output, const void* input )
|
||||
intrlv_8x64( vhashA, h0, h1, h2, h3, h4, h5, h6, h7, 256 );
|
||||
intrlv_8x64( vhashB, h8, h9, h10, h11, h12, h13, h14, h15, 256 );
|
||||
|
||||
sha512_8way_init( &ctx_sha512 );
|
||||
sha512_8way_update( &ctx_sha512, vhashA, 32 );
|
||||
sha512_8way_close( &ctx_sha512, vhashA );
|
||||
sha512_8x64_init( &ctx_sha512 );
|
||||
sha512_8x64_update( &ctx_sha512, vhashA, 32 );
|
||||
sha512_8x64_close( &ctx_sha512, vhashA );
|
||||
|
||||
sha512_8way_init( &ctx_sha512 );
|
||||
sha512_8way_update( &ctx_sha512, vhashB, 32 );
|
||||
sha512_8way_close( &ctx_sha512, vhashB );
|
||||
sha512_8x64_init( &ctx_sha512 );
|
||||
sha512_8x64_update( &ctx_sha512, vhashB, 32 );
|
||||
sha512_8x64_close( &ctx_sha512, vhashB );
|
||||
|
||||
// back to 8-way 32 bit
|
||||
dintrlv_8x64( h0, h1, h2, h3, h4, h5, h6, h7, vhashA, 512 );
|
||||
@@ -68,22 +68,22 @@ void lbry_16way_hash( void* output, const void* input )
|
||||
intrlv_16x32( vhashA, h0, h1, h2, h3, h4, h5, h6, h7,
|
||||
h8, h9, h10, h11, h12, h13, h14, h15, 512 );
|
||||
|
||||
ripemd160_16way_init( &ctx_ripemd );
|
||||
ripemd160_16way_update( &ctx_ripemd, vhashA, 32 );
|
||||
ripemd160_16way_close( &ctx_ripemd, vhashB );
|
||||
ripemd160_16x32_init( &ctx_ripemd );
|
||||
ripemd160_16x32_update( &ctx_ripemd, vhashA, 32 );
|
||||
ripemd160_16x32_close( &ctx_ripemd, vhashB );
|
||||
|
||||
ripemd160_16way_init( &ctx_ripemd );
|
||||
ripemd160_16way_update( &ctx_ripemd, vhashA+(8<<4), 32 );
|
||||
ripemd160_16way_close( &ctx_ripemd, vhashC );
|
||||
ripemd160_16x32_init( &ctx_ripemd );
|
||||
ripemd160_16x32_update( &ctx_ripemd, vhashA+(8<<4), 32 );
|
||||
ripemd160_16x32_close( &ctx_ripemd, vhashC );
|
||||
|
||||
sha256_16way_init( &ctx_sha256 );
|
||||
sha256_16way_update( &ctx_sha256, vhashB, 20 );
|
||||
sha256_16way_update( &ctx_sha256, vhashC, 20 );
|
||||
sha256_16way_close( &ctx_sha256, vhashA );
|
||||
sha256_16x32_init( &ctx_sha256 );
|
||||
sha256_16x32_update( &ctx_sha256, vhashB, 20 );
|
||||
sha256_16x32_update( &ctx_sha256, vhashC, 20 );
|
||||
sha256_16x32_close( &ctx_sha256, vhashA );
|
||||
|
||||
sha256_16way_init( &ctx_sha256 );
|
||||
sha256_16way_update( &ctx_sha256, vhashA, 32 );
|
||||
sha256_16way_close( &ctx_sha256, output );
|
||||
sha256_16x32_init( &ctx_sha256 );
|
||||
sha256_16x32_update( &ctx_sha256, vhashA, 32 );
|
||||
sha256_16x32_close( &ctx_sha256, output );
|
||||
}
|
||||
|
||||
int scanhash_lbry_16way( struct work *work, uint32_t max_nonce,
|
||||
@@ -115,8 +115,8 @@ int scanhash_lbry_16way( struct work *work, uint32_t max_nonce,
|
||||
intrlv_16x32( vdata, edata, edata, edata, edata, edata, edata, edata,
|
||||
edata, edata, edata, edata, edata, edata, edata, edata, edata, 1024 );
|
||||
|
||||
sha256_16way_init( &sha256_16w_mid );
|
||||
sha256_16way_update( &sha256_16w_mid, vdata, LBRY_MIDSTATE );
|
||||
sha256_16x32_init( &sha256_16w_mid );
|
||||
sha256_16x32_update( &sha256_16w_mid, vdata, LBRY_MIDSTATE );
|
||||
|
||||
do
|
||||
{
|
||||
@@ -144,7 +144,7 @@ int scanhash_lbry_16way( struct work *work, uint32_t max_nonce,
|
||||
|
||||
#elif defined(LBRY_8WAY)
|
||||
|
||||
static __thread sha256_8way_context sha256_8w_mid;
|
||||
static __thread sha256_8x32_context sha256_8w_mid;
|
||||
|
||||
void lbry_8way_hash( void* output, const void* input )
|
||||
{
|
||||
@@ -159,52 +159,52 @@ void lbry_8way_hash( void* output, const void* input )
|
||||
uint32_t _ALIGN(32) h5[32];
|
||||
uint32_t _ALIGN(32) h6[32];
|
||||
uint32_t _ALIGN(32) h7[32];
|
||||
sha256_8way_context ctx_sha256 __attribute__ ((aligned (64)));
|
||||
sha512_4way_context ctx_sha512;
|
||||
ripemd160_8way_context ctx_ripemd;
|
||||
sha256_8x32_context ctx_sha256 __attribute__ ((aligned (64)));
|
||||
sha512_4x64_context ctx_sha512;
|
||||
ripemd160_8x32_context ctx_ripemd;
|
||||
|
||||
memcpy( &ctx_sha256, &sha256_8w_mid, sizeof(ctx_sha256) );
|
||||
sha256_8way_update( &ctx_sha256, input + (LBRY_MIDSTATE<<3), LBRY_TAIL );
|
||||
sha256_8way_close( &ctx_sha256, vhashA );
|
||||
sha256_8x32_update( &ctx_sha256, input + (LBRY_MIDSTATE<<3), LBRY_TAIL );
|
||||
sha256_8x32_close( &ctx_sha256, vhashA );
|
||||
|
||||
sha256_8way_init( &ctx_sha256 );
|
||||
sha256_8way_update( &ctx_sha256, vhashA, 32 );
|
||||
sha256_8way_close( &ctx_sha256, vhashA );
|
||||
sha256_8x32_init( &ctx_sha256 );
|
||||
sha256_8x32_update( &ctx_sha256, vhashA, 32 );
|
||||
sha256_8x32_close( &ctx_sha256, vhashA );
|
||||
|
||||
// reinterleave to do sha512 4-way 64 bit twice.
|
||||
dintrlv_8x32( h0, h1, h2, h3, h4, h5, h6, h7, vhashA, 256 );
|
||||
intrlv_4x64( vhashA, h0, h1, h2, h3, 256 );
|
||||
intrlv_4x64( vhashB, h4, h5, h6, h7, 256 );
|
||||
|
||||
sha512_4way_init( &ctx_sha512 );
|
||||
sha512_4way_update( &ctx_sha512, vhashA, 32 );
|
||||
sha512_4way_close( &ctx_sha512, vhashA );
|
||||
sha512_4x64_init( &ctx_sha512 );
|
||||
sha512_4x64_update( &ctx_sha512, vhashA, 32 );
|
||||
sha512_4x64_close( &ctx_sha512, vhashA );
|
||||
|
||||
sha512_4way_init( &ctx_sha512 );
|
||||
sha512_4way_update( &ctx_sha512, vhashB, 32 );
|
||||
sha512_4way_close( &ctx_sha512, vhashB );
|
||||
sha512_4x64_init( &ctx_sha512 );
|
||||
sha512_4x64_update( &ctx_sha512, vhashB, 32 );
|
||||
sha512_4x64_close( &ctx_sha512, vhashB );
|
||||
|
||||
// back to 8-way 32 bit
|
||||
dintrlv_4x64( h0, h1, h2, h3, vhashA, 512 );
|
||||
dintrlv_4x64( h4, h5, h6, h7, vhashB, 512 );
|
||||
intrlv_8x32( vhashA, h0, h1, h2, h3, h4, h5, h6, h7, 512 );
|
||||
|
||||
ripemd160_8way_init( &ctx_ripemd );
|
||||
ripemd160_8way_update( &ctx_ripemd, vhashA, 32 );
|
||||
ripemd160_8way_close( &ctx_ripemd, vhashB );
|
||||
ripemd160_8x32_init( &ctx_ripemd );
|
||||
ripemd160_8x32_update( &ctx_ripemd, vhashA, 32 );
|
||||
ripemd160_8x32_close( &ctx_ripemd, vhashB );
|
||||
|
||||
ripemd160_8way_init( &ctx_ripemd );
|
||||
ripemd160_8way_update( &ctx_ripemd, vhashA+(8<<3), 32 );
|
||||
ripemd160_8way_close( &ctx_ripemd, vhashC );
|
||||
ripemd160_8x32_init( &ctx_ripemd );
|
||||
ripemd160_8x32_update( &ctx_ripemd, vhashA+(8<<3), 32 );
|
||||
ripemd160_8x32_close( &ctx_ripemd, vhashC );
|
||||
|
||||
sha256_8way_init( &ctx_sha256 );
|
||||
sha256_8way_update( &ctx_sha256, vhashB, 20 );
|
||||
sha256_8way_update( &ctx_sha256, vhashC, 20 );
|
||||
sha256_8way_close( &ctx_sha256, vhashA );
|
||||
sha256_8x32_init( &ctx_sha256 );
|
||||
sha256_8x32_update( &ctx_sha256, vhashB, 20 );
|
||||
sha256_8x32_update( &ctx_sha256, vhashC, 20 );
|
||||
sha256_8x32_close( &ctx_sha256, vhashA );
|
||||
|
||||
sha256_8way_init( &ctx_sha256 );
|
||||
sha256_8way_update( &ctx_sha256, vhashA, 32 );
|
||||
sha256_8way_close( &ctx_sha256, output );
|
||||
sha256_8x32_init( &ctx_sha256 );
|
||||
sha256_8x32_update( &ctx_sha256, vhashA, 32 );
|
||||
sha256_8x32_close( &ctx_sha256, output );
|
||||
}
|
||||
|
||||
int scanhash_lbry_8way( struct work *work, uint32_t max_nonce,
|
||||
@@ -235,8 +235,8 @@ int scanhash_lbry_8way( struct work *work, uint32_t max_nonce,
|
||||
intrlv_8x32( vdata, edata, edata, edata, edata,
|
||||
edata, edata, edata, edata, 1024 );
|
||||
|
||||
sha256_8way_init( &sha256_8w_mid );
|
||||
sha256_8way_update( &sha256_8w_mid, vdata, LBRY_MIDSTATE );
|
||||
sha256_8x32_init( &sha256_8w_mid );
|
||||
sha256_8x32_update( &sha256_8w_mid, vdata, LBRY_MIDSTATE );
|
||||
|
||||
do
|
||||
{
|
||||
|
||||
@@ -57,7 +57,7 @@ do{ \
|
||||
#define ROUND2(a, b, c, d, e, f, s, r, k) \
|
||||
RR(a ## 2, b ## 2, c ## 2, d ## 2, e ## 2, f, s, r, K2 ## k)
|
||||
|
||||
static void ripemd160_4way_round( ripemd160_4way_context *sc )
|
||||
static void ripemd160_4x32_round( ripemd160_4x32_context *sc )
|
||||
{
|
||||
const __m128i *in = (__m128i*)sc->buf;
|
||||
__m128i *h = (__m128i*)sc->val;
|
||||
@@ -249,7 +249,7 @@ static void ripemd160_4way_round( ripemd160_4way_context *sc )
|
||||
h[0] = tmp;
|
||||
}
|
||||
|
||||
void ripemd160_4way_init( ripemd160_4way_context *sc )
|
||||
void ripemd160_4x32_init( ripemd160_4x32_context *sc )
|
||||
{
|
||||
sc->val[0] = _mm_set1_epi64x( 0x6745230167452301 );
|
||||
sc->val[1] = _mm_set1_epi64x( 0xEFCDAB89EFCDAB89 );
|
||||
@@ -259,7 +259,7 @@ void ripemd160_4way_init( ripemd160_4way_context *sc )
|
||||
sc->count_high = sc->count_low = 0;
|
||||
}
|
||||
|
||||
void ripemd160_4way_update( ripemd160_4way_context *sc, const void *data,
|
||||
void ripemd160_4x32_update( ripemd160_4x32_context *sc, const void *data,
|
||||
size_t len )
|
||||
{
|
||||
__m128i *vdata = (__m128i*)data;
|
||||
@@ -281,7 +281,7 @@ void ripemd160_4way_update( ripemd160_4way_context *sc, const void *data,
|
||||
len -= clen;
|
||||
if ( ptr == block_size )
|
||||
{
|
||||
ripemd160_4way_round( sc );
|
||||
ripemd160_4x32_round( sc );
|
||||
ptr = 0;
|
||||
}
|
||||
clow = sc->count_low;
|
||||
@@ -292,7 +292,7 @@ void ripemd160_4way_update( ripemd160_4way_context *sc, const void *data,
|
||||
}
|
||||
}
|
||||
|
||||
void ripemd160_4way_close( ripemd160_4way_context *sc, void *dst )
|
||||
void ripemd160_4x32_close( ripemd160_4x32_context *sc, void *dst )
|
||||
{
|
||||
unsigned ptr, u;
|
||||
uint32_t low, high;
|
||||
@@ -306,7 +306,7 @@ void ripemd160_4way_close( ripemd160_4way_context *sc, void *dst )
|
||||
if ( ptr > pad )
|
||||
{
|
||||
memset_zero_128( sc->buf + (ptr>>2), (block_size - ptr) >> 2 );
|
||||
ripemd160_4way_round( sc );
|
||||
ripemd160_4x32_round( sc );
|
||||
memset_zero_128( sc->buf, pad>>2 );
|
||||
}
|
||||
else
|
||||
@@ -317,7 +317,7 @@ void ripemd160_4way_close( ripemd160_4way_context *sc, void *dst )
|
||||
low = low << 3;
|
||||
sc->buf[ pad>>2 ] = _mm_set1_epi32( low );
|
||||
sc->buf[ (pad>>2) + 1 ] = _mm_set1_epi32( high );
|
||||
ripemd160_4way_round( sc );
|
||||
ripemd160_4x32_round( sc );
|
||||
for (u = 0; u < 5; u ++)
|
||||
casti_v128u32( dst, u ) = sc->val[u];
|
||||
}
|
||||
@@ -357,7 +357,7 @@ do{ \
|
||||
#define ROUND2_8W(a, b, c, d, e, f, s, r, k) \
|
||||
RR_8W(a ## 2, b ## 2, c ## 2, d ## 2, e ## 2, f, s, r, K2 ## k)
|
||||
|
||||
static void ripemd160_8way_round( ripemd160_8way_context *sc )
|
||||
static void ripemd160_8x32_round( ripemd160_8x32_context *sc )
|
||||
{
|
||||
const __m256i *in = (__m256i*)sc->buf;
|
||||
__m256i *h = (__m256i*)sc->val;
|
||||
@@ -550,7 +550,7 @@ static void ripemd160_8way_round( ripemd160_8way_context *sc )
|
||||
}
|
||||
|
||||
|
||||
void ripemd160_8way_init( ripemd160_8way_context *sc )
|
||||
void ripemd160_8x32_init( ripemd160_8x32_context *sc )
|
||||
{
|
||||
sc->val[0] = _mm256_set1_epi64x( 0x6745230167452301 );
|
||||
sc->val[1] = _mm256_set1_epi64x( 0xEFCDAB89EFCDAB89 );
|
||||
@@ -560,7 +560,7 @@ void ripemd160_8way_init( ripemd160_8way_context *sc )
|
||||
sc->count_high = sc->count_low = 0;
|
||||
}
|
||||
|
||||
void ripemd160_8way_update( ripemd160_8way_context *sc, const void *data,
|
||||
void ripemd160_8x32_update( ripemd160_8x32_context *sc, const void *data,
|
||||
size_t len )
|
||||
{
|
||||
__m256i *vdata = (__m256i*)data;
|
||||
@@ -582,7 +582,7 @@ void ripemd160_8way_update( ripemd160_8way_context *sc, const void *data,
|
||||
len -= clen;
|
||||
if ( ptr == block_size )
|
||||
{
|
||||
ripemd160_8way_round( sc );
|
||||
ripemd160_8x32_round( sc );
|
||||
ptr = 0;
|
||||
}
|
||||
clow = sc->count_low;
|
||||
@@ -593,7 +593,7 @@ void ripemd160_8way_update( ripemd160_8way_context *sc, const void *data,
|
||||
}
|
||||
}
|
||||
|
||||
void ripemd160_8way_close( ripemd160_8way_context *sc, void *dst )
|
||||
void ripemd160_8x32_close( ripemd160_8x32_context *sc, void *dst )
|
||||
{
|
||||
unsigned ptr, u;
|
||||
uint32_t low, high;
|
||||
@@ -607,7 +607,7 @@ void ripemd160_8way_close( ripemd160_8way_context *sc, void *dst )
|
||||
if ( ptr > pad )
|
||||
{
|
||||
memset_zero_256( sc->buf + (ptr>>2), (block_size - ptr) >> 2 );
|
||||
ripemd160_8way_round( sc );
|
||||
ripemd160_8x32_round( sc );
|
||||
memset_zero_256( sc->buf, pad>>2 );
|
||||
}
|
||||
else
|
||||
@@ -618,7 +618,7 @@ void ripemd160_8way_close( ripemd160_8way_context *sc, void *dst )
|
||||
low = low << 3;
|
||||
sc->buf[ pad>>2 ] = _mm256_set1_epi32( low );
|
||||
sc->buf[ (pad>>2) + 1 ] = _mm256_set1_epi32( high );
|
||||
ripemd160_8way_round( sc );
|
||||
ripemd160_8x32_round( sc );
|
||||
for (u = 0; u < 5; u ++)
|
||||
casti_m256i( dst, u ) = sc->val[u];
|
||||
}
|
||||
@@ -629,7 +629,6 @@ void ripemd160_8way_close( ripemd160_8way_context *sc, void *dst )
|
||||
|
||||
// RIPEMD-160 16 way
|
||||
|
||||
|
||||
#define F16W_1(x, y, z) \
|
||||
_mm512_xor_si512( _mm512_xor_si512( x, y ), z )
|
||||
|
||||
@@ -659,7 +658,7 @@ do{ \
|
||||
#define ROUND2_16W(a, b, c, d, e, f, s, r, k) \
|
||||
RR_16W(a ## 2, b ## 2, c ## 2, d ## 2, e ## 2, f, s, r, K2 ## k)
|
||||
|
||||
static void ripemd160_16way_round( ripemd160_16way_context *sc )
|
||||
static void ripemd160_16x32_round( ripemd160_16x32_context *sc )
|
||||
{
|
||||
const __m512i *in = (__m512i*)sc->buf;
|
||||
__m512i *h = (__m512i*)sc->val;
|
||||
@@ -851,7 +850,7 @@ static void ripemd160_16way_round( ripemd160_16way_context *sc )
|
||||
h[0] = tmp;
|
||||
}
|
||||
|
||||
void ripemd160_16way_init( ripemd160_16way_context *sc )
|
||||
void ripemd160_16x32_init( ripemd160_16x32_context *sc )
|
||||
{
|
||||
sc->val[0] = _mm512_set1_epi64( 0x6745230167452301 );
|
||||
sc->val[1] = _mm512_set1_epi64( 0xEFCDAB89EFCDAB89 );
|
||||
@@ -861,7 +860,7 @@ void ripemd160_16way_init( ripemd160_16way_context *sc )
|
||||
sc->count_high = sc->count_low = 0;
|
||||
}
|
||||
|
||||
void ripemd160_16way_update( ripemd160_16way_context *sc, const void *data,
|
||||
void ripemd160_16x32_update( ripemd160_16x32_context *sc, const void *data,
|
||||
size_t len )
|
||||
{
|
||||
__m512i *vdata = (__m512i*)data;
|
||||
@@ -883,7 +882,7 @@ void ripemd160_16way_update( ripemd160_16way_context *sc, const void *data,
|
||||
len -= clen;
|
||||
if ( ptr == block_size )
|
||||
{
|
||||
ripemd160_16way_round( sc );
|
||||
ripemd160_16x32_round( sc );
|
||||
ptr = 0;
|
||||
}
|
||||
clow = sc->count_low;
|
||||
@@ -894,7 +893,7 @@ void ripemd160_16way_update( ripemd160_16way_context *sc, const void *data,
|
||||
}
|
||||
}
|
||||
|
||||
void ripemd160_16way_close( ripemd160_16way_context *sc, void *dst )
|
||||
void ripemd160_16x32_close( ripemd160_16x32_context *sc, void *dst )
|
||||
{
|
||||
unsigned ptr, u;
|
||||
uint32_t low, high;
|
||||
@@ -908,7 +907,7 @@ void ripemd160_16way_close( ripemd160_16way_context *sc, void *dst )
|
||||
if ( ptr > pad )
|
||||
{
|
||||
memset_zero_512( sc->buf + (ptr>>2), (block_size - ptr) >> 2 );
|
||||
ripemd160_16way_round( sc );
|
||||
ripemd160_16x32_round( sc );
|
||||
memset_zero_512( sc->buf, pad>>2 );
|
||||
}
|
||||
else
|
||||
@@ -919,7 +918,7 @@ void ripemd160_16way_close( ripemd160_16way_context *sc, void *dst )
|
||||
low = low << 3;
|
||||
sc->buf[ pad>>2 ] = _mm512_set1_epi32( low );
|
||||
sc->buf[ (pad>>2) + 1 ] = _mm512_set1_epi32( high );
|
||||
ripemd160_16way_round( sc );
|
||||
ripemd160_16x32_round( sc );
|
||||
for (u = 0; u < 5; u ++)
|
||||
casti_m512i( dst, u ) = sc->val[u];
|
||||
}
|
||||
|
||||
@@ -12,12 +12,12 @@ typedef struct
|
||||
__m128i buf[64>>2];
|
||||
__m128i val[5];
|
||||
uint32_t count_high, count_low;
|
||||
} __attribute__ ((aligned (64))) ripemd160_4way_context;
|
||||
} __attribute__ ((aligned (64))) ripemd160_4x32_context;
|
||||
|
||||
void ripemd160_4way_init( ripemd160_4way_context *sc );
|
||||
void ripemd160_4way_update( ripemd160_4way_context *sc, const void *data,
|
||||
void ripemd160_4x32_init( ripemd160_4x32_context *sc );
|
||||
void ripemd160_4x32_update( ripemd160_4x32_context *sc, const void *data,
|
||||
size_t len );
|
||||
void ripemd160_4way_close( ripemd160_4way_context *sc, void *dst );
|
||||
void ripemd160_4x32_close( ripemd160_4x32_context *sc, void *dst );
|
||||
|
||||
#if defined (__AVX2__)
|
||||
|
||||
@@ -26,12 +26,12 @@ typedef struct
|
||||
__m256i buf[64>>2];
|
||||
__m256i val[5];
|
||||
uint32_t count_high, count_low;
|
||||
} __attribute__ ((aligned (128))) ripemd160_8way_context;
|
||||
} __attribute__ ((aligned (128))) ripemd160_8x32_context;
|
||||
|
||||
void ripemd160_8way_init( ripemd160_8way_context *sc );
|
||||
void ripemd160_8way_update( ripemd160_8way_context *sc, const void *data,
|
||||
void ripemd160_8x32_init( ripemd160_8x32_context *sc );
|
||||
void ripemd160_8x32_update( ripemd160_8x32_context *sc, const void *data,
|
||||
size_t len );
|
||||
void ripemd160_8way_close( ripemd160_8way_context *sc, void *dst );
|
||||
void ripemd160_8x32_close( ripemd160_8x32_context *sc, void *dst );
|
||||
|
||||
#if defined(SIMD512)
|
||||
|
||||
@@ -40,12 +40,12 @@ typedef struct
|
||||
__m512i buf[64>>2];
|
||||
__m512i val[5];
|
||||
uint32_t count_high, count_low;
|
||||
} __attribute__ ((aligned (128))) ripemd160_16way_context;
|
||||
} __attribute__ ((aligned (128))) ripemd160_16x32_context;
|
||||
|
||||
void ripemd160_16way_init( ripemd160_16way_context *sc );
|
||||
void ripemd160_16way_update( ripemd160_16way_context *sc, const void *data,
|
||||
void ripemd160_16x32_init( ripemd160_16x32_context *sc );
|
||||
void ripemd160_16x32_update( ripemd160_16x32_context *sc, const void *data,
|
||||
size_t len );
|
||||
void ripemd160_16way_close( ripemd160_16way_context *sc, void *dst );
|
||||
void ripemd160_16x32_close( ripemd160_16x32_context *sc, void *dst );
|
||||
|
||||
#endif // AVX512
|
||||
#endif // __AVX2__
|
||||
|
||||
@@ -597,6 +597,45 @@ static void blake2s_compress(blake2s_state *S, const void *buf) {
|
||||
v[13] = S->t[1] ^ blake2s_IV[5];
|
||||
v[14] = S->f[0] ^ blake2s_IV[6];
|
||||
v[15] = S->f[1] ^ blake2s_IV[7];
|
||||
|
||||
#if defined(__SSE2__) || defined(__ARM_NEON)
|
||||
|
||||
v128_t *V = (v128_t*)v;
|
||||
|
||||
#define ROUND( r ) \
|
||||
V[0] = v128_add32( V[0], v128_add32( V[1], v128_set32( \
|
||||
m[blake2s_sigma[r][ 6]], m[blake2s_sigma[r][ 4]], \
|
||||
m[blake2s_sigma[r][ 2]], m[blake2s_sigma[r][ 0]] ) ) ); \
|
||||
V[3] = v128_ror32( v128_xor( V[3], V[0] ), 16 ); \
|
||||
V[2] = v128_add32( V[2], V[3] ); \
|
||||
V[1] = v128_ror32( v128_xor( V[1], V[2] ), 12 ); \
|
||||
V[0] = v128_add32( V[0], v128_add32( V[1], v128_set32( \
|
||||
m[blake2s_sigma[r][ 7]], m[blake2s_sigma[r][ 5]], \
|
||||
m[blake2s_sigma[r][ 3]], m[blake2s_sigma[r][ 1]] ) ) ); \
|
||||
V[3] = v128_ror32( v128_xor( V[3], V[0] ), 8 ); \
|
||||
V[2] = v128_add32( V[2], V[3] ); \
|
||||
V[1] = v128_ror32( v128_xor( V[1], V[2] ), 7 ); \
|
||||
V[0] = v128_shufll32( V[0] ); \
|
||||
V[3] = v128_swap64( V[3] ); \
|
||||
V[2] = v128_shuflr32( V[2] ); \
|
||||
V[0] = v128_add32( V[0], v128_add32( V[1], v128_set32( \
|
||||
m[blake2s_sigma[r][12]], m[blake2s_sigma[r][10]], \
|
||||
m[blake2s_sigma[r][ 8]], m[blake2s_sigma[r][14]] ) ) ); \
|
||||
V[3] = v128_ror32( v128_xor( V[3], V[0] ), 16 ); \
|
||||
V[2] = v128_add32( V[2], V[3] ); \
|
||||
V[1] = v128_ror32( v128_xor( V[1], V[2] ), 12 ); \
|
||||
V[0] = v128_add32( V[0], v128_add32( V[1], v128_set32( \
|
||||
m[blake2s_sigma[r][13]], m[blake2s_sigma[r][11]], \
|
||||
m[blake2s_sigma[r][ 9]], m[blake2s_sigma[r][15]] ) ) ); \
|
||||
V[3] = v128_ror32( v128_xor( V[3], V[0] ), 8 ); \
|
||||
V[2] = v128_add32( V[2], V[3] ); \
|
||||
V[1] = v128_ror32( v128_xor( V[1], V[2] ), 7 ); \
|
||||
V[0] = v128_shuflr32( V[0] ); \
|
||||
V[3] = v128_swap64( V[3] ); \
|
||||
V[2] = v128_shufll32( V[2] )
|
||||
|
||||
#else
|
||||
|
||||
#define G(r,i,a,b,c,d) \
|
||||
do { \
|
||||
a = a + b + m[blake2s_sigma[r][2*i+0]]; \
|
||||
@@ -619,6 +658,9 @@ static void blake2s_compress(blake2s_state *S, const void *buf) {
|
||||
G(r, 6, v[ 2], v[ 7], v[ 8], v[13]); \
|
||||
G(r, 7, v[ 3], v[ 4], v[ 9], v[14]); \
|
||||
} while(0)
|
||||
|
||||
#endif
|
||||
|
||||
ROUND(0);
|
||||
ROUND(1);
|
||||
ROUND(2);
|
||||
|
||||
@@ -336,7 +336,7 @@ static const uint32_t _ALIGN(16) finalblk_4way[4 * 16] = {
|
||||
};
|
||||
*/
|
||||
|
||||
static inline void sha256_4way_init_state( void *state )
|
||||
static inline void sha256_4x32_init_state( void *state )
|
||||
{
|
||||
casti_v128( state, 0 ) = v128_32( 0x6A09E667 );
|
||||
casti_v128( state, 1 ) = v128_32( 0xBB67AE85 );
|
||||
@@ -359,21 +359,21 @@ static inline void HMAC_SHA256_80_init_4way( const uint32_t *key,
|
||||
memcpy( pad, key + 4*16, 4*16 );
|
||||
memcpy( pad + 4*4, keypad_4way, 4*48 );
|
||||
|
||||
sha256_4way_transform_le( (v128_t*)ihash, (v128_t*)pad,
|
||||
sha256_4x32_transform_le( (v128_t*)ihash, (v128_t*)pad,
|
||||
(const v128_t*)tstate );
|
||||
|
||||
sha256_4way_init_state( tstate );
|
||||
sha256_4x32_init_state( tstate );
|
||||
|
||||
for ( i = 0; i < 4*8; i++ ) pad[i] = ihash[i] ^ 0x5c5c5c5c;
|
||||
for ( ; i < 4*16; i++ ) pad[i] = 0x5c5c5c5c;
|
||||
|
||||
sha256_4way_transform_le( (v128_t*)ostate, (v128_t*)pad,
|
||||
sha256_4x32_transform_le( (v128_t*)ostate, (v128_t*)pad,
|
||||
(const v128_t*)tstate );
|
||||
|
||||
for ( i = 0; i < 4*8; i++ ) pad[i] = ihash[i] ^ 0x36363636;
|
||||
for ( ; i < 4*16; i++ ) pad[i] = 0x36363636;
|
||||
|
||||
sha256_4way_transform_le( (v128_t*)tstate, (v128_t*)pad,
|
||||
sha256_4x32_transform_le( (v128_t*)tstate, (v128_t*)pad,
|
||||
(const v128_t*)tstate );
|
||||
}
|
||||
|
||||
@@ -386,7 +386,7 @@ static inline void PBKDF2_SHA256_80_128_4way( const uint32_t *tstate,
|
||||
uint32_t _ALIGN(16) obuf[4 * 16];
|
||||
int i, j;
|
||||
|
||||
sha256_4way_transform_le( (v128_t*)istate, (v128_t*)salt,
|
||||
sha256_4x32_transform_le( (v128_t*)istate, (v128_t*)salt,
|
||||
(const v128_t*)tstate );
|
||||
|
||||
memcpy(ibuf, salt + 4 * 16, 4 * 16);
|
||||
@@ -400,10 +400,10 @@ static inline void PBKDF2_SHA256_80_128_4way( const uint32_t *tstate,
|
||||
ibuf[4 * 4 + 2] = i + 1;
|
||||
ibuf[4 * 4 + 3] = i + 1;
|
||||
|
||||
sha256_4way_transform_le( (v128_t*)obuf, (v128_t*)ibuf,
|
||||
sha256_4x32_transform_le( (v128_t*)obuf, (v128_t*)ibuf,
|
||||
(const v128_t*)istate );
|
||||
|
||||
sha256_4way_transform_le( (v128_t*)ostate2, (v128_t*)obuf,
|
||||
sha256_4x32_transform_le( (v128_t*)ostate2, (v128_t*)obuf,
|
||||
(const v128_t*)ostate );
|
||||
|
||||
for ( j = 0; j < 4 * 8; j++ )
|
||||
@@ -418,9 +418,9 @@ static inline void PBKDF2_SHA256_128_32_4way( uint32_t *tstate,
|
||||
uint32_t _ALIGN(64) buf[4 * 16];
|
||||
int i;
|
||||
|
||||
sha256_4way_transform_be( (v128_t*)tstate, (v128_t*)salt,
|
||||
sha256_4x32_transform_be( (v128_t*)tstate, (v128_t*)salt,
|
||||
(const v128_t*)tstate );
|
||||
sha256_4way_transform_be( (v128_t*)tstate, (v128_t*)( salt + 4*16),
|
||||
sha256_4x32_transform_be( (v128_t*)tstate, (v128_t*)( salt + 4*16),
|
||||
(const v128_t*)tstate );
|
||||
|
||||
final[ 0] = v128_32( 0x00000001 );
|
||||
@@ -431,13 +431,13 @@ static inline void PBKDF2_SHA256_128_32_4way( uint32_t *tstate,
|
||||
= v128_xor( final[ 0], final[ 0] ); //_mm_setzero_si128();
|
||||
final[15] = v128_32 ( 0x00000620 );
|
||||
|
||||
sha256_4way_transform_le( (v128_t*)tstate, (v128_t*)final,
|
||||
sha256_4x32_transform_le( (v128_t*)tstate, (v128_t*)final,
|
||||
(const v128_t*)tstate );
|
||||
|
||||
memcpy(buf, tstate, 4 * 32);
|
||||
memcpy(buf + 4 * 8, outerpad_4way, 4 * 32);
|
||||
|
||||
sha256_4way_transform_le( (v128_t*)ostate, (v128_t*)buf,
|
||||
sha256_4x32_transform_le( (v128_t*)ostate, (v128_t*)buf,
|
||||
(const v128_t*)ostate );
|
||||
|
||||
for ( i = 0; i < 4 * 8; i++ )
|
||||
@@ -467,7 +467,7 @@ static const uint32_t _ALIGN(32) finalblk_8way[8 * 16] = {
|
||||
};
|
||||
*/
|
||||
|
||||
static inline void sha256_8way_init_state( void *state )
|
||||
static inline void sha256_8x32_init_state( void *state )
|
||||
{
|
||||
casti_m256i( state, 0 ) = _mm256_set1_epi32( 0x6A09E667 );
|
||||
casti_m256i( state, 1 ) = _mm256_set1_epi32( 0xBB67AE85 );
|
||||
@@ -491,21 +491,21 @@ static inline void HMAC_SHA256_80_init_8way( const uint32_t *key,
|
||||
memset( pad + 8*5, 0x00, 8*40 );
|
||||
for ( i = 0; i < 8; i++ ) pad[ 8*15 + i ] = 0x00000280;
|
||||
|
||||
sha256_8way_transform_le( (__m256i*)ihash, (__m256i*)pad,
|
||||
sha256_8x32_transform_le( (__m256i*)ihash, (__m256i*)pad,
|
||||
(const __m256i*)tstate );
|
||||
|
||||
sha256_8way_init_state( tstate );
|
||||
sha256_8x32_init_state( tstate );
|
||||
|
||||
for ( i = 0; i < 8*8; i++ ) pad[i] = ihash[i] ^ 0x5c5c5c5c;
|
||||
for ( ; i < 8*16; i++ ) pad[i] = 0x5c5c5c5c;
|
||||
|
||||
sha256_8way_transform_le( (__m256i*)ostate, (__m256i*)pad,
|
||||
sha256_8x32_transform_le( (__m256i*)ostate, (__m256i*)pad,
|
||||
(const __m256i*)tstate );
|
||||
|
||||
for ( i = 0; i < 8*8; i++ ) pad[i] = ihash[i] ^ 0x36363636;
|
||||
for ( ; i < 8*16; i++ ) pad[i] = 0x36363636;
|
||||
|
||||
sha256_8way_transform_le( (__m256i*)tstate, (__m256i*)pad,
|
||||
sha256_8x32_transform_le( (__m256i*)tstate, (__m256i*)pad,
|
||||
(const __m256i*)tstate );
|
||||
}
|
||||
|
||||
@@ -518,7 +518,7 @@ static inline void PBKDF2_SHA256_80_128_8way( const uint32_t *tstate,
|
||||
uint32_t _ALIGN(32) obuf[8 * 16];
|
||||
int i, j;
|
||||
|
||||
sha256_8way_transform_le( (__m256i*)istate, (__m256i*)salt,
|
||||
sha256_8x32_transform_le( (__m256i*)istate, (__m256i*)salt,
|
||||
(const __m256i*)tstate );
|
||||
|
||||
memcpy( ibuf, salt + 8*16, 8*16 );
|
||||
@@ -541,10 +541,10 @@ static inline void PBKDF2_SHA256_80_128_8way( const uint32_t *tstate,
|
||||
ibuf[8 * 4 + 6] = i + 1;
|
||||
ibuf[8 * 4 + 7] = i + 1;
|
||||
|
||||
sha256_8way_transform_le( (__m256i*)obuf, (__m256i*)ibuf,
|
||||
sha256_8x32_transform_le( (__m256i*)obuf, (__m256i*)ibuf,
|
||||
(const __m256i*)istate );
|
||||
|
||||
sha256_8way_transform_le( (__m256i*)ostate2, (__m256i*)obuf,
|
||||
sha256_8x32_transform_le( (__m256i*)ostate2, (__m256i*)obuf,
|
||||
(const __m256i*)ostate );
|
||||
|
||||
for ( j = 0; j < 8*8; j++ )
|
||||
@@ -559,9 +559,9 @@ static inline void PBKDF2_SHA256_128_32_8way( uint32_t *tstate,
|
||||
uint32_t _ALIGN(128) buf[ 8*16 ];
|
||||
int i;
|
||||
|
||||
sha256_8way_transform_be( (__m256i*)tstate, (__m256i*)salt,
|
||||
sha256_8x32_transform_be( (__m256i*)tstate, (__m256i*)salt,
|
||||
(const __m256i*)tstate );
|
||||
sha256_8way_transform_be( (__m256i*)tstate, (__m256i*)( salt + 8*16),
|
||||
sha256_8x32_transform_be( (__m256i*)tstate, (__m256i*)( salt + 8*16),
|
||||
(const __m256i*)tstate );
|
||||
|
||||
final[ 0] = _mm256_set1_epi32( 0x00000001 );
|
||||
@@ -572,7 +572,7 @@ static inline void PBKDF2_SHA256_128_32_8way( uint32_t *tstate,
|
||||
= _mm256_setzero_si256();
|
||||
final[15] = _mm256_set1_epi32 ( 0x00000620 );
|
||||
|
||||
sha256_8way_transform_le( (__m256i*)tstate, final,
|
||||
sha256_8x32_transform_le( (__m256i*)tstate, final,
|
||||
(const __m256i*)tstate );
|
||||
|
||||
memcpy( buf, tstate, 8*32 );
|
||||
@@ -580,7 +580,7 @@ static inline void PBKDF2_SHA256_128_32_8way( uint32_t *tstate,
|
||||
memset( buf + 8*9, 0x00, 8*24 );
|
||||
for ( i = 0; i < 8; i++ ) buf[ 8*15 + i ] = 0x00000300;
|
||||
|
||||
sha256_8way_transform_le( (__m256i*)ostate, (__m256i*)buf,
|
||||
sha256_8x32_transform_le( (__m256i*)ostate, (__m256i*)buf,
|
||||
(const __m256i*)ostate );
|
||||
|
||||
for (i = 0; i < 8 * 8; i++)
|
||||
@@ -591,7 +591,7 @@ static inline void PBKDF2_SHA256_128_32_8way( uint32_t *tstate,
|
||||
|
||||
#if defined(SIMD512)
|
||||
|
||||
static inline void sha256_16way_init_state( void *state )
|
||||
static inline void sha256_16x32_init_state( void *state )
|
||||
{
|
||||
casti_m512i( state, 0 ) = _mm512_set1_epi32( 0x6A09E667 );
|
||||
casti_m512i( state, 1 ) = _mm512_set1_epi32( 0xBB67AE85 );
|
||||
@@ -615,21 +615,21 @@ static inline void HMAC_SHA256_80_init_16way( const uint32_t *key,
|
||||
memset( pad + 16*5, 0x00, 16*40 );
|
||||
for ( i = 0; i < 16; i++ ) pad[ 16*15 + i ] = 0x00000280;
|
||||
|
||||
sha256_16way_transform_le( (__m512i*)ihash, (__m512i*)pad,
|
||||
sha256_16x32_transform_le( (__m512i*)ihash, (__m512i*)pad,
|
||||
(const __m512i*)tstate );
|
||||
|
||||
sha256_16way_init_state( tstate );
|
||||
sha256_16x32_init_state( tstate );
|
||||
|
||||
for ( i = 0; i < 16*8; i++ ) pad[i] = ihash[i] ^ 0x5c5c5c5c;
|
||||
for ( ; i < 16*16; i++ ) pad[i] = 0x5c5c5c5c;
|
||||
|
||||
sha256_16way_transform_le( (__m512i*)ostate, (__m512i*)pad,
|
||||
sha256_16x32_transform_le( (__m512i*)ostate, (__m512i*)pad,
|
||||
(const __m512i*)tstate );
|
||||
|
||||
for ( i = 0; i < 16*8; i++ ) pad[i] = ihash[i] ^ 0x36363636;
|
||||
for ( ; i < 16*16; i++ ) pad[i] = 0x36363636;
|
||||
|
||||
sha256_16way_transform_le( (__m512i*)tstate, (__m512i*)pad,
|
||||
sha256_16x32_transform_le( (__m512i*)tstate, (__m512i*)pad,
|
||||
(const __m512i*)tstate );
|
||||
}
|
||||
|
||||
@@ -642,7 +642,7 @@ static inline void PBKDF2_SHA256_80_128_16way( const uint32_t *tstate,
|
||||
uint32_t _ALIGN(128) ostate2[ 16*8 ];
|
||||
int i, j;
|
||||
|
||||
sha256_16way_transform_le( (__m512i*)istate, (__m512i*)salt,
|
||||
sha256_16x32_transform_le( (__m512i*)istate, (__m512i*)salt,
|
||||
(const __m512i*)tstate );
|
||||
|
||||
memcpy( ibuf, salt + 16*16, 16*16 );
|
||||
@@ -673,10 +673,10 @@ static inline void PBKDF2_SHA256_80_128_16way( const uint32_t *tstate,
|
||||
ibuf[ 16*4 + 14 ] = i + 1;
|
||||
ibuf[ 16*4 + 15 ] = i + 1;
|
||||
|
||||
sha256_16way_transform_le( (__m512i*)obuf, (__m512i*)ibuf,
|
||||
sha256_16x32_transform_le( (__m512i*)obuf, (__m512i*)ibuf,
|
||||
(const __m512i*)istate );
|
||||
|
||||
sha256_16way_transform_le( (__m512i*)ostate2, (__m512i*)obuf,
|
||||
sha256_16x32_transform_le( (__m512i*)ostate2, (__m512i*)obuf,
|
||||
(const __m512i*)ostate );
|
||||
|
||||
for ( j = 0; j < 16*8; j++ )
|
||||
@@ -691,9 +691,9 @@ static inline void PBKDF2_SHA256_128_32_16way( uint32_t *tstate,
|
||||
uint32_t _ALIGN(128) buf[ 16*16 ];
|
||||
int i;
|
||||
|
||||
sha256_16way_transform_be( (__m512i*)tstate, (__m512i*)salt,
|
||||
sha256_16x32_transform_be( (__m512i*)tstate, (__m512i*)salt,
|
||||
(const __m512i*)tstate );
|
||||
sha256_16way_transform_be( (__m512i*)tstate, (__m512i*)( salt + 16*16),
|
||||
sha256_16x32_transform_be( (__m512i*)tstate, (__m512i*)( salt + 16*16),
|
||||
(const __m512i*)tstate );
|
||||
|
||||
final[ 0] = _mm512_set1_epi32( 0x00000001 );
|
||||
@@ -704,7 +704,7 @@ static inline void PBKDF2_SHA256_128_32_16way( uint32_t *tstate,
|
||||
= _mm512_setzero_si512();
|
||||
final[15] = _mm512_set1_epi32 ( 0x00000620 );
|
||||
|
||||
sha256_16way_transform_le( (__m512i*)tstate, final,
|
||||
sha256_16x32_transform_le( (__m512i*)tstate, final,
|
||||
(const __m512i*)tstate );
|
||||
|
||||
memcpy( buf, tstate, 16*32 );
|
||||
@@ -712,7 +712,7 @@ static inline void PBKDF2_SHA256_128_32_16way( uint32_t *tstate,
|
||||
memset( buf + 16*9, 0x00, 16*24 );
|
||||
for ( i = 0; i < 16; i++ ) buf[ 16*15 + i ] = 0x00000300;
|
||||
|
||||
sha256_16way_transform_le( (__m512i*)ostate, (__m512i*)buf,
|
||||
sha256_16x32_transform_le( (__m512i*)ostate, (__m512i*)buf,
|
||||
(const __m512i*)ostate );
|
||||
|
||||
for ( i = 0; i < 16*8; i++ )
|
||||
|
||||
@@ -31,7 +31,7 @@
|
||||
#include "hmac-sha256-hash-4way.h"
|
||||
#include "compat.h"
|
||||
|
||||
#if defined(__SSE2__)
|
||||
#if defined(__SSE2__) || defined(__ARM_NEON)
|
||||
// HMAC 4-way SSE2
|
||||
|
||||
/**
|
||||
@@ -62,30 +62,30 @@ hmac_sha256_4way_init( hmac_sha256_4way_context *ctx, const void *_K,
|
||||
/* If Klen > 64, the key is really SHA256(K). */
|
||||
if ( Klen > 64 )
|
||||
{
|
||||
sha256_4way_init( &ctx->ictx );
|
||||
sha256_4way_update( &ctx->ictx, K, Klen );
|
||||
sha256_4way_close( &ctx->ictx, khash );
|
||||
sha256_4x32_init( &ctx->ictx );
|
||||
sha256_4x32_update( &ctx->ictx, K, Klen );
|
||||
sha256_4x32_close( &ctx->ictx, khash );
|
||||
K = khash;
|
||||
Klen = 32;
|
||||
}
|
||||
|
||||
/* Inner SHA256 operation is SHA256(K xor [block of 0x36] || data). */
|
||||
sha256_4way_init( &ctx->ictx );
|
||||
sha256_4x32_init( &ctx->ictx );
|
||||
memset( pad, 0x36, 64*4 );
|
||||
|
||||
for ( i = 0; i < Klen; i++ )
|
||||
casti_v128u32( pad, i ) = _mm_xor_si128( casti_v128u32( pad, i ),
|
||||
casti_v128u32( K, i ) );
|
||||
casti_v128u32( pad, i ) = v128_xor( casti_v128u32( pad, i ),
|
||||
casti_v128u32( K, i ) );
|
||||
|
||||
sha256_4way_update( &ctx->ictx, pad, 64 );
|
||||
sha256_4x32_update( &ctx->ictx, pad, 64 );
|
||||
|
||||
/* Outer SHA256 operation is SHA256(K xor [block of 0x5c] || hash). */
|
||||
sha256_4way_init( &ctx->octx );
|
||||
sha256_4x32_init( &ctx->octx );
|
||||
memset( pad, 0x5c, 64*4 );
|
||||
for ( i = 0; i < Klen/4; i++ )
|
||||
casti_v128u32( pad, i ) = _mm_xor_si128( casti_v128u32( pad, i ),
|
||||
casti_v128u32( K, i ) );
|
||||
sha256_4way_update( &ctx->octx, pad, 64 );
|
||||
casti_v128u32( pad, i ) = v128_xor( casti_v128u32( pad, i ),
|
||||
casti_v128u32( K, i ) );
|
||||
sha256_4x32_update( &ctx->octx, pad, 64 );
|
||||
}
|
||||
|
||||
/* Add bytes to the HMAC-SHA256 operation. */
|
||||
@@ -94,7 +94,7 @@ hmac_sha256_4way_update( hmac_sha256_4way_context *ctx, const void *in,
|
||||
size_t len )
|
||||
{
|
||||
/* Feed data to the inner SHA256 operation. */
|
||||
sha256_4way_update( &ctx->ictx, in, len );
|
||||
sha256_4x32_update( &ctx->ictx, in, len );
|
||||
}
|
||||
|
||||
/* Finish an HMAC-SHA256 operation. */
|
||||
@@ -104,13 +104,13 @@ hmac_sha256_4way_close( hmac_sha256_4way_context *ctx, void *digest )
|
||||
unsigned char ihash[32*4] __attribute__ ((aligned (64)));
|
||||
|
||||
/* Finish the inner SHA256 operation. */
|
||||
sha256_4way_close( &ctx->ictx, ihash );
|
||||
sha256_4x32_close( &ctx->ictx, ihash );
|
||||
|
||||
/* Feed the inner hash to the outer SHA256 operation. */
|
||||
sha256_4way_update( &ctx->octx, ihash, 32 );
|
||||
sha256_4x32_update( &ctx->octx, ihash, 32 );
|
||||
|
||||
/* Finish the outer SHA256 operation. */
|
||||
sha256_4way_close( &ctx->octx, digest );
|
||||
sha256_4x32_close( &ctx->octx, digest );
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -126,7 +126,7 @@ pbkdf2_sha256_4way( uint8_t *buf, size_t dkLen,
|
||||
hmac_sha256_4way_context PShctx, hctx;
|
||||
uint8_t _ALIGN(128) T[32*4];
|
||||
uint8_t _ALIGN(128) U[32*4];
|
||||
__m128i ivec;
|
||||
v128u32_t ivec;
|
||||
size_t i, clen;
|
||||
uint64_t j;
|
||||
int k;
|
||||
@@ -139,7 +139,7 @@ pbkdf2_sha256_4way( uint8_t *buf, size_t dkLen,
|
||||
for ( i = 0; i * 32 < dkLen; i++ )
|
||||
{
|
||||
/* Generate INT(i + 1). */
|
||||
ivec = _mm_set1_epi32( bswap_32( i+1 ) );
|
||||
ivec = v128_32( bswap_32( i+1 ) );
|
||||
|
||||
/* Compute U_1 = PRF(P, S || INT(i)). */
|
||||
memcpy( &hctx, &PShctx, sizeof(hmac_sha256_4way_context) );
|
||||
@@ -158,8 +158,8 @@ pbkdf2_sha256_4way( uint8_t *buf, size_t dkLen,
|
||||
|
||||
/* ... xor U_j ... */
|
||||
for ( k = 0; k < 8; k++ )
|
||||
casti_v128u32( T, k ) = _mm_xor_si128( casti_v128u32( T, k ),
|
||||
casti_v128u32( U, k ) );
|
||||
casti_v128u32( T, k ) = v128_xor( casti_v128u32( T, k ),
|
||||
casti_v128u32( U, k ) );
|
||||
}
|
||||
|
||||
/* Copy as many bytes as necessary into buf. */
|
||||
@@ -199,30 +199,30 @@ hmac_sha256_8way_init( hmac_sha256_8way_context *ctx, const void *_K,
|
||||
/* If Klen > 64, the key is really SHA256(K). */
|
||||
if ( Klen > 64 )
|
||||
{
|
||||
sha256_8way_init( &ctx->ictx );
|
||||
sha256_8way_update( &ctx->ictx, K, Klen );
|
||||
sha256_8way_close( &ctx->ictx, khash );
|
||||
sha256_8x32_init( &ctx->ictx );
|
||||
sha256_8x32_update( &ctx->ictx, K, Klen );
|
||||
sha256_8x32_close( &ctx->ictx, khash );
|
||||
K = khash;
|
||||
Klen = 32;
|
||||
}
|
||||
|
||||
/* Inner SHA256 operation is SHA256(K xor [block of 0x36] || data). */
|
||||
sha256_8way_init( &ctx->ictx );
|
||||
sha256_8x32_init( &ctx->ictx );
|
||||
memset( pad, 0x36, 64*8);
|
||||
|
||||
for ( i = 0; i < Klen/4; i++ )
|
||||
casti_m256i( pad, i ) = _mm256_xor_si256( casti_m256i( pad, i ),
|
||||
casti_m256i( K, i ) );
|
||||
|
||||
sha256_8way_update( &ctx->ictx, pad, 64 );
|
||||
sha256_8x32_update( &ctx->ictx, pad, 64 );
|
||||
|
||||
/* Outer SHA256 operation is SHA256(K xor [block of 0x5c] || hash). */
|
||||
sha256_8way_init( &ctx->octx );
|
||||
sha256_8x32_init( &ctx->octx );
|
||||
memset( pad, 0x5c, 64*8 );
|
||||
for ( i = 0; i < Klen/4; i++ )
|
||||
casti_m256i( pad, i ) = _mm256_xor_si256( casti_m256i( pad, i ),
|
||||
casti_m256i( K, i ) );
|
||||
sha256_8way_update( &ctx->octx, pad, 64 );
|
||||
sha256_8x32_update( &ctx->octx, pad, 64 );
|
||||
}
|
||||
|
||||
void
|
||||
@@ -230,7 +230,7 @@ hmac_sha256_8way_update( hmac_sha256_8way_context *ctx, const void *in,
|
||||
size_t len )
|
||||
{
|
||||
/* Feed data to the inner SHA256 operation. */
|
||||
sha256_8way_update( &ctx->ictx, in, len );
|
||||
sha256_8x32_update( &ctx->ictx, in, len );
|
||||
}
|
||||
|
||||
/* Finish an HMAC-SHA256 operation. */
|
||||
@@ -240,13 +240,13 @@ hmac_sha256_8way_close( hmac_sha256_8way_context *ctx, void *digest )
|
||||
unsigned char ihash[32*8] __attribute__ ((aligned (128)));
|
||||
|
||||
/* Finish the inner SHA256 operation. */
|
||||
sha256_8way_close( &ctx->ictx, ihash );
|
||||
sha256_8x32_close( &ctx->ictx, ihash );
|
||||
|
||||
/* Feed the inner hash to the outer SHA256 operation. */
|
||||
sha256_8way_update( &ctx->octx, ihash, 32 );
|
||||
sha256_8x32_update( &ctx->octx, ihash, 32 );
|
||||
|
||||
/* Finish the outer SHA256 operation. */
|
||||
sha256_8way_close( &ctx->octx, digest );
|
||||
sha256_8x32_close( &ctx->octx, digest );
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -332,21 +332,21 @@ hmac_sha256_16way_init( hmac_sha256_16way_context *ctx, const void *_K,
|
||||
/* If Klen > 64, the key is really SHA256(K). */
|
||||
if ( Klen > 64 )
|
||||
{
|
||||
sha256_16way_init( &ctx->ictx );
|
||||
sha256_16way_update( &ctx->ictx, K, Klen );
|
||||
sha256_16way_close( &ctx->ictx, khash );
|
||||
sha256_16x32_init( &ctx->ictx );
|
||||
sha256_16x32_update( &ctx->ictx, K, Klen );
|
||||
sha256_16x32_close( &ctx->ictx, khash );
|
||||
K = khash;
|
||||
Klen = 32;
|
||||
}
|
||||
|
||||
/* Inner SHA256 operation is SHA256(K xor [block of 0x36] || data). */
|
||||
sha256_16way_init( &ctx->ictx );
|
||||
sha256_16x32_init( &ctx->ictx );
|
||||
memset( pad, 0x36, 64*16 );
|
||||
|
||||
for ( i = 0; i < Klen; i++ )
|
||||
casti_m512i( pad, i ) = _mm512_xor_si512( casti_m512i( pad, i ),
|
||||
casti_m512i( K, i ) );
|
||||
sha256_16way_update( &ctx->ictx, pad, 64 );
|
||||
sha256_16x32_update( &ctx->ictx, pad, 64 );
|
||||
|
||||
/* Outer SHA256 operation is SHA256(K xor [block of 0x5c] || hash). */
|
||||
sha256_16way_init( &ctx->octx );
|
||||
@@ -354,7 +354,7 @@ hmac_sha256_16way_init( hmac_sha256_16way_context *ctx, const void *_K,
|
||||
for ( i = 0; i < Klen/4; i++ )
|
||||
casti_m512i( pad, i ) = _mm512_xor_si512( casti_m512i( pad, i ),
|
||||
casti_m512i( K, i ) );
|
||||
sha256_16way_update( &ctx->octx, pad, 64 );
|
||||
sha256_16x32_update( &ctx->octx, pad, 64 );
|
||||
}
|
||||
|
||||
void
|
||||
@@ -362,7 +362,7 @@ hmac_sha256_16way_update( hmac_sha256_16way_context *ctx, const void *in,
|
||||
size_t len )
|
||||
{
|
||||
/* Feed data to the inner SHA256 operation. */
|
||||
sha256_16way_update( &ctx->ictx, in, len );
|
||||
sha256_16x32_update( &ctx->ictx, in, len );
|
||||
}
|
||||
|
||||
/* Finish an HMAC-SHA256 operation. */
|
||||
@@ -372,13 +372,13 @@ hmac_sha256_16way_close( hmac_sha256_16way_context *ctx, void *digest )
|
||||
unsigned char ihash[32*16] __attribute__ ((aligned (128)));
|
||||
|
||||
/* Finish the inner SHA256 operation. */
|
||||
sha256_16way_close( &ctx->ictx, ihash );
|
||||
sha256_16x32_close( &ctx->ictx, ihash );
|
||||
|
||||
/* Feed the inner hash to the outer SHA256 operation. */
|
||||
sha256_16way_update( &ctx->octx, ihash, 32 );
|
||||
sha256_16x32_update( &ctx->octx, ihash, 32 );
|
||||
|
||||
/* Finish the outer SHA256 operation. */
|
||||
sha256_16way_close( &ctx->octx, digest );
|
||||
sha256_16x32_close( &ctx->octx, digest );
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/*-
|
||||
* Copyright 2005,2007,2009 Colin Percival
|
||||
* Copyright 2020 JayDDee@gmailcom
|
||||
* Copyright 2020 JayDDee246@gmailcom
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
@@ -38,11 +38,12 @@
|
||||
#include "simd-utils.h"
|
||||
#include "sha256-hash.h"
|
||||
|
||||
#if defined(__SSE2__)
|
||||
#if defined(__SSE2__) || defined(__ARM_NEON)
|
||||
|
||||
typedef struct _hmac_sha256_4way_context
|
||||
{
|
||||
sha256_4way_context ictx;
|
||||
sha256_4way_context octx;
|
||||
sha256_4x32_context ictx;
|
||||
sha256_4x32_context octx;
|
||||
} hmac_sha256_4way_context;
|
||||
|
||||
//void SHA256_Buf( const void *, size_t len, uint8_t digest[32] );
|
||||
@@ -67,8 +68,8 @@ void pbkdf2_sha256_4way( uint8_t *, size_t, const uint8_t *, size_t,
|
||||
|
||||
typedef struct _hmac_sha256_8way_context
|
||||
{
|
||||
sha256_8way_context ictx;
|
||||
sha256_8way_context octx;
|
||||
sha256_8x32_context ictx;
|
||||
sha256_8x32_context octx;
|
||||
} hmac_sha256_8way_context;
|
||||
|
||||
//void SHA256_Buf( const void *, size_t len, uint8_t digest[32] );
|
||||
@@ -88,8 +89,8 @@ void pbkdf2_sha256_8way( uint8_t *, size_t, const uint8_t *, size_t,
|
||||
|
||||
typedef struct _hmac_sha256_16way_context
|
||||
{
|
||||
sha256_16way_context ictx;
|
||||
sha256_16way_context octx;
|
||||
sha256_16x32_context ictx;
|
||||
sha256_16x32_context octx;
|
||||
} hmac_sha256_16way_context;
|
||||
|
||||
//void SHA256_Buf( const void *, size_t len, uint8_t digest[32] );
|
||||
|
||||
@@ -30,6 +30,7 @@ static const uint32_t K256[64] =
|
||||
0x90BEFFFA, 0xA4506CEB, 0xBEF9A3F7, 0xC67178F2
|
||||
};
|
||||
|
||||
#if defined(__SSE2__) || defined(__ARM_NEON)
|
||||
// SHA-256 4 way SSE2
|
||||
|
||||
#define CHs(X, Y, Z) \
|
||||
@@ -309,142 +310,6 @@ void sha256_4x32_final_rounds( v128_t *state_out, const v128_t *data,
|
||||
v128_store( state_out + 7, H );
|
||||
}
|
||||
|
||||
|
||||
# if 0
|
||||
|
||||
// Working correctly but still slower
|
||||
int sha256_4x32_transform_le_short( v128_t *state_out, const v128_t *data,
|
||||
const v128_t *state_in, const uint32_t *target )
|
||||
{
|
||||
v128_t A, B, C, D, E, F, G, H, T0, T1, T2;
|
||||
v128_t vmask, targ, hash;
|
||||
int t6_mask, flip;
|
||||
v128_t W[16]; v128_memcpy( W, data, 16 );
|
||||
|
||||
A = v128_load( state_in );
|
||||
B = v128_load( state_in+1 );
|
||||
C = v128_load( state_in+2 );
|
||||
D = v128_load( state_in+3 );
|
||||
E = v128_load( state_in+4 );
|
||||
F = v128_load( state_in+5 );
|
||||
G = v128_load( state_in+6 );
|
||||
H = v128_load( state_in+7 );
|
||||
|
||||
const v128_t IV7 = H;
|
||||
const v128_t IV6 = G;
|
||||
|
||||
SHA256_4X32_16ROUNDS( A, B, C, D, E, F, G, H, 0 );
|
||||
SHA256_4X32_MSG_EXPANSION( W );
|
||||
SHA256_4X32_16ROUNDS( A, B, C, D, E, F, G, H, 16 );
|
||||
SHA256_4X32_MSG_EXPANSION( W );
|
||||
SHA256_4X32_16ROUNDS( A, B, C, D, E, F, G, H, 32 );
|
||||
|
||||
W[ 0] = SHA256_4X32_MEXP( W[14], W[ 9], W[ 1], W[ 0] );
|
||||
W[ 1] = SHA256_4X32_MEXP( W[15], W[10], W[ 2], W[ 1] );
|
||||
W[ 2] = SHA256_4X32_MEXP( W[ 0], W[11], W[ 3], W[ 2] );
|
||||
W[ 3] = SHA256_4X32_MEXP( W[ 1], W[12], W[ 4], W[ 3] );
|
||||
W[ 4] = SHA256_4X32_MEXP( W[ 2], W[13], W[ 5], W[ 4] );
|
||||
W[ 5] = SHA256_4X32_MEXP( W[ 3], W[14], W[ 6], W[ 5] );
|
||||
W[ 6] = SHA256_4X32_MEXP( W[ 4], W[15], W[ 7], W[ 6] );
|
||||
W[ 7] = SHA256_4X32_MEXP( W[ 5], W[ 0], W[ 8], W[ 7] );
|
||||
W[ 8] = SHA256_4X32_MEXP( W[ 6], W[ 1], W[ 9], W[ 8] );
|
||||
W[ 9] = SHA256_4X32_MEXP( W[ 7], W[ 2], W[10], W[ 9] );
|
||||
W[10] = SHA256_4X32_MEXP( W[ 8], W[ 3], W[11], W[10] );
|
||||
W[11] = SHA256_4X32_MEXP( W[ 9], W[ 4], W[12], W[11] );
|
||||
W[12] = SHA256_4X32_MEXP( W[10], W[ 5], W[13], W[12] );
|
||||
|
||||
v128_t X_xor_Y, Y_xor_Z = v128_xor( B, C );
|
||||
|
||||
SHA256_4X32_ROUND( A, B, C, D, E, F, G, H, 0, 48 );
|
||||
SHA256_4X32_ROUND( H, A, B, C, D, E, F, G, 1, 48 );
|
||||
SHA256_4X32_ROUND( G, H, A, B, C, D, E, F, 2, 48 );
|
||||
SHA256_4X32_ROUND( F, G, H, A, B, C, D, E, 3, 48 );
|
||||
SHA256_4X32_ROUND( E, F, G, H, A, B, C, D, 4, 48 );
|
||||
SHA256_4X32_ROUND( D, E, F, G, H, A, B, C, 5, 48 );
|
||||
SHA256_4X32_ROUND( C, D, E, F, G, H, A, B, 6, 48 );
|
||||
SHA256_4X32_ROUND( B, C, D, E, F, G, H, A, 7, 48 );
|
||||
SHA256_4X32_ROUND( A, B, C, D, E, F, G, H, 8, 48 );
|
||||
SHA256_4X32_ROUND( H, A, B, C, D, E, F, G, 9, 48 );
|
||||
|
||||
T0 = v128_add32( v128_32( K256[58] ),
|
||||
v128_add4_32( BSG2_1( C ), CHs( C, D, E ), W[10], F ) );
|
||||
B = v128_add32( B, T0 );
|
||||
|
||||
T1 = v128_add32( v128_32( K256[59] ),
|
||||
v128_add4_32( BSG2_1( B ), CHs( B, C, D ), W[11], E ) );
|
||||
A = v128_add32( A, T1 );
|
||||
|
||||
T2 = v128_add32( v128_32( K256[60] ),
|
||||
v128_add4_32( BSG2_1( A ), CHs( A, B, C ), W[12], D ) );
|
||||
H = v128_add32( H, T2 );
|
||||
|
||||
targ = v128_32( target[7] );
|
||||
hash = v128_bswap32( v128_add32( H, IV7 ) );
|
||||
|
||||
flip = ( (int)target[7] < 0 ? 0xf : 0 ) ^ v128_movmask32( hash );
|
||||
|
||||
if ( likely(
|
||||
0xf == ( flip ^ v128_movmask32( v128_cmpgt32( hash, targ ) ) ) ))
|
||||
return 0;
|
||||
|
||||
t6_mask = v128_movmask32( vmask = v128_cmpeq32( hash, targ ) );
|
||||
|
||||
// round 58 part 2
|
||||
F = v128_add32( T0, v128_add32( BSG2_0( G ), MAJs( G, H, A ) ) );
|
||||
|
||||
// round 61 part 1
|
||||
W[13] = SHA256_4X32_MEXP( W[11], W[ 6], W[14], W[13] );
|
||||
T0 = v128_add32( v128_32( K256[61] ),
|
||||
v128_add4_32( BSG2_1( H ), CHs( H, A, B ), W[13], C ) );
|
||||
G = v128_add32( G, T0 );
|
||||
|
||||
if ( t6_mask )
|
||||
{
|
||||
targ = v128_and( vmask, v128_32( target[6] ) );
|
||||
hash = v128_bswap32( v128_add32( G, IV6 ) );
|
||||
|
||||
if ( ( 0 != ( t6_mask & v128_movmask32( v128_cmpeq32( hash, targ ) ) ) ))
|
||||
return 0;
|
||||
else
|
||||
{
|
||||
flip = ( (int)target[6] < 0 ? 0xf : 0 ) ^ v128_movmask32( hash );
|
||||
if ( 0 != ( t6_mask & ( flip ^ v128_movmask32(
|
||||
v128_cmpgt32( hash, targ ) ) ) ) )
|
||||
return 0;
|
||||
else if ( target[6] == 0x80000000 )
|
||||
{
|
||||
if ( 0 == ( t6_mask & v128_movmask32(
|
||||
v128_cmpgt32( hash, v128_xor( hash, hash ) ) ) ) )
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// rounds 59 to 61 part 2
|
||||
E = v128_add32( T1, v128_add32( BSG2_0( F ), MAJs( F, G, H ) ) );
|
||||
D = v128_add32( T2, v128_add32( BSG2_0( E ), MAJs( E, F, G ) ) );
|
||||
C = v128_add32( T0, v128_add32( BSG2_0( D ), MAJs( D, E, F ) ) );
|
||||
|
||||
// rounds 62 & 63
|
||||
W[14] = SHA256_4X32_MEXP( W[12], W[ 7], W[15], W[14] );
|
||||
W[15] = SHA256_4X32_MEXP( W[13], W[ 8], W[ 0], W[15] );
|
||||
|
||||
SHA256_4X32_ROUND( C, D, E, F, G, H, A, B, 14, 48 );
|
||||
SHA256_4X32_ROUND( B, C, D, E, F, G, H, A, 15, 48 );
|
||||
|
||||
state_out[0] = v128_add32( state_in[0], A );
|
||||
state_out[1] = v128_add32( state_in[1], B );
|
||||
state_out[2] = v128_add32( state_in[2], C );
|
||||
state_out[3] = v128_add32( state_in[3], D );
|
||||
state_out[4] = v128_add32( state_in[4], E );
|
||||
state_out[5] = v128_add32( state_in[5], F );
|
||||
state_out[6] = v128_add32( state_in[6], G );
|
||||
state_out[7] = v128_add32( state_in[7], H );
|
||||
return 1;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
void sha256_4x32_init( sha256_4x32_context *sc )
|
||||
{
|
||||
sc->count_high = sc->count_low = 0;
|
||||
@@ -529,29 +394,31 @@ void sha256_4x32_full( void *dst, const void *data, size_t len )
|
||||
sha256_4x32_close( &ctx, dst );
|
||||
}
|
||||
|
||||
#endif // SSE2 || NEON
|
||||
|
||||
#if defined(__AVX2__)
|
||||
|
||||
// SHA-256 8 way
|
||||
|
||||
#define BSG2_0x(x) \
|
||||
_mm256_xor_si256( _mm256_xor_si256( mm256_ror_32( x, 2 ), \
|
||||
mm256_ror_32( x, 13 ) ), \
|
||||
mm256_ror_32( x, 22 ) )
|
||||
mm256_xor3( mm256_ror_32( x, 2 ), \
|
||||
mm256_ror_32( x, 13 ), \
|
||||
mm256_ror_32( x, 22 ) )
|
||||
|
||||
#define BSG2_1x(x) \
|
||||
_mm256_xor_si256( _mm256_xor_si256( mm256_ror_32( x, 6 ), \
|
||||
mm256_ror_32( x, 11 ) ), \
|
||||
mm256_ror_32( x, 25 ) )
|
||||
mm256_xor3( mm256_ror_32( x, 6 ), \
|
||||
mm256_ror_32( x, 11 ), \
|
||||
mm256_ror_32( x, 25 ) )
|
||||
|
||||
#define SSG2_0x(x) \
|
||||
_mm256_xor_si256( _mm256_xor_si256( mm256_ror_32( x, 7 ), \
|
||||
mm256_ror_32( x, 18 ) ), \
|
||||
_mm256_srli_epi32( x, 3 ) )
|
||||
mm256_xor3( mm256_ror_32( x, 7 ), \
|
||||
mm256_ror_32( x, 18 ), \
|
||||
_mm256_srli_epi32( x, 3 ) )
|
||||
|
||||
#define SSG2_1x(x) \
|
||||
_mm256_xor_si256( _mm256_xor_si256( mm256_ror_32( x, 17 ), \
|
||||
mm256_ror_32( x, 19 ) ), \
|
||||
_mm256_srli_epi32( x, 10 ) )
|
||||
mm256_xor3( mm256_ror_32( x, 17 ), \
|
||||
mm256_ror_32( x, 19 ), \
|
||||
_mm256_srli_epi32( x, 10 ) )
|
||||
|
||||
#define SHA256_8WAY_MEXP( a, b, c, d ) \
|
||||
mm256_add4_32( SSG2_1x( a ), b, SSG2_0x( c ), d );
|
||||
@@ -574,13 +441,8 @@ void sha256_4x32_full( void *dst, const void *data, size_t len )
|
||||
W[14] = SHA256_8WAY_MEXP( W[12], W[ 7], W[15], W[14] ); \
|
||||
W[15] = SHA256_8WAY_MEXP( W[13], W[ 8], W[ 0], W[15] );
|
||||
|
||||
|
||||
// With AVX512VL ternary logic optimizations are available.
|
||||
// If not optimize by forwarding the result of X^Y in MAJ to the next round
|
||||
// to avoid recalculating it as Y^Z. This optimization is not applicable
|
||||
// when MAJ is optimized with ternary logic.
|
||||
|
||||
#if defined(VL256)
|
||||
// AVX512 or AVX10-256
|
||||
|
||||
#define CHx(X, Y, Z) _mm256_ternarylogic_epi32( X, Y, Z, 0xca )
|
||||
|
||||
@@ -745,7 +607,7 @@ static inline void SHA256_8WAY_TRANSFORM( __m256i *out, __m256i *W,
|
||||
}
|
||||
|
||||
// accepts LE input data
|
||||
void sha256_8way_transform_le( __m256i *state_out, const __m256i *data,
|
||||
void sha256_8x32_transform_le( __m256i *state_out, const __m256i *data,
|
||||
const __m256i *state_in )
|
||||
{
|
||||
__m256i W[16];
|
||||
@@ -754,7 +616,7 @@ void sha256_8way_transform_le( __m256i *state_out, const __m256i *data,
|
||||
}
|
||||
|
||||
// Accepts BE input data, need to bswap
|
||||
void sha256_8way_transform_be( __m256i *state_out, const __m256i *data,
|
||||
void sha256_8x32_transform_be( __m256i *state_out, const __m256i *data,
|
||||
const __m256i *state_in )
|
||||
{
|
||||
__m256i W[16];
|
||||
@@ -764,7 +626,7 @@ void sha256_8way_transform_be( __m256i *state_out, const __m256i *data,
|
||||
}
|
||||
|
||||
// Aggressive prehashing, LE byte order
|
||||
void sha256_8way_prehash_3rounds( __m256i *state_mid, __m256i *X,
|
||||
void sha256_8x32_prehash_3rounds( __m256i *state_mid, __m256i *X,
|
||||
const __m256i *W, const __m256i *state_in )
|
||||
{
|
||||
__m256i A, B, C, D, E, F, G, H, T1;
|
||||
@@ -813,7 +675,7 @@ void sha256_8way_prehash_3rounds( __m256i *state_mid, __m256i *X,
|
||||
_mm256_store_si256( state_mid + 7, H );
|
||||
}
|
||||
|
||||
void sha256_8way_final_rounds( __m256i *state_out, const __m256i *data,
|
||||
void sha256_8x32_final_rounds( __m256i *state_out, const __m256i *data,
|
||||
const __m256i *state_in, const __m256i *state_mid, const __m256i *X )
|
||||
{
|
||||
__m256i A, B, C, D, E, F, G, H;
|
||||
@@ -914,14 +776,12 @@ void sha256_8way_final_rounds( __m256i *state_out, const __m256i *data,
|
||||
_mm256_store_si256( state_out + 7, H );
|
||||
}
|
||||
|
||||
int sha256_8way_transform_le_short( __m256i *state_out, const __m256i *data,
|
||||
int sha256_8x32_transform_le_short( __m256i *state_out, const __m256i *data,
|
||||
const __m256i *state_in, const uint32_t *target )
|
||||
{
|
||||
__m256i A, B, C, D, E, F, G, H, T0, T1, T2;
|
||||
__m256i vmask, targ, hash;
|
||||
__m256i W[16]; memcpy_256( W, data, 16 );
|
||||
const __m256i bswap_shuf = mm256_bcast_m128( _mm_set_epi64x(
|
||||
0x0c0d0e0f08090a0b, 0x0405060700010203 ) );
|
||||
uint8_t flip, t6_mask;
|
||||
|
||||
A = _mm256_load_si256( state_in );
|
||||
@@ -1012,7 +872,7 @@ int sha256_8way_transform_le_short( __m256i *state_out, const __m256i *data,
|
||||
|
||||
// Got H, test it.
|
||||
targ = v256_32( target[7] );
|
||||
hash = _mm256_shuffle_epi8( _mm256_add_epi32( H, IV7 ), bswap_shuf );
|
||||
hash = mm256_bswap_32( _mm256_add_epi32( H, IV7 ) );
|
||||
if ( target[7] )
|
||||
{
|
||||
flip = ( (int)target[7] < 0 ? -1 : 0 ) ^ mm256_movmask_32( hash );
|
||||
@@ -1035,7 +895,7 @@ int sha256_8way_transform_le_short( __m256i *state_out, const __m256i *data,
|
||||
{
|
||||
// Testing H was inconclusive: hash7 == target7, need to test G
|
||||
targ = _mm256_and_si256( vmask, v256_32( target[6] ) );
|
||||
hash = _mm256_shuffle_epi8( _mm256_add_epi32( G, IV6 ), bswap_shuf );
|
||||
hash = mm256_bswap_32( _mm256_add_epi32( G, IV6 ) );
|
||||
|
||||
if ( likely( 0 == ( t6_mask & mm256_movmask_32(
|
||||
_mm256_cmpeq_epi32( hash, targ ) ) ) ))
|
||||
@@ -1083,8 +943,7 @@ int sha256_8way_transform_le_short( __m256i *state_out, const __m256i *data,
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
void sha256_8way_init( sha256_8way_context *sc )
|
||||
void sha256_8x32_init( sha256_8x32_context *sc )
|
||||
{
|
||||
sc->count_high = sc->count_low = 0;
|
||||
sc->val[0] = v256_32( sha256_iv[0] );
|
||||
@@ -1100,7 +959,7 @@ void sha256_8way_init( sha256_8way_context *sc )
|
||||
// need to handle odd byte length for yespower.
|
||||
// Assume only last update is odd.
|
||||
|
||||
void sha256_8way_update( sha256_8way_context *sc, const void *data, size_t len )
|
||||
void sha256_8x32_update( sha256_8x32_context *sc, const void *data, size_t len )
|
||||
{
|
||||
__m256i *vdata = (__m256i*)data;
|
||||
size_t ptr;
|
||||
@@ -1121,7 +980,7 @@ void sha256_8way_update( sha256_8way_context *sc, const void *data, size_t len )
|
||||
len -= clen;
|
||||
if ( ptr == buf_size )
|
||||
{
|
||||
sha256_8way_transform_be( sc->val, sc->buf, sc->val );
|
||||
sha256_8x32_transform_be( sc->val, sc->buf, sc->val );
|
||||
ptr = 0;
|
||||
}
|
||||
clow = sc->count_low;
|
||||
@@ -1132,7 +991,7 @@ void sha256_8way_update( sha256_8way_context *sc, const void *data, size_t len )
|
||||
}
|
||||
}
|
||||
|
||||
void sha256_8way_close( sha256_8way_context *sc, void *dst )
|
||||
void sha256_8x32_close( sha256_8x32_context *sc, void *dst )
|
||||
{
|
||||
unsigned ptr;
|
||||
uint32_t low, high;
|
||||
@@ -1146,7 +1005,7 @@ void sha256_8way_close( sha256_8way_context *sc, void *dst )
|
||||
if ( ptr > pad )
|
||||
{
|
||||
memset_zero_256( sc->buf + (ptr>>2), (buf_size - ptr) >> 2 );
|
||||
sha256_8way_transform_be( sc->val, sc->buf, sc->val );
|
||||
sha256_8x32_transform_be( sc->val, sc->buf, sc->val );
|
||||
memset_zero_256( sc->buf, pad >> 2 );
|
||||
}
|
||||
else
|
||||
@@ -1159,17 +1018,17 @@ void sha256_8way_close( sha256_8way_context *sc, void *dst )
|
||||
sc->buf[ pad >> 2 ] = v256_32( bswap_32( high ) );
|
||||
sc->buf[ ( pad+4 ) >> 2 ] = v256_32( bswap_32( low ) );
|
||||
|
||||
sha256_8way_transform_be( sc->val, sc->buf, sc->val );
|
||||
sha256_8x32_transform_be( sc->val, sc->buf, sc->val );
|
||||
|
||||
mm256_block_bswap_32( dst, sc->val );
|
||||
}
|
||||
|
||||
void sha256_8way_full( void *dst, const void *data, size_t len )
|
||||
void sha256_8x32_full( void *dst, const void *data, size_t len )
|
||||
{
|
||||
sha256_8way_context ctx;
|
||||
sha256_8way_init( &ctx );
|
||||
sha256_8way_update( &ctx, data, len );
|
||||
sha256_8way_close( &ctx, dst );
|
||||
sha256_8x32_context ctx;
|
||||
sha256_8x32_init( &ctx );
|
||||
sha256_8x32_update( &ctx, data, len );
|
||||
sha256_8x32_close( &ctx, dst );
|
||||
}
|
||||
|
||||
#if defined(SIMD512)
|
||||
@@ -1302,7 +1161,7 @@ static inline void SHA256_16WAY_TRANSFORM( __m512i *out, __m512i *W,
|
||||
}
|
||||
|
||||
// accepts LE input data
|
||||
void sha256_16way_transform_le( __m512i *state_out, const __m512i *data,
|
||||
void sha256_16x32_transform_le( __m512i *state_out, const __m512i *data,
|
||||
const __m512i *state_in )
|
||||
{
|
||||
__m512i W[16];
|
||||
@@ -1311,7 +1170,7 @@ void sha256_16way_transform_le( __m512i *state_out, const __m512i *data,
|
||||
}
|
||||
|
||||
// Accepts BE input data, need to bswap
|
||||
void sha256_16way_transform_be( __m512i *state_out, const __m512i *data,
|
||||
void sha256_16x32_transform_be( __m512i *state_out, const __m512i *data,
|
||||
const __m512i *state_in )
|
||||
{
|
||||
__m512i W[16];
|
||||
@@ -1321,7 +1180,7 @@ void sha256_16way_transform_be( __m512i *state_out, const __m512i *data,
|
||||
}
|
||||
|
||||
// Aggressive prehashing, LE byte order
|
||||
void sha256_16way_prehash_3rounds( __m512i *state_mid, __m512i *X,
|
||||
void sha256_16x32_prehash_3rounds( __m512i *state_mid, __m512i *X,
|
||||
const __m512i *W, const __m512i *state_in )
|
||||
{
|
||||
__m512i A, B, C, D, E, F, G, H, T1;
|
||||
@@ -1369,7 +1228,7 @@ void sha256_16way_prehash_3rounds( __m512i *state_mid, __m512i *X,
|
||||
_mm512_store_si512( state_mid + 7, H );
|
||||
}
|
||||
|
||||
void sha256_16way_final_rounds( __m512i *state_out, const __m512i *data,
|
||||
void sha256_16x32_final_rounds( __m512i *state_out, const __m512i *data,
|
||||
const __m512i *state_in, const __m512i *state_mid, const __m512i *X )
|
||||
{
|
||||
__m512i A, B, C, D, E, F, G, H;
|
||||
@@ -1470,15 +1329,13 @@ void sha256_16way_final_rounds( __m512i *state_out, const __m512i *data,
|
||||
|
||||
// returns 0 if hash aborted early and invalid,
|
||||
// returns 1 for completed hash with at least one valid candidate.
|
||||
int sha256_16way_transform_le_short( __m512i *state_out, const __m512i *data,
|
||||
int sha256_16x32_transform_le_short( __m512i *state_out, const __m512i *data,
|
||||
const __m512i *state_in, const uint32_t *target )
|
||||
{
|
||||
__m512i A, B, C, D, E, F, G, H, hash, targ;
|
||||
__m512i T0, T1, T2;
|
||||
__m512i W[16]; memcpy_512( W, data, 16 );
|
||||
__mmask16 t6_mask;
|
||||
const __m512i bswap_shuf = mm512_bcast_m128( _mm_set_epi64x(
|
||||
0x0c0d0e0f08090a0b, 0x0405060700010203 ) );
|
||||
|
||||
A = _mm512_load_si512( state_in );
|
||||
B = _mm512_load_si512( state_in+1 );
|
||||
@@ -1588,7 +1445,7 @@ int sha256_16way_transform_le_short( __m512i *state_out, const __m512i *data,
|
||||
H = _mm512_add_epi32( H, T2 );
|
||||
|
||||
// got H, test it against target[7]
|
||||
hash = _mm512_shuffle_epi8( _mm512_add_epi32( H , IV7 ), bswap_shuf );
|
||||
hash = mm512_bswap_32( _mm512_add_epi32( H , IV7 ) );
|
||||
targ = v512_32( target[7] );
|
||||
if ( target[7] )
|
||||
if ( likely( 0 == _mm512_cmple_epu32_mask( hash, targ ) ))
|
||||
@@ -1608,7 +1465,7 @@ int sha256_16way_transform_le_short( __m512i *state_out, const __m512i *data,
|
||||
// got G, test it against target[6] if indicated
|
||||
if ( (uint16_t)t6_mask )
|
||||
{
|
||||
hash = _mm512_shuffle_epi8( _mm512_add_epi32( G, IV6 ), bswap_shuf );
|
||||
hash = mm512_bswap_32( _mm512_add_epi32( G, IV6 ) );
|
||||
targ = v512_32( target[6] );
|
||||
if ( likely( 0 == _mm512_mask_cmple_epu32_mask( t6_mask, hash, targ ) ))
|
||||
return 0;
|
||||
@@ -1644,7 +1501,7 @@ int sha256_16way_transform_le_short( __m512i *state_out, const __m512i *data,
|
||||
return 1;
|
||||
}
|
||||
|
||||
void sha256_16way_init( sha256_16way_context *sc )
|
||||
void sha256_16x32_init( sha256_16x32_context *sc )
|
||||
{
|
||||
sc->count_high = sc->count_low = 0;
|
||||
sc->val[0] = v512_32( sha256_iv[0] );
|
||||
@@ -1657,7 +1514,7 @@ void sha256_16way_init( sha256_16way_context *sc )
|
||||
sc->val[7] = v512_32( sha256_iv[7] );
|
||||
}
|
||||
|
||||
void sha256_16way_update( sha256_16way_context *sc, const void *data,
|
||||
void sha256_16x32_update( sha256_16x32_context *sc, const void *data,
|
||||
size_t len )
|
||||
{
|
||||
__m512i *vdata = (__m512i*)data;
|
||||
@@ -1679,7 +1536,7 @@ void sha256_16way_update( sha256_16way_context *sc, const void *data,
|
||||
len -= clen;
|
||||
if ( ptr == buf_size )
|
||||
{
|
||||
sha256_16way_transform_be( sc->val, sc->buf, sc->val );
|
||||
sha256_16x32_transform_be( sc->val, sc->buf, sc->val );
|
||||
ptr = 0;
|
||||
}
|
||||
clow = sc->count_low;
|
||||
@@ -1690,7 +1547,7 @@ void sha256_16way_update( sha256_16way_context *sc, const void *data,
|
||||
}
|
||||
}
|
||||
|
||||
void sha256_16way_close( sha256_16way_context *sc, void *dst )
|
||||
void sha256_16x32_close( sha256_16x32_context *sc, void *dst )
|
||||
{
|
||||
unsigned ptr;
|
||||
uint32_t low, high;
|
||||
@@ -1704,7 +1561,7 @@ void sha256_16way_close( sha256_16way_context *sc, void *dst )
|
||||
if ( ptr > pad )
|
||||
{
|
||||
memset_zero_512( sc->buf + (ptr>>2), (buf_size - ptr) >> 2 );
|
||||
sha256_16way_transform_be( sc->val, sc->buf, sc->val );
|
||||
sha256_16x32_transform_be( sc->val, sc->buf, sc->val );
|
||||
memset_zero_512( sc->buf, pad >> 2 );
|
||||
}
|
||||
else
|
||||
@@ -1717,17 +1574,17 @@ void sha256_16way_close( sha256_16way_context *sc, void *dst )
|
||||
sc->buf[ pad >> 2 ] = v512_32( bswap_32( high ) );
|
||||
sc->buf[ ( pad+4 ) >> 2 ] = v512_32( bswap_32( low ) );
|
||||
|
||||
sha256_16way_transform_be( sc->val, sc->buf, sc->val );
|
||||
sha256_16x32_transform_be( sc->val, sc->buf, sc->val );
|
||||
|
||||
mm512_block_bswap_32( dst, sc->val );
|
||||
}
|
||||
|
||||
void sha256_16way_full( void *dst, const void *data, size_t len )
|
||||
void sha256_16x32_full( void *dst, const void *data, size_t len )
|
||||
{
|
||||
sha256_16way_context ctx;
|
||||
sha256_16way_init( &ctx );
|
||||
sha256_16way_update( &ctx, data, len );
|
||||
sha256_16way_close( &ctx, dst );
|
||||
sha256_16x32_context ctx;
|
||||
sha256_16x32_init( &ctx );
|
||||
sha256_16x32_update( &ctx, data, len );
|
||||
sha256_16x32_close( &ctx, dst );
|
||||
}
|
||||
|
||||
#undef CH
|
||||
|
||||
@@ -180,20 +180,9 @@ void sha256_8x32_final_rounds( __m256i *state_out, const __m256i *data,
|
||||
int sha256_8x32_transform_le_short( __m256i *state_out, const __m256i *data,
|
||||
const __m256i *state_in, const uint32_t *target );
|
||||
|
||||
// Temporary API during naming transition
|
||||
#define sha256_8way_context sha256_8x32_context
|
||||
#define sha256_8way_init sha256_8x32_init
|
||||
#define sha256_8way_update sha256_8x32_update
|
||||
#define sha256_8way_close sha256_8x32_close
|
||||
#define sha256_8way_full sha256_8x32_full
|
||||
#define sha256_8way_transform_le sha256_8x32_transform_le
|
||||
#define sha256_8way_transform_be sha256_8x32_transform_be
|
||||
#define sha256_8way_prehash_3rounds sha256_8x32_prehash_3rounds
|
||||
#define sha256_8way_final_rounds sha256_8x32_final_rounds
|
||||
#define sha256_8way_transform_le_short sha256_8x32_transform_le_short
|
||||
|
||||
#endif // AVX2
|
||||
|
||||
#if defined(__SSE2__) || defined(__ARM_NEON)
|
||||
// SHA-256 4 way x86_64 with SSE2 or AArch64 with NEON
|
||||
|
||||
typedef struct
|
||||
@@ -219,16 +208,5 @@ void sha256_4x32_final_rounds( v128_t *state_out, const v128_t *data,
|
||||
int sha256_4x32_transform_le_short( v128_t *state_out, const v128_t *data,
|
||||
const v128_t *state_in, const uint32_t *target );
|
||||
|
||||
// Temporary API during naming transition
|
||||
#define sha256_4way_context sha256_4x32_context
|
||||
#define sha256_4way_init sha256_4x32_init
|
||||
#define sha256_4way_update sha256_4x32_update
|
||||
#define sha256_4way_close sha256_4x32_close
|
||||
#define sha256_4way_full sha256_4x32_full
|
||||
#define sha256_4way_transform_le sha256_4x32_transform_le
|
||||
#define sha256_4way_transform_be sha256_4x32_transform_be
|
||||
#define sha256_4way_prehash_3rounds sha256_4x32_prehash_3rounds
|
||||
#define sha256_4way_final_rounds sha256_4x32_final_rounds
|
||||
#define sha256_4way_transform_le_short sha256_4x32_transform_le_short
|
||||
|
||||
#endif
|
||||
#endif // SSE2 || NEON
|
||||
#endif // SHA256_HASH_H__
|
||||
|
||||
@@ -32,8 +32,6 @@ int scanhash_sha256d_sha( struct work *work, uint32_t max_nonce,
|
||||
uint32_t n = first_nonce;
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
const v128_t shuf_bswap32 =
|
||||
v128_set64( 0x0c0d0e0f08090a0bULL, 0x0405060700010203ULL );
|
||||
|
||||
// hash first 64 byte block of data
|
||||
sha256_transform_le( mstatea, pdata, sha256_iv );
|
||||
@@ -69,10 +67,8 @@ int scanhash_sha256d_sha( struct work *work, uint32_t max_nonce,
|
||||
|
||||
if ( unlikely( bswap_32( hasha[7] ) <= ptarget[7] ) )
|
||||
{
|
||||
casti_v128( hasha, 0 ) =
|
||||
_mm_shuffle_epi8( casti_v128( hasha, 0 ), shuf_bswap32 );
|
||||
casti_v128( hasha, 1 ) =
|
||||
_mm_shuffle_epi8( casti_v128( hasha, 1 ), shuf_bswap32 );
|
||||
casti_v128( hasha, 0 ) = v128_bswap32( casti_v128( hasha, 0 ) );
|
||||
casti_v128( hasha, 1 ) = v128_bswap32( casti_v128( hasha, 1 ) );
|
||||
if ( likely( valid_hash( hasha, ptarget ) && !bench ) )
|
||||
{
|
||||
pdata[19] = n;
|
||||
@@ -81,10 +77,8 @@ int scanhash_sha256d_sha( struct work *work, uint32_t max_nonce,
|
||||
}
|
||||
if ( unlikely( bswap_32( hashb[7] ) <= ptarget[7] ) )
|
||||
{
|
||||
casti_v128( hashb, 0 ) =
|
||||
_mm_shuffle_epi8( casti_v128( hashb, 0 ), shuf_bswap32 );
|
||||
casti_v128( hashb, 1 ) =
|
||||
_mm_shuffle_epi8( casti_v128( hashb, 1 ), shuf_bswap32 );
|
||||
casti_v128( hashb, 0 ) = v128_bswap32( casti_v128( hashb, 0 ) );
|
||||
casti_v128( hashb, 1 ) = v128_bswap32( casti_v128( hashb, 1 ) );
|
||||
if ( likely( valid_hash( hashb, ptarget ) && !bench ) )
|
||||
{
|
||||
pdata[19] = n+1;
|
||||
@@ -204,8 +198,6 @@ int scanhash_sha256d_16way( struct work *work, const uint32_t max_nonce,
|
||||
const int thr_id = mythr->id;
|
||||
const __m512i sixteen = v512_32( 16 );
|
||||
const bool bench = opt_benchmark;
|
||||
const __m256i bswap_shuf = mm256_bcast_m128( _mm_set_epi64x(
|
||||
0x0c0d0e0f08090a0b, 0x0405060700010203 ) );
|
||||
|
||||
// prehash first block directly from pdata
|
||||
sha256_transform_le( phash, pdata, sha256_iv );
|
||||
@@ -231,7 +223,7 @@ int scanhash_sha256d_16way( struct work *work, const uint32_t max_nonce,
|
||||
buf[15] = v512_32( 80*8 ); // bit count
|
||||
|
||||
// partially pre-expand & prehash second message block, avoiding the nonces
|
||||
sha256_16way_prehash_3rounds( mstate2, mexp_pre, buf, mstate1 );
|
||||
sha256_16x32_prehash_3rounds( mstate2, mexp_pre, buf, mstate1 );
|
||||
|
||||
// vectorize IV for second hash
|
||||
istate[0] = v512_32( sha256_iv[0] );
|
||||
@@ -250,15 +242,14 @@ int scanhash_sha256d_16way( struct work *work, const uint32_t max_nonce,
|
||||
|
||||
do
|
||||
{
|
||||
sha256_16way_final_rounds( block, buf, mstate1, mstate2, mexp_pre );
|
||||
if ( unlikely( sha256_16way_transform_le_short(
|
||||
sha256_16x32_final_rounds( block, buf, mstate1, mstate2, mexp_pre );
|
||||
if ( unlikely( sha256_16x32_transform_le_short(
|
||||
hash32, block, istate, ptarget ) ) )
|
||||
{
|
||||
for ( int lane = 0; lane < 16; lane++ )
|
||||
{
|
||||
extr_lane_16x32( phash, hash32, lane, 256 );
|
||||
casti_m256i( phash, 0 ) =
|
||||
_mm256_shuffle_epi8( casti_m256i( phash, 0 ), bswap_shuf );
|
||||
casti_m256i( phash, 0 ) = mm256_bswap_32( casti_m256i( phash, 0 ) );
|
||||
if ( likely( valid_hash( phash, ptarget ) && !bench ) )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
@@ -299,8 +290,6 @@ int scanhash_sha256d_8way( struct work *work, const uint32_t max_nonce,
|
||||
const bool bench = opt_benchmark;
|
||||
const __m256i last_byte = v256_32( 0x80000000 );
|
||||
const __m256i eight = v256_32( 8 );
|
||||
const __m256i bswap_shuf = mm256_bcast_m128( _mm_set_epi64x(
|
||||
0x0c0d0e0f08090a0b, 0x0405060700010203 ) );
|
||||
|
||||
for ( int i = 0; i < 19; i++ )
|
||||
vdata[i] = v256_32( pdata[i] );
|
||||
@@ -325,22 +314,22 @@ int scanhash_sha256d_8way( struct work *work, const uint32_t max_nonce,
|
||||
istate[6] = v256_32( sha256_iv[6] );
|
||||
istate[7] = v256_32( sha256_iv[7] );
|
||||
|
||||
sha256_8way_transform_le( mstate1, vdata, istate );
|
||||
sha256_8x32_transform_le( mstate1, vdata, istate );
|
||||
|
||||
// Do 3 rounds on the first 12 bytes of the next block
|
||||
sha256_8way_prehash_3rounds( mstate2, mexp_pre, vdata + 16, mstate1 );
|
||||
sha256_8x32_prehash_3rounds( mstate2, mexp_pre, vdata + 16, mstate1 );
|
||||
|
||||
do
|
||||
{
|
||||
sha256_8way_final_rounds( block, vdata+16, mstate1, mstate2, mexp_pre );
|
||||
if ( unlikely( sha256_8way_transform_le_short( hash32, block,
|
||||
sha256_8x32_final_rounds( block, vdata+16, mstate1, mstate2, mexp_pre );
|
||||
if ( unlikely( sha256_8x32_transform_le_short( hash32, block,
|
||||
istate, ptarget ) ) )
|
||||
{
|
||||
for ( int lane = 0; lane < 8; lane++ )
|
||||
{
|
||||
extr_lane_8x32( lane_hash, hash32, lane, 256 );
|
||||
casti_m256i( lane_hash, 0 ) =
|
||||
_mm256_shuffle_epi8( casti_m256i( lane_hash, 0 ), bswap_shuf );
|
||||
mm256_bswap_32( casti_m256i( lane_hash, 0 ) );
|
||||
if ( likely( valid_hash( lane_hash, ptarget ) && !bench ) )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
#define SHA256D_NEON_SHA2 1
|
||||
#elif defined(__AVX2__)
|
||||
#define SHA256D_8WAY 1
|
||||
#else
|
||||
#elif defined(__SSE2__) || defined(__ARM_NEON)
|
||||
#define SHA256D_4WAY 1
|
||||
#endif
|
||||
|
||||
|
||||
@@ -17,7 +17,6 @@
|
||||
#elif defined (__SSE2__) || defined(__ARM_NEON)
|
||||
#define SHA256DT_4X32 1
|
||||
#endif
|
||||
// else ref, should never happen
|
||||
|
||||
static const uint32_t sha256dt_iv[8] __attribute__ ((aligned (32))) =
|
||||
{
|
||||
@@ -205,8 +204,6 @@ int scanhash_sha256dt_16x32( struct work *work, const uint32_t max_nonce,
|
||||
const int thr_id = mythr->id;
|
||||
const __m512i sixteen = v512_32( 16 );
|
||||
const bool bench = opt_benchmark;
|
||||
const __m256i bswap_shuf = mm256_bcast_m128( v128_set64(
|
||||
0x0c0d0e0f08090a0b, 0x0405060700010203 ) );
|
||||
|
||||
// prehash first block directly from pdata
|
||||
sha256_transform_le( phash, pdata, sha256dt_iv );
|
||||
@@ -258,8 +255,7 @@ int scanhash_sha256dt_16x32( struct work *work, const uint32_t max_nonce,
|
||||
for ( int lane = 0; lane < 16; lane++ )
|
||||
{
|
||||
extr_lane_16x32( phash, hash32, lane, 256 );
|
||||
casti_m256i( phash, 0 ) =
|
||||
_mm256_shuffle_epi8( casti_m256i( phash, 0 ), bswap_shuf );
|
||||
casti_m256i( phash, 0 ) = mm256_bswap_32( casti_m256i( phash, 0 ) );
|
||||
if ( likely( valid_hash( phash, ptarget ) && !bench ) )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
@@ -298,8 +294,6 @@ int scanhash_sha256dt_8x32( struct work *work, const uint32_t max_nonce,
|
||||
const bool bench = opt_benchmark;
|
||||
const __m256i last_byte = v256_32( 0x80000000 );
|
||||
const __m256i eight = v256_32( 8 );
|
||||
const __m256i bswap_shuf = mm256_bcast_m128( v128_set64(
|
||||
0x0c0d0e0f08090a0b, 0x0405060700010203 ) );
|
||||
|
||||
for ( int i = 0; i < 19; i++ )
|
||||
vdata[i] = v256_32( pdata[i] );
|
||||
@@ -339,7 +333,7 @@ int scanhash_sha256dt_8x32( struct work *work, const uint32_t max_nonce,
|
||||
{
|
||||
extr_lane_8x32( lane_hash, hash32, lane, 256 );
|
||||
casti_m256i( lane_hash, 0 ) =
|
||||
_mm256_shuffle_epi8( casti_m256i( lane_hash, 0 ), bswap_shuf );
|
||||
mm256_bswap_32( casti_m256i( lane_hash, 0 ) );
|
||||
if ( likely( valid_hash( lane_hash, ptarget ) && !bench ) )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
@@ -406,7 +400,6 @@ int scanhash_sha256dt_4x32( struct work *work, const uint32_t max_nonce,
|
||||
do
|
||||
{
|
||||
sha256_4x32_final_rounds( block, vdata+16, mhash1, mhash2, mexp_pre );
|
||||
// sha256_4x32_transform_le( block, vdata+16, mhash1 );
|
||||
sha256_4x32_transform_le( hash32, block, iv );
|
||||
|
||||
for ( int lane = 0; lane < 4; lane++ )
|
||||
|
||||
@@ -7,28 +7,28 @@
|
||||
|
||||
#if defined(SHA256T_16WAY)
|
||||
|
||||
static __thread sha256_16way_context sha256_ctx16 __attribute__ ((aligned (64)));
|
||||
static __thread sha256_16x32_context sha256_ctx16 __attribute__ ((aligned (64)));
|
||||
|
||||
void sha256q_16way_hash( void* output, const void* input )
|
||||
{
|
||||
uint32_t vhash[8*16] __attribute__ ((aligned (64)));
|
||||
sha256_16way_context ctx;
|
||||
sha256_16x32_context ctx;
|
||||
memcpy( &ctx, &sha256_ctx16, sizeof ctx );
|
||||
|
||||
sha256_16way_update( &ctx, input + (64<<4), 16 );
|
||||
sha256_16way_close( &ctx, vhash );
|
||||
sha256_16x32_update( &ctx, input + (64<<4), 16 );
|
||||
sha256_16x32_close( &ctx, vhash );
|
||||
|
||||
sha256_16way_init( &ctx );
|
||||
sha256_16way_update( &ctx, vhash, 32 );
|
||||
sha256_16way_close( &ctx, vhash );
|
||||
sha256_16x32_init( &ctx );
|
||||
sha256_16x32_update( &ctx, vhash, 32 );
|
||||
sha256_16x32_close( &ctx, vhash );
|
||||
|
||||
sha256_16way_init( &ctx );
|
||||
sha256_16way_update( &ctx, vhash, 32 );
|
||||
sha256_16way_close( &ctx, vhash );
|
||||
sha256_16x32_init( &ctx );
|
||||
sha256_16x32_update( &ctx, vhash, 32 );
|
||||
sha256_16x32_close( &ctx, vhash );
|
||||
|
||||
sha256_16way_init( &ctx );
|
||||
sha256_16way_update( &ctx, vhash, 32 );
|
||||
sha256_16way_close( &ctx, output );
|
||||
sha256_16x32_init( &ctx );
|
||||
sha256_16x32_update( &ctx, vhash, 32 );
|
||||
sha256_16x32_close( &ctx, output );
|
||||
}
|
||||
|
||||
int scanhash_sha256q_16way( struct work *work, const uint32_t max_nonce,
|
||||
@@ -51,8 +51,8 @@ int scanhash_sha256q_16way( struct work *work, const uint32_t max_nonce,
|
||||
mm512_bswap32_intrlv80_16x32( vdata, pdata );
|
||||
*noncev = _mm512_set_epi32( n+15, n+14, n+13, n+12, n+11, n+10, n+9, n+8,
|
||||
n+ 7, n+ 6, n+ 5, n+ 4, n+ 3, n+ 2, n+1, n );
|
||||
sha256_16way_init( &sha256_ctx16 );
|
||||
sha256_16way_update( &sha256_ctx16, vdata, 64 );
|
||||
sha256_16x32_init( &sha256_ctx16 );
|
||||
sha256_16x32_update( &sha256_ctx16, vdata, 64 );
|
||||
|
||||
do
|
||||
{
|
||||
@@ -80,28 +80,28 @@ int scanhash_sha256q_16way( struct work *work, const uint32_t max_nonce,
|
||||
|
||||
#if defined(SHA256T_8WAY)
|
||||
|
||||
static __thread sha256_8way_context sha256_ctx8 __attribute__ ((aligned (64)));
|
||||
static __thread sha256_8x32_context sha256_ctx8 __attribute__ ((aligned (64)));
|
||||
|
||||
void sha256q_8way_hash( void* output, const void* input )
|
||||
{
|
||||
uint32_t vhash[8*8] __attribute__ ((aligned (64)));
|
||||
sha256_8way_context ctx;
|
||||
sha256_8x32_context ctx;
|
||||
memcpy( &ctx, &sha256_ctx8, sizeof ctx );
|
||||
|
||||
sha256_8way_update( &ctx, input + (64<<3), 16 );
|
||||
sha256_8way_close( &ctx, vhash );
|
||||
sha256_8x32_update( &ctx, input + (64<<3), 16 );
|
||||
sha256_8x32_close( &ctx, vhash );
|
||||
|
||||
sha256_8way_init( &ctx );
|
||||
sha256_8way_update( &ctx, vhash, 32 );
|
||||
sha256_8way_close( &ctx, vhash );
|
||||
sha256_8x32_init( &ctx );
|
||||
sha256_8x32_update( &ctx, vhash, 32 );
|
||||
sha256_8x32_close( &ctx, vhash );
|
||||
|
||||
sha256_8way_init( &ctx );
|
||||
sha256_8way_update( &ctx, vhash, 32 );
|
||||
sha256_8way_close( &ctx, vhash );
|
||||
sha256_8x32_init( &ctx );
|
||||
sha256_8x32_update( &ctx, vhash, 32 );
|
||||
sha256_8x32_close( &ctx, vhash );
|
||||
|
||||
sha256_8way_init( &ctx );
|
||||
sha256_8way_update( &ctx, vhash, 32 );
|
||||
sha256_8way_close( &ctx, output );
|
||||
sha256_8x32_init( &ctx );
|
||||
sha256_8x32_update( &ctx, vhash, 32 );
|
||||
sha256_8x32_close( &ctx, output );
|
||||
}
|
||||
|
||||
int scanhash_sha256q_8way( struct work *work, const uint32_t max_nonce,
|
||||
@@ -123,8 +123,8 @@ int scanhash_sha256q_8way( struct work *work, const uint32_t max_nonce,
|
||||
|
||||
mm256_bswap32_intrlv80_8x32( vdata, pdata );
|
||||
*noncev = _mm256_set_epi32( n+7, n+6, n+5, n+4, n+3, n+2, n+1, n );
|
||||
sha256_8way_init( &sha256_ctx8 );
|
||||
sha256_8way_update( &sha256_ctx8, vdata, 64 );
|
||||
sha256_8x32_init( &sha256_ctx8 );
|
||||
sha256_8x32_update( &sha256_ctx8, vdata, 64 );
|
||||
|
||||
do
|
||||
{
|
||||
@@ -152,28 +152,28 @@ int scanhash_sha256q_8way( struct work *work, const uint32_t max_nonce,
|
||||
|
||||
#if defined(SHA256T_4WAY)
|
||||
|
||||
static __thread sha256_4way_context sha256_ctx4 __attribute__ ((aligned (64)));
|
||||
static __thread sha256_4x32_context sha256_ctx4 __attribute__ ((aligned (64)));
|
||||
|
||||
void sha256q_4way_hash( void* output, const void* input )
|
||||
{
|
||||
uint32_t vhash[8*4] __attribute__ ((aligned (64)));
|
||||
sha256_4way_context ctx;
|
||||
sha256_4x32_context ctx;
|
||||
memcpy( &ctx, &sha256_ctx4, sizeof ctx );
|
||||
|
||||
sha256_4way_update( &ctx, input + (64<<2), 16 );
|
||||
sha256_4way_close( &ctx, vhash );
|
||||
sha256_4x32_update( &ctx, input + (64<<2), 16 );
|
||||
sha256_4x32_close( &ctx, vhash );
|
||||
|
||||
sha256_4way_init( &ctx );
|
||||
sha256_4way_update( &ctx, vhash, 32 );
|
||||
sha256_4way_close( &ctx, vhash );
|
||||
sha256_4x32_init( &ctx );
|
||||
sha256_4x32_update( &ctx, vhash, 32 );
|
||||
sha256_4x32_close( &ctx, vhash );
|
||||
|
||||
sha256_4way_init( &ctx );
|
||||
sha256_4way_update( &ctx, vhash, 32 );
|
||||
sha256_4way_close( &ctx, vhash );
|
||||
sha256_4x32_init( &ctx );
|
||||
sha256_4x32_update( &ctx, vhash, 32 );
|
||||
sha256_4x32_close( &ctx, vhash );
|
||||
|
||||
sha256_4way_init( &ctx );
|
||||
sha256_4way_update( &ctx, vhash, 32 );
|
||||
sha256_4way_close( &ctx, output );
|
||||
sha256_4x32_init( &ctx );
|
||||
sha256_4x32_update( &ctx, vhash, 32 );
|
||||
sha256_4x32_close( &ctx, output );
|
||||
}
|
||||
|
||||
int scanhash_sha256q_4way( struct work *work, uint32_t max_nonce,
|
||||
@@ -205,8 +205,8 @@ int scanhash_sha256q_4way( struct work *work, uint32_t max_nonce,
|
||||
0 };
|
||||
|
||||
v128_bswap32_intrlv80_4x32( vdata, pdata );
|
||||
sha256_4way_init( &sha256_ctx4 );
|
||||
sha256_4way_update( &sha256_ctx4, vdata, 64 );
|
||||
sha256_4x32_init( &sha256_ctx4 );
|
||||
sha256_4x32_update( &sha256_ctx4, vdata, 64 );
|
||||
|
||||
for ( int m = 0; m < 6; m++ ) if ( Htarg <= htmax[m] )
|
||||
{
|
||||
|
||||
@@ -35,8 +35,6 @@ int scanhash_sha256t_16way( struct work *work, const uint32_t max_nonce,
|
||||
const int thr_id = mythr->id;
|
||||
const __m512i sixteen = v512_32( 16 );
|
||||
const bool bench = opt_benchmark;
|
||||
const __m256i bswap_shuf = mm256_bcast_m128( v128_set64(
|
||||
0x0c0d0e0f08090a0b, 0x0405060700010203 ) );
|
||||
|
||||
// prehash first block directly from pdata
|
||||
sha256_transform_le( phash, pdata, sha256_iv );
|
||||
@@ -62,7 +60,7 @@ int scanhash_sha256t_16way( struct work *work, const uint32_t max_nonce,
|
||||
buf[15] = v512_32( 80*8 ); // bit count
|
||||
|
||||
// partially pre-expand & prehash second message block, avoiding the nonces
|
||||
sha256_16way_prehash_3rounds( mstate2, mexp_pre, buf, mstate1 );
|
||||
sha256_16x32_prehash_3rounds( mstate2, mexp_pre, buf, mstate1 );
|
||||
|
||||
// vectorize IV for 2nd & 3rd sha256
|
||||
istate[0] = v512_32( sha256_iv[0] );
|
||||
@@ -81,18 +79,17 @@ int scanhash_sha256t_16way( struct work *work, const uint32_t max_nonce,
|
||||
|
||||
do
|
||||
{
|
||||
sha256_16way_final_rounds( block, buf, mstate1, mstate2, mexp_pre );
|
||||
sha256_16x32_final_rounds( block, buf, mstate1, mstate2, mexp_pre );
|
||||
|
||||
sha256_16way_transform_le( block, block, istate );
|
||||
sha256_16x32_transform_le( block, block, istate );
|
||||
|
||||
if ( sha256_16way_transform_le_short( hash32, block, istate, ptarget ) )
|
||||
if ( sha256_16x32_transform_le_short( hash32, block, istate, ptarget ) )
|
||||
{
|
||||
for ( int lane = 0; lane < 16; lane++ )
|
||||
if ( bswap_32( hash32_d7[ lane ] ) <= targ32_d7 )
|
||||
{
|
||||
extr_lane_16x32( phash, hash32, lane, 256 );
|
||||
casti_m256i( phash, 0 ) =
|
||||
_mm256_shuffle_epi8( casti_m256i( phash, 0 ), bswap_shuf );
|
||||
casti_m256i( phash, 0 ) = mm256_bswap_32( casti_m256i( phash, 0 ) );
|
||||
if ( likely( valid_hash( phash, ptarget ) && !bench ) )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
@@ -301,8 +298,6 @@ int scanhash_sha256t_8way( struct work *work, const uint32_t max_nonce,
|
||||
const bool bench = opt_benchmark;
|
||||
const __m256i last_byte = v256_32( 0x80000000 );
|
||||
const __m256i eight = v256_32( 8 );
|
||||
const __m256i bswap_shuf = mm256_bcast_m128( _mm_set_epi64x(
|
||||
0x0c0d0e0f08090a0b, 0x0405060700010203 ) );
|
||||
|
||||
for ( int i = 0; i < 19; i++ )
|
||||
vdata[i] = v256_32( pdata[i] );
|
||||
@@ -327,29 +322,29 @@ int scanhash_sha256t_8way( struct work *work, const uint32_t max_nonce,
|
||||
istate[6] = v256_32( sha256_iv[6] );
|
||||
istate[7] = v256_32( sha256_iv[7] );
|
||||
|
||||
sha256_8way_transform_le( mstate1, vdata, istate );
|
||||
sha256_8x32_transform_le( mstate1, vdata, istate );
|
||||
|
||||
// Do 3 rounds on the first 12 bytes of the next block
|
||||
sha256_8way_prehash_3rounds( mstate2, mexp_pre, vdata + 16, mstate1 );
|
||||
sha256_8x32_prehash_3rounds( mstate2, mexp_pre, vdata + 16, mstate1 );
|
||||
|
||||
do
|
||||
{
|
||||
// 1. final 16 bytes of data, with padding
|
||||
sha256_8way_final_rounds( block, vdata+16, mstate1, mstate2,
|
||||
sha256_8x32_final_rounds( block, vdata+16, mstate1, mstate2,
|
||||
mexp_pre );
|
||||
|
||||
// 2. 32 byte hash from 1.
|
||||
sha256_8way_transform_le( block, block, istate );
|
||||
sha256_8x32_transform_le( block, block, istate );
|
||||
|
||||
// 3. 32 byte hash from 2.
|
||||
if ( unlikely( sha256_8way_transform_le_short(
|
||||
if ( unlikely( sha256_8x32_transform_le_short(
|
||||
hash32, block, istate, ptarget ) ) )
|
||||
{
|
||||
for ( int lane = 0; lane < 8; lane++ )
|
||||
{
|
||||
extr_lane_8x32( lane_hash, hash32, lane, 256 );
|
||||
casti_m256i( lane_hash, 0 ) =
|
||||
_mm256_shuffle_epi8( casti_m256i( lane_hash, 0 ), bswap_shuf );
|
||||
mm256_bswap_32( casti_m256i( lane_hash, 0 ) );
|
||||
if ( likely( valid_hash( lane_hash, ptarget ) && !bench ) )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
@@ -419,8 +414,8 @@ int scanhash_sha256t_4way( struct work *work, const uint32_t max_nonce,
|
||||
do
|
||||
{
|
||||
sha256_4x32_final_rounds( block, vdata+16, mhash1, mhash2, mexp_pre );
|
||||
sha256_4way_transform_le( block, block, iv );
|
||||
sha256_4way_transform_le( hash32, block, iv );
|
||||
sha256_4x32_transform_le( block, block, iv );
|
||||
sha256_4x32_transform_le( hash32, block, iv );
|
||||
|
||||
for ( int lane = 0; lane < 4; lane++ )
|
||||
{
|
||||
|
||||
@@ -83,15 +83,13 @@ void sha512_opt_transform_be( uint64_t *state_out, const void *input,
|
||||
const uint64_t *state_in )
|
||||
{
|
||||
__m256i STATE0, STATE1;
|
||||
__m256i MSG, TMP, BSWAP64;
|
||||
__m256i MSG, TMP;
|
||||
__m256i TMSG0, TMSG1, TMSG2, TMSG3;
|
||||
__m256i ABEF_SAVE, CDGH_SAVE;
|
||||
|
||||
// Load initial values
|
||||
TMP = _mm256_load_si256( (__m256i*) &state_in[0] );
|
||||
STATE1 = _mm256_load_si256( (__m256i*) &state_in[4] );
|
||||
BSWAP64 = mm256_bcast_m128( _mm_set_epi64x( 0x08090a0b0c0d0e0f,
|
||||
0x0001020304050607 ) );
|
||||
TMP = _mm256_permute4x64_epi64( TMP, 0xB1 ); // CDAB
|
||||
STATE1 = _mm256_permute4x64_epi64( STATE1, 0x1B ); // EFGH
|
||||
STATE0 = _mm256_permute2x128_si256( TMP, STATE1, 0x21 ); // ABEF
|
||||
@@ -103,7 +101,7 @@ void sha512_opt_transform_be( uint64_t *state_out, const void *input,
|
||||
|
||||
// Rounds 0-3
|
||||
TMSG0 = _mm256_load_si256( (const __m256i*) (input+0) );
|
||||
TMSG0 = _mm256_shuffle_epi8( TMSG0, BSWAP64 );
|
||||
TMSG0 = mm256_bswap_64( TMSG0 );
|
||||
MSG = _mm256_add_epi64( TMSG0, casti_m256i( K512, 0 ) );
|
||||
STATE1 = _mm256_sha512rnds2_epi64( STATE1, STATE0,
|
||||
_mm256_castsi256_si128 (MSG ) );
|
||||
@@ -113,7 +111,7 @@ void sha512_opt_transform_be( uint64_t *state_out, const void *input,
|
||||
|
||||
// Rounds 4-7
|
||||
TMSG1 = _mm256_load_si256( (const __m256i*) (input+16) );
|
||||
TMSG1 = _mm256_shuffle_epi8( TMSG1, BSWAP64 );
|
||||
TMSG1 = mm256_bswap_64( TMSG1 );
|
||||
MSG = _mm256_add_epi64( TMSG1, casti_m256i( K512, 1 ) );
|
||||
STATE1 = _mm256_sha512rnds2_epi64( STATE1, STATE0,
|
||||
_mm256_castsi256_si128( MSG ) );
|
||||
@@ -124,7 +122,7 @@ void sha512_opt_transform_be( uint64_t *state_out, const void *input,
|
||||
|
||||
// Rounds 8-11
|
||||
TMSG2 = _mm256_load_si256( (const __m256i*) (input+32) );
|
||||
TMSG2 = _mm256_shuffle_epi8( TMSG2, BSWAP64 );
|
||||
TMSG2 = mm256_bswap_64( TMSG2 );
|
||||
MSG = _mm256_add_epi64( TMSG2, casti_m256i( K512, 2 ) );
|
||||
STATE1 = _mm256_sha512rnds2_epi64( STATE1, STATE0,
|
||||
_mm256_castsi256_si128( MSG ) );
|
||||
@@ -135,7 +133,7 @@ void sha512_opt_transform_be( uint64_t *state_out, const void *input,
|
||||
|
||||
// Rounds 12-15
|
||||
TMSG3 = _mm256_load_si256( (const __m256i*) (input+48) );
|
||||
TMSG3 = _mm256_shuffle_epi8( TMSG3, BSWAP64 );
|
||||
TMSG3 = mm256_bswap_64( TMSG3 );
|
||||
MSG = _mm256_add_epi64( TMSG3, casti_m256i( K512, 3 ) );
|
||||
STATE1 = _mm256_sha512rnds2_epi64( STATE1, STATE0,
|
||||
_mm256_castsi256_si128( MSG ) );
|
||||
@@ -735,8 +733,6 @@ void sha512_8x64_close( sha512_8x64_context *sc, void *dst )
|
||||
unsigned ptr;
|
||||
const int buf_size = 128;
|
||||
const int pad = buf_size - 16;
|
||||
const __m512i shuff_bswap64 = mm512_bcast_m128( _mm_set_epi64x(
|
||||
0x08090a0b0c0d0e0f, 0x0001020304050607 ) );
|
||||
|
||||
ptr = (unsigned)sc->count & (buf_size - 1U);
|
||||
sc->buf[ ptr>>3 ] = v512_64( 0x80 );
|
||||
@@ -750,10 +746,8 @@ void sha512_8x64_close( sha512_8x64_context *sc, void *dst )
|
||||
else
|
||||
memset_zero_512( sc->buf + (ptr>>3), (pad - ptr) >> 3 );
|
||||
|
||||
sc->buf[ pad >> 3 ] = _mm512_shuffle_epi8(
|
||||
v512_64( sc->count >> 61 ), shuff_bswap64 );
|
||||
sc->buf[ ( pad+8 ) >> 3 ] = _mm512_shuffle_epi8(
|
||||
v512_64( sc->count << 3 ), shuff_bswap64 );
|
||||
sc->buf[ pad >> 3 ] = v512_64( bswap_64( sc->count >> 61 ) );
|
||||
sc->buf[ ( pad+8 ) >> 3 ] = v512_64( bswap_64( sc->count << 3 ) );
|
||||
sha512_8x64_round( sc, sc->buf, sc->val );
|
||||
|
||||
mm512_block_bswap_64( dst, sc->val );
|
||||
@@ -957,8 +951,6 @@ void sha512_4x64_close( sha512_4x64_context *sc, void *dst )
|
||||
unsigned ptr;
|
||||
const int buf_size = 128;
|
||||
const int pad = buf_size - 16;
|
||||
const __m256i shuff_bswap64 = mm256_bcast_m128( _mm_set_epi64x(
|
||||
0x08090a0b0c0d0e0f, 0x0001020304050607 ) );
|
||||
|
||||
ptr = (unsigned)sc->count & (buf_size - 1U);
|
||||
sc->buf[ ptr>>3 ] = v256_64( 0x80 );
|
||||
@@ -972,10 +964,8 @@ void sha512_4x64_close( sha512_4x64_context *sc, void *dst )
|
||||
else
|
||||
memset_zero_256( sc->buf + (ptr>>3), (pad - ptr) >> 3 );
|
||||
|
||||
sc->buf[ pad >> 3 ] = _mm256_shuffle_epi8(
|
||||
v256_64( sc->count >> 61 ), shuff_bswap64 );
|
||||
sc->buf[ ( pad+8 ) >> 3 ] = _mm256_shuffle_epi8(
|
||||
v256_64( sc->count << 3 ), shuff_bswap64 );
|
||||
sc->buf[ pad >> 3 ] = v256_64( bswap_64( sc->count >> 61 ) );
|
||||
sc->buf[ ( pad+8 ) >> 3 ] = v256_64( bswap_64( sc->count << 3 ) );
|
||||
sha512_4x64_round( sc, sc->buf, sc->val );
|
||||
|
||||
mm256_block_bswap_64( dst, sc->val );
|
||||
@@ -1138,8 +1128,8 @@ void sha512_2x64_close( sha512_2x64_context *sc, void *dst )
|
||||
else
|
||||
v128_memset_zero( sc->buf + (ptr>>3), (pad - ptr) >> 3 );
|
||||
|
||||
sc->buf[ pad >> 3 ] = v128_bswap64( v128_64( sc->count >> 61 ) );
|
||||
sc->buf[ ( pad+8 ) >> 3 ] = v128_bswap64( v128_64( sc->count << 3 ) );
|
||||
sc->buf[ pad >> 3 ] = v128_64( bswap_64( sc->count >> 61 ) );
|
||||
sc->buf[ ( pad+8 ) >> 3 ] = v128_64( bswap_64( sc->count << 3 ) );
|
||||
sha512_2x64_round( sc, sc->buf, sc->val );
|
||||
|
||||
v128_block_bswap64( castp_v128u64( dst ), sc->val );
|
||||
|
||||
@@ -36,7 +36,6 @@ typedef struct
|
||||
uint64_t count;
|
||||
bool initialized;
|
||||
} sha512_8x64_context __attribute__ ((aligned (128)));
|
||||
#define sha512_8way_context sha512_8x64_context
|
||||
|
||||
void sha512_8x64_init( sha512_8x64_context *sc);
|
||||
void sha512_8x64_update( sha512_8x64_context *sc, const void *data,
|
||||
@@ -45,10 +44,6 @@ void sha512_8x64_close( sha512_8x64_context *sc, void *dst );
|
||||
void sha512_8x64_ctx( sha512_8x64_context *sc, void *dst, const void *data,
|
||||
size_t len );
|
||||
|
||||
#define sha512_8way_init sha512_8x64_init
|
||||
#define sha512_8way_update sha512_8x64_update
|
||||
#define sha512_8way_close sha512_8x64_close
|
||||
|
||||
#endif // AVX512
|
||||
|
||||
#if defined (__AVX2__)
|
||||
@@ -62,7 +57,6 @@ typedef struct
|
||||
uint64_t count;
|
||||
bool initialized;
|
||||
} sha512_4x64_context __attribute__ ((aligned (64)));
|
||||
#define sha512_4way_context sha512_4x64_context
|
||||
|
||||
void sha512_4x64_init( sha512_4x64_context *sc);
|
||||
void sha512_4x64_update( sha512_4x64_context *sc, const void *data,
|
||||
@@ -71,10 +65,6 @@ void sha512_4x64_close( sha512_4x64_context *sc, void *dst );
|
||||
void sha512_4x64_ctx( sha512_4x64_context *sc, void *dst, const void *data,
|
||||
size_t len );
|
||||
|
||||
#define sha512_4way_init sha512_4x64_init
|
||||
#define sha512_4way_update sha512_4x64_update
|
||||
#define sha512_4way_close sha512_4x64_close
|
||||
|
||||
#endif // AVX2
|
||||
|
||||
typedef struct
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
|
||||
#if defined(SHA512256D_8WAY)
|
||||
|
||||
static void sha512256d_8way_init( sha512_8way_context *ctx )
|
||||
static void sha512256d_8x64_init( sha512_8x64_context *ctx )
|
||||
{
|
||||
ctx->count = 0;
|
||||
ctx->initialized = true;
|
||||
@@ -33,7 +33,7 @@ int scanhash_sha512256d_8way( struct work *work, uint32_t max_nonce,
|
||||
{
|
||||
uint64_t hash[8*8] __attribute__ ((aligned (128)));
|
||||
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
||||
sha512_8way_context ctx;
|
||||
sha512_8x64_context ctx;
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||
uint64_t *hash_q3 = &(hash[3*8]);
|
||||
uint32_t *pdata = work->data;
|
||||
@@ -53,13 +53,13 @@ int scanhash_sha512256d_8way( struct work *work, uint32_t max_nonce,
|
||||
n+3, 0, n+2, 0, n+1, 0, n , 0 ), *noncev );
|
||||
do
|
||||
{
|
||||
sha512256d_8way_init( &ctx );
|
||||
sha512_8way_update( &ctx, vdata, 80 );
|
||||
sha512_8way_close( &ctx, hash );
|
||||
sha512256d_8x64_init( &ctx );
|
||||
sha512_8x64_update( &ctx, vdata, 80 );
|
||||
sha512_8x64_close( &ctx, hash );
|
||||
|
||||
sha512256d_8way_init( &ctx );
|
||||
sha512_8way_update( &ctx, hash, 32 );
|
||||
sha512_8way_close( &ctx, hash );
|
||||
sha512256d_8x64_init( &ctx );
|
||||
sha512_8x64_update( &ctx, hash, 32 );
|
||||
sha512_8x64_close( &ctx, hash );
|
||||
|
||||
for ( int lane = 0; lane < 8; lane++ )
|
||||
if ( unlikely( hash_q3[ lane ] <= targ_q3 && !bench ) )
|
||||
@@ -82,7 +82,7 @@ int scanhash_sha512256d_8way( struct work *work, uint32_t max_nonce,
|
||||
|
||||
#elif defined(SHA512256D_4WAY)
|
||||
|
||||
static void sha512256d_4way_init( sha512_4way_context *ctx )
|
||||
static void sha512256d_4x64_init( sha512_4x64_context *ctx )
|
||||
{
|
||||
ctx->count = 0;
|
||||
ctx->initialized = true;
|
||||
@@ -101,7 +101,7 @@ int scanhash_sha512256d_4way( struct work *work, uint32_t max_nonce,
|
||||
{
|
||||
uint64_t hash[8*4] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
sha512_4way_context ctx;
|
||||
sha512_4x64_context ctx;
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||
uint64_t *hash_q3 = &(hash[3*4]);
|
||||
uint32_t *pdata = work->data;
|
||||
@@ -119,13 +119,13 @@ int scanhash_sha512256d_4way( struct work *work, uint32_t max_nonce,
|
||||
n+3, 0, n+2, 0, n+1, 0, n, 0 ), casti_m256i( vdata,9 ) );
|
||||
do
|
||||
{
|
||||
sha512256d_4way_init( &ctx );
|
||||
sha512_4way_update( &ctx, vdata, 80 );
|
||||
sha512_4way_close( &ctx, hash );
|
||||
sha512256d_4x64_init( &ctx );
|
||||
sha512_4x64_update( &ctx, vdata, 80 );
|
||||
sha512_4x64_close( &ctx, hash );
|
||||
|
||||
sha512256d_4way_init( &ctx );
|
||||
sha512_4way_update( &ctx, hash, 32 );
|
||||
sha512_4way_close( &ctx, hash );
|
||||
sha512256d_4x64_init( &ctx );
|
||||
sha512_4x64_update( &ctx, hash, 32 );
|
||||
sha512_4x64_close( &ctx, hash );
|
||||
|
||||
for ( int lane = 0; lane < 4; lane++ )
|
||||
if ( hash_q3[ lane ] <= targ_q3 )
|
||||
|
||||
@@ -430,9 +430,9 @@ do { \
|
||||
} while (0)
|
||||
|
||||
static void
|
||||
shabal_16way_init( void *cc, unsigned size )
|
||||
shabal_16x32_init( void *cc, unsigned size )
|
||||
{
|
||||
shabal_16way_context *sc = (shabal_16way_context*)cc;
|
||||
shabal_16x32_context *sc = (shabal_16x32_context*)cc;
|
||||
|
||||
if ( size == 512 )
|
||||
{ // copy immediate constants directly to working registers later.
|
||||
@@ -494,9 +494,9 @@ shabal_16way_init( void *cc, unsigned size )
|
||||
}
|
||||
|
||||
static void
|
||||
shabal_16way_core( void *cc, const unsigned char *data, size_t len )
|
||||
shabal_16x32_core( void *cc, const unsigned char *data, size_t len )
|
||||
{
|
||||
shabal_16way_context *sc = (shabal_16way_context*)cc;
|
||||
shabal_16x32_context *sc = (shabal_16x32_context*)cc;
|
||||
__m512i *buf;
|
||||
__m512i *vdata = (__m512i*)data;
|
||||
const int buf_size = 64;
|
||||
@@ -544,10 +544,10 @@ shabal_16way_core( void *cc, const unsigned char *data, size_t len )
|
||||
}
|
||||
|
||||
static void
|
||||
shabal_16way_close( void *cc, unsigned ub, unsigned n, void *dst,
|
||||
shabal_16x32_close( void *cc, unsigned ub, unsigned n, void *dst,
|
||||
unsigned size_words )
|
||||
{
|
||||
shabal_16way_context *sc = (shabal_16way_context*)cc;
|
||||
shabal_16x32_context *sc = (shabal_16x32_context*)cc;
|
||||
__m512i *buf;
|
||||
const int buf_size = 64;
|
||||
size_t ptr;
|
||||
@@ -590,52 +590,39 @@ shabal_16way_close( void *cc, unsigned ub, unsigned n, void *dst,
|
||||
}
|
||||
|
||||
void
|
||||
shabal256_16way_init( void *cc )
|
||||
shabal256_16x32_init( void *cc )
|
||||
{
|
||||
shabal_16way_init(cc, 256);
|
||||
shabal_16x32_init(cc, 256);
|
||||
}
|
||||
|
||||
void
|
||||
shabal256_16way_update( void *cc, const void *data, size_t len )
|
||||
shabal256_16x32_update( void *cc, const void *data, size_t len )
|
||||
{
|
||||
shabal_16way_core( cc, data, len );
|
||||
shabal_16x32_core( cc, data, len );
|
||||
}
|
||||
|
||||
void
|
||||
shabal256_16way_close( void *cc, void *dst )
|
||||
shabal256_16x32_close( void *cc, void *dst )
|
||||
{
|
||||
shabal_16way_close(cc, 0, 0, dst, 8);
|
||||
shabal_16x32_close(cc, 0, 0, dst, 8);
|
||||
}
|
||||
|
||||
void
|
||||
shabal256_16way_addbits_and_close( void *cc, unsigned ub, unsigned n,
|
||||
void *dst )
|
||||
shabal512_16x32_init(void *cc)
|
||||
{
|
||||
shabal_16way_close(cc, ub, n, dst, 8);
|
||||
shabal_16x32_init(cc, 512);
|
||||
}
|
||||
|
||||
void
|
||||
shabal512_16way_init(void *cc)
|
||||
shabal512_16x32_update(void *cc, const void *data, size_t len)
|
||||
{
|
||||
shabal_16way_init(cc, 512);
|
||||
shabal_16x32_core(cc, data, len);
|
||||
}
|
||||
|
||||
void
|
||||
shabal512_16way_update(void *cc, const void *data, size_t len)
|
||||
shabal512_16x32_close(void *cc, void *dst)
|
||||
{
|
||||
shabal_16way_core(cc, data, len);
|
||||
}
|
||||
|
||||
void
|
||||
shabal512_16way_close(void *cc, void *dst)
|
||||
{
|
||||
shabal_16way_close(cc, 0, 0, dst, 16);
|
||||
}
|
||||
|
||||
void
|
||||
shabal512_16way_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
|
||||
{
|
||||
shabal_16way_close(cc, ub, n, dst, 16);
|
||||
shabal_16x32_close(cc, 0, 0, dst, 16);
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -1031,9 +1018,9 @@ do { \
|
||||
} while (0)
|
||||
|
||||
static void
|
||||
shabal_8way_init( void *cc, unsigned size )
|
||||
shabal_8x32_init( void *cc, unsigned size )
|
||||
{
|
||||
shabal_8way_context *sc = (shabal_8way_context*)cc;
|
||||
shabal_8x32_context *sc = (shabal_8x32_context*)cc;
|
||||
|
||||
if ( size == 512 )
|
||||
{ // copy immediate constants directly to working registers later.
|
||||
@@ -1095,9 +1082,9 @@ shabal_8way_init( void *cc, unsigned size )
|
||||
}
|
||||
|
||||
static void
|
||||
shabal_8way_core( void *cc, const unsigned char *data, size_t len )
|
||||
shabal_8x32_core( void *cc, const unsigned char *data, size_t len )
|
||||
{
|
||||
shabal_8way_context *sc = (shabal_8way_context*)cc;
|
||||
shabal_8x32_context *sc = (shabal_8x32_context*)cc;
|
||||
__m256i *buf;
|
||||
__m256i *vdata = (__m256i*)data;
|
||||
const int buf_size = 64;
|
||||
@@ -1146,10 +1133,10 @@ shabal_8way_core( void *cc, const unsigned char *data, size_t len )
|
||||
}
|
||||
|
||||
static void
|
||||
shabal_8way_close( void *cc, unsigned ub, unsigned n, void *dst,
|
||||
shabal_8x32_close( void *cc, unsigned ub, unsigned n, void *dst,
|
||||
unsigned size_words )
|
||||
{
|
||||
shabal_8way_context *sc = (shabal_8way_context*)cc;
|
||||
shabal_8x32_context *sc = (shabal_8x32_context*)cc;
|
||||
__m256i *buf;
|
||||
const int buf_size = 64;
|
||||
size_t ptr;
|
||||
@@ -1192,52 +1179,39 @@ shabal_8way_close( void *cc, unsigned ub, unsigned n, void *dst,
|
||||
}
|
||||
|
||||
void
|
||||
shabal256_8way_init( void *cc )
|
||||
shabal256_8x32_init( void *cc )
|
||||
{
|
||||
shabal_8way_init(cc, 256);
|
||||
shabal_8x32_init(cc, 256);
|
||||
}
|
||||
|
||||
void
|
||||
shabal256_8way_update( void *cc, const void *data, size_t len )
|
||||
shabal256_8x32_update( void *cc, const void *data, size_t len )
|
||||
{
|
||||
shabal_8way_core( cc, data, len );
|
||||
shabal_8x32_core( cc, data, len );
|
||||
}
|
||||
|
||||
void
|
||||
shabal256_8way_close( void *cc, void *dst )
|
||||
shabal256_8x32_close( void *cc, void *dst )
|
||||
{
|
||||
shabal_8way_close(cc, 0, 0, dst, 8);
|
||||
shabal_8x32_close(cc, 0, 0, dst, 8);
|
||||
}
|
||||
|
||||
void
|
||||
shabal256_8way_addbits_and_close( void *cc, unsigned ub, unsigned n,
|
||||
void *dst )
|
||||
shabal512_8x32_init(void *cc)
|
||||
{
|
||||
shabal_8way_close(cc, ub, n, dst, 8);
|
||||
shabal_8x32_init(cc, 512);
|
||||
}
|
||||
|
||||
void
|
||||
shabal512_8way_init(void *cc)
|
||||
shabal512_8x32_update(void *cc, const void *data, size_t len)
|
||||
{
|
||||
shabal_8way_init(cc, 512);
|
||||
shabal_8x32_core(cc, data, len);
|
||||
}
|
||||
|
||||
void
|
||||
shabal512_8way_update(void *cc, const void *data, size_t len)
|
||||
shabal512_8x32_close(void *cc, void *dst)
|
||||
{
|
||||
shabal_8way_core(cc, data, len);
|
||||
}
|
||||
|
||||
void
|
||||
shabal512_8way_close(void *cc, void *dst)
|
||||
{
|
||||
shabal_8way_close(cc, 0, 0, dst, 16);
|
||||
}
|
||||
|
||||
void
|
||||
shabal512_8way_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
|
||||
{
|
||||
shabal_8way_close(cc, ub, n, dst, 16);
|
||||
shabal_8x32_close(cc, 0, 0, dst, 16);
|
||||
}
|
||||
|
||||
#endif // AVX2
|
||||
@@ -1674,9 +1648,9 @@ static const sph_u32 C_init_512[] = {
|
||||
*/
|
||||
|
||||
static void
|
||||
shabal_4way_init( void *cc, unsigned size )
|
||||
shabal_4x32_init( void *cc, unsigned size )
|
||||
{
|
||||
shabal_4way_context *sc = (shabal_4way_context*)cc;
|
||||
shabal_4x32_context *sc = (shabal_4x32_context*)cc;
|
||||
|
||||
if ( size == 512 )
|
||||
{ // copy immediate constants directly to working registers later.
|
||||
@@ -1786,9 +1760,9 @@ shabal_4way_init( void *cc, unsigned size )
|
||||
}
|
||||
|
||||
static void
|
||||
shabal_4way_core( void *cc, const unsigned char *data, size_t len )
|
||||
shabal_4x32_core( void *cc, const unsigned char *data, size_t len )
|
||||
{
|
||||
shabal_4way_context *sc = (shabal_4way_context*)cc;
|
||||
shabal_4x32_context *sc = (shabal_4x32_context*)cc;
|
||||
v128_t *buf;
|
||||
v128_t *vdata = (v128_t*)data;
|
||||
const int buf_size = 64;
|
||||
@@ -1838,10 +1812,10 @@ shabal_4way_core( void *cc, const unsigned char *data, size_t len )
|
||||
}
|
||||
|
||||
static void
|
||||
shabal_4way_close( void *cc, unsigned ub, unsigned n, void *dst,
|
||||
shabal_4x32_close( void *cc, unsigned ub, unsigned n, void *dst,
|
||||
unsigned size_words )
|
||||
{
|
||||
shabal_4way_context *sc = (shabal_4way_context*)cc;
|
||||
shabal_4x32_context *sc = (shabal_4x32_context*)cc;
|
||||
v128_t *buf;
|
||||
const int buf_size = 64;
|
||||
size_t ptr;
|
||||
@@ -1884,52 +1858,39 @@ shabal_4way_close( void *cc, unsigned ub, unsigned n, void *dst,
|
||||
}
|
||||
|
||||
void
|
||||
shabal256_4way_init( void *cc )
|
||||
shabal256_4x32_init( void *cc )
|
||||
{
|
||||
shabal_4way_init(cc, 256);
|
||||
shabal_4x32_init(cc, 256);
|
||||
}
|
||||
|
||||
void
|
||||
shabal256_4way_update( void *cc, const void *data, size_t len )
|
||||
shabal256_4x32_update( void *cc, const void *data, size_t len )
|
||||
{
|
||||
shabal_4way_core( cc, data, len );
|
||||
shabal_4x32_core( cc, data, len );
|
||||
}
|
||||
|
||||
void
|
||||
shabal256_4way_close( void *cc, void *dst )
|
||||
shabal256_4x32_close( void *cc, void *dst )
|
||||
{
|
||||
shabal_4way_close(cc, 0, 0, dst, 8);
|
||||
shabal_4x32_close(cc, 0, 0, dst, 8);
|
||||
}
|
||||
|
||||
void
|
||||
shabal256_4way_addbits_and_close( void *cc, unsigned ub, unsigned n,
|
||||
void *dst )
|
||||
shabal512_4x32_init(void *cc)
|
||||
{
|
||||
shabal_4way_close(cc, ub, n, dst, 8);
|
||||
shabal_4x32_init(cc, 512);
|
||||
}
|
||||
|
||||
void
|
||||
shabal512_4way_init(void *cc)
|
||||
shabal512_4x32_update(void *cc, const void *data, size_t len)
|
||||
{
|
||||
shabal_4way_init(cc, 512);
|
||||
shabal_4x32_core(cc, data, len);
|
||||
}
|
||||
|
||||
void
|
||||
shabal512_4way_update(void *cc, const void *data, size_t len)
|
||||
shabal512_4x32_close(void *cc, void *dst)
|
||||
{
|
||||
shabal_4way_core(cc, data, len);
|
||||
}
|
||||
|
||||
void
|
||||
shabal512_4way_close(void *cc, void *dst)
|
||||
{
|
||||
shabal_4way_close(cc, 0, 0, dst, 16);
|
||||
}
|
||||
|
||||
void
|
||||
shabal512_4way_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
|
||||
{
|
||||
shabal_4way_close(cc, ub, n, dst, 16);
|
||||
shabal_4x32_close(cc, 0, 0, dst, 16);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -4,10 +4,6 @@
|
||||
#include <stddef.h>
|
||||
#include "simd-utils.h"
|
||||
|
||||
#define SPH_SIZE_shabal256 256
|
||||
|
||||
#define SPH_SIZE_shabal512 512
|
||||
|
||||
#if defined(SIMD512)
|
||||
|
||||
typedef struct {
|
||||
@@ -16,22 +12,27 @@ typedef struct {
|
||||
uint32_t Whigh, Wlow;
|
||||
size_t ptr;
|
||||
bool state_loaded;
|
||||
} shabal_16way_context __attribute__ ((aligned (64)));
|
||||
} shabal_16x32_context __attribute__ ((aligned (64)));
|
||||
|
||||
typedef shabal_16way_context shabal256_16way_context;
|
||||
typedef shabal_16way_context shabal512_16way_context;
|
||||
typedef shabal_16x32_context shabal256_16x32_context;
|
||||
typedef shabal_16x32_context shabal512_16x32_context;
|
||||
|
||||
void shabal256_16way_init( void *cc );
|
||||
void shabal256_16way_update( void *cc, const void *data, size_t len );
|
||||
void shabal256_16way_close( void *cc, void *dst );
|
||||
void shabal256_16way_addbits_and_close( void *cc, unsigned ub, unsigned n,
|
||||
void *dst );
|
||||
void shabal256_16x32_init( void *cc );
|
||||
void shabal256_16x32_update( void *cc, const void *data, size_t len );
|
||||
void shabal256_16x32_close( void *cc, void *dst );
|
||||
|
||||
void shabal512_16way_init( void *cc );
|
||||
void shabal512_16way_update( void *cc, const void *data, size_t len );
|
||||
void shabal512_16way_close( void *cc, void *dst );
|
||||
void shabal512_16way_addbits_and_close( void *cc, unsigned ub, unsigned n,
|
||||
void *dst );
|
||||
void shabal512_16x32_init( void *cc );
|
||||
void shabal512_16x32_update( void *cc, const void *data, size_t len );
|
||||
void shabal512_16x32_close( void *cc, void *dst );
|
||||
|
||||
#define shabal256_16way_context shabal256_16x32_context
|
||||
#define shabal256_16way_init shabal256_16x32_init
|
||||
#define shabal256_16way_update shabal256_16x32_update
|
||||
#define shabal256_16way_close shabal256_16x32_close
|
||||
#define shabal512_16way_context shabal512_16x32_context
|
||||
#define shabal512_16way_init shabal512_16x32_init
|
||||
#define shabal512_16way_update shabal512_16x32_update
|
||||
#define shabal512_16way_close shabal512_16x32_close
|
||||
|
||||
#endif
|
||||
|
||||
@@ -43,22 +44,27 @@ typedef struct {
|
||||
uint32_t Whigh, Wlow;
|
||||
size_t ptr;
|
||||
bool state_loaded;
|
||||
} shabal_8way_context __attribute__ ((aligned (64)));
|
||||
} shabal_8x32_context __attribute__ ((aligned (64)));
|
||||
|
||||
typedef shabal_8way_context shabal256_8way_context;
|
||||
typedef shabal_8way_context shabal512_8way_context;
|
||||
typedef shabal_8x32_context shabal256_8x32_context;
|
||||
typedef shabal_8x32_context shabal512_8x32_context;
|
||||
|
||||
void shabal256_8way_init( void *cc );
|
||||
void shabal256_8way_update( void *cc, const void *data, size_t len );
|
||||
void shabal256_8way_close( void *cc, void *dst );
|
||||
void shabal256_8way_addbits_and_close( void *cc, unsigned ub, unsigned n,
|
||||
void *dst );
|
||||
void shabal256_8x32_init( void *cc );
|
||||
void shabal256_8x32_update( void *cc, const void *data, size_t len );
|
||||
void shabal256_8x32_close( void *cc, void *dst );
|
||||
|
||||
void shabal512_8way_init( void *cc );
|
||||
void shabal512_8way_update( void *cc, const void *data, size_t len );
|
||||
void shabal512_8way_close( void *cc, void *dst );
|
||||
void shabal512_8way_addbits_and_close( void *cc, unsigned ub, unsigned n,
|
||||
void *dst );
|
||||
void shabal512_8x32_init( void *cc );
|
||||
void shabal512_8x32_update( void *cc, const void *data, size_t len );
|
||||
void shabal512_8x32_close( void *cc, void *dst );
|
||||
|
||||
#define shabal256_8way_context shabal256_8x32_context
|
||||
#define shabal256_8way_init shabal256_8x32_init
|
||||
#define shabal256_8way_update shabal256_8x32_update
|
||||
#define shabal256_8way_close shabal256_8x32_close
|
||||
#define shabal512_8way_context shabal512_8x32_context
|
||||
#define shabal512_8way_init shabal512_8x32_init
|
||||
#define shabal512_8way_update shabal512_8x32_update
|
||||
#define shabal512_8way_close shabal512_8x32_close
|
||||
|
||||
#endif
|
||||
|
||||
@@ -70,59 +76,29 @@ typedef struct {
|
||||
uint32_t Whigh, Wlow;
|
||||
size_t ptr;
|
||||
bool state_loaded;
|
||||
} shabal_4way_context;
|
||||
} shabal_4x32_context;
|
||||
|
||||
typedef shabal_4way_context shabal256_4way_context;
|
||||
typedef shabal_4way_context shabal512_4way_context;
|
||||
typedef shabal_4x32_context shabal256_4x32_context;
|
||||
typedef shabal_4x32_context shabal512_4x32_context;
|
||||
|
||||
void shabal256_4way_init( void *cc );
|
||||
void shabal256_4way_update( void *cc, const void *data, size_t len );
|
||||
void shabal256_4way_close( void *cc, void *dst );
|
||||
void shabal256_4way_addbits_and_close( void *cc, unsigned ub, unsigned n,
|
||||
void *dst );
|
||||
void shabal256_4x32_init( void *cc );
|
||||
void shabal256_4x32_update( void *cc, const void *data, size_t len );
|
||||
void shabal256_4x32_close( void *cc, void *dst );
|
||||
|
||||
void shabal512_4way_init( void *cc );
|
||||
void shabal512_4way_update( void *cc, const void *data, size_t len );
|
||||
void shabal512_4way_close( void *cc, void *dst );
|
||||
void shabal512_4way_addbits_and_close( void *cc, unsigned ub, unsigned n,
|
||||
void *dst );
|
||||
void shabal512_4x32_init( void *cc );
|
||||
void shabal512_4x32_update( void *cc, const void *data, size_t len );
|
||||
void shabal512_4x32_close( void *cc, void *dst );
|
||||
|
||||
#define shabal256_4way_context shabal256_4x32_context
|
||||
#define shabal256_4way_init shabal256_4x32_init
|
||||
#define shabal256_4way_update shabal256_4x32_update
|
||||
#define shabal256_4way_close shabal256_4x32_close
|
||||
#define shabal512_4way_context shabal512_4x32_context
|
||||
#define shabal512_4way_init shabal512_4x32_init
|
||||
#define shabal512_4way_update shabal512_4x32_update
|
||||
#define shabal512_4way_close shabal512_4x32_close
|
||||
|
||||
#endif
|
||||
|
||||
// SSE or NEON
|
||||
|
||||
/* No __mullo_pi32
|
||||
|
||||
typedef struct
|
||||
{
|
||||
v64_t buf[16] __attribute__ ((aligned (64)));
|
||||
v64_t A[12], B[16], C[16];
|
||||
uint32_t Whigh, Wlow;
|
||||
size_t ptr;
|
||||
bool state_loaded;
|
||||
} shabal_2x32_context;
|
||||
|
||||
typedef shabal_2x32_context shabal256_2x32_context;
|
||||
typedef shabal_2x32_context shabal512_2x32_context;
|
||||
|
||||
void shabal256_2x32_init( void *cc );
|
||||
void shabal256_2x32_update( void *cc, const void *data, size_t len );
|
||||
void shabal256_2x32_close( void *cc, void *dst );
|
||||
void shabal256_2x32_addbits_and_close( void *cc, unsigned ub, unsigned n,
|
||||
void *dst );
|
||||
|
||||
void shabal512_2x32_init( shabal512_2x32_context *cc );
|
||||
void shabal512_2x32_update( shabal512_2x32_context *cc, const void *data,
|
||||
size_t len );
|
||||
void shabal512_2x32_close( shabal512_2x32_context *cc, void *dst );
|
||||
void shabal512_2x32_addbits_and_close( shabal512_2x32_context *cc,
|
||||
unsigned ub, unsigned n, void *dst );
|
||||
void shabal512_2x32_ctx( shabal512_2x32_context *cc, void *dst,
|
||||
const void *data, size_t len );
|
||||
void shabal512_2x32( shabal512_2x32_context *dst, const void *data,
|
||||
size_t len );
|
||||
|
||||
*/
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
@@ -6,23 +6,23 @@
|
||||
|
||||
#if defined (SKEIN_8WAY)
|
||||
|
||||
static __thread skein512_8way_context skein512_8way_ctx
|
||||
static __thread skein512_8x64_context skein512_8x64_ctx
|
||||
__attribute__ ((aligned (64)));
|
||||
|
||||
void skeinhash_8way( void *state, const void *input )
|
||||
{
|
||||
uint64_t vhash64[8*8] __attribute__ ((aligned (128)));
|
||||
skein512_8way_context ctx_skein;
|
||||
memcpy( &ctx_skein, &skein512_8way_ctx, sizeof( ctx_skein ) );
|
||||
skein512_8x64_context ctx_skein;
|
||||
memcpy( &ctx_skein, &skein512_8x64_ctx, sizeof( ctx_skein ) );
|
||||
uint32_t vhash32[16*8] __attribute__ ((aligned (128)));
|
||||
sha256_8way_context ctx_sha256;
|
||||
sha256_8x32_context ctx_sha256;
|
||||
|
||||
skein512_8way_final16( &ctx_skein, vhash64, input + (64*8) );
|
||||
skein512_8x64_final16( &ctx_skein, vhash64, input + (64*8) );
|
||||
rintrlv_8x64_8x32( vhash32, vhash64, 512 );
|
||||
|
||||
sha256_8way_init( &ctx_sha256 );
|
||||
sha256_8way_update( &ctx_sha256, vhash32, 64 );
|
||||
sha256_8way_close( &ctx_sha256, state );
|
||||
sha256_8x32_init( &ctx_sha256 );
|
||||
sha256_8x32_update( &ctx_sha256, vhash32, 64 );
|
||||
sha256_8x32_close( &ctx_sha256, state );
|
||||
}
|
||||
|
||||
int scanhash_skein_8way( struct work *work, uint32_t max_nonce,
|
||||
@@ -46,7 +46,7 @@ int scanhash_skein_8way( struct work *work, uint32_t max_nonce,
|
||||
*noncev = mm512_intrlv_blend_32(
|
||||
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
|
||||
n+3, 0, n+2, 0, n+1, 0, n , 0 ), *noncev );
|
||||
skein512_8way_prehash64( &skein512_8way_ctx, vdata );
|
||||
skein512_8x64_prehash64( &skein512_8x64_ctx, vdata );
|
||||
do
|
||||
{
|
||||
skeinhash_8way( hash, vdata );
|
||||
@@ -73,14 +73,14 @@ int scanhash_skein_8way( struct work *work, uint32_t max_nonce,
|
||||
|
||||
#elif defined (SKEIN_4WAY)
|
||||
|
||||
static __thread skein512_4way_context skein512_4way_ctx
|
||||
static __thread skein512_4x64_context skein512_4x64_ctx
|
||||
__attribute__ ((aligned (64)));
|
||||
|
||||
void skeinhash_4way( void *state, const void *input )
|
||||
{
|
||||
uint64_t vhash64[8*4] __attribute__ ((aligned (128)));
|
||||
skein512_4way_context ctx_skein;
|
||||
memcpy( &ctx_skein, &skein512_4way_ctx, sizeof( ctx_skein ) );
|
||||
skein512_4x64_context ctx_skein;
|
||||
memcpy( &ctx_skein, &skein512_4x64_ctx, sizeof( ctx_skein ) );
|
||||
#if defined(__SHA__)
|
||||
uint32_t hash0[16] __attribute__ ((aligned (64)));
|
||||
uint32_t hash1[16] __attribute__ ((aligned (64)));
|
||||
@@ -88,10 +88,10 @@ void skeinhash_4way( void *state, const void *input )
|
||||
uint32_t hash3[16] __attribute__ ((aligned (64)));
|
||||
#else
|
||||
uint32_t vhash32[16*4] __attribute__ ((aligned (64)));
|
||||
sha256_4way_context ctx_sha256;
|
||||
sha256_4x32_context ctx_sha256;
|
||||
#endif
|
||||
|
||||
skein512_4way_final16( &ctx_skein, vhash64, input + (64*4) );
|
||||
skein512_4x64_final16( &ctx_skein, vhash64, input + (64*4) );
|
||||
|
||||
#if defined(__SHA__)
|
||||
|
||||
@@ -107,9 +107,9 @@ void skeinhash_4way( void *state, const void *input )
|
||||
#else
|
||||
|
||||
rintrlv_4x64_4x32( vhash32, vhash64, 512 );
|
||||
sha256_4way_init( &ctx_sha256 );
|
||||
sha256_4way_update( &ctx_sha256, vhash32, 64 );
|
||||
sha256_4way_close( &ctx_sha256, state );
|
||||
sha256_4x32_init( &ctx_sha256 );
|
||||
sha256_4x32_update( &ctx_sha256, vhash32, 64 );
|
||||
sha256_4x32_close( &ctx_sha256, state );
|
||||
|
||||
#endif
|
||||
}
|
||||
@@ -132,7 +132,7 @@ int scanhash_skein_4way( struct work *work, uint32_t max_nonce,
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
skein512_4way_prehash64( &skein512_4way_ctx, vdata );
|
||||
skein512_4x64_prehash64( &skein512_4x64_ctx, vdata );
|
||||
|
||||
*noncev = mm256_intrlv_blend_32(
|
||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
||||
|
||||
@@ -513,7 +513,7 @@ do { \
|
||||
|
||||
#if defined(SIMD512)
|
||||
|
||||
void skein256_8way_init( skein256_8way_context *sc )
|
||||
void skein256_8x64_init( skein256_8x64_context *sc )
|
||||
{
|
||||
sc->h0 = _mm512_set1_epi64( 0xCCD044A12FDB3E13 );
|
||||
sc->h1 = _mm512_set1_epi64( 0xE83590301A79A9EB );
|
||||
@@ -527,7 +527,7 @@ void skein256_8way_init( skein256_8way_context *sc )
|
||||
sc->ptr = 0;
|
||||
}
|
||||
|
||||
void skein512_8way_init( skein512_8way_context *sc )
|
||||
void skein512_8x64_init( skein512_8x64_context *sc )
|
||||
{
|
||||
sc->h0 = _mm512_set1_epi64( 0x4903ADFF749C51CE );
|
||||
sc->h1 = _mm512_set1_epi64( 0x0D95DE399746DF03 );
|
||||
@@ -542,7 +542,7 @@ void skein512_8way_init( skein512_8way_context *sc )
|
||||
}
|
||||
|
||||
static void
|
||||
skein_big_core_8way( skein512_8way_context *sc, const void *data,
|
||||
skein_big_core_8x64( skein512_8x64_context *sc, const void *data,
|
||||
size_t len )
|
||||
{
|
||||
__m512i *vdata = (__m512i*)data;
|
||||
@@ -587,7 +587,7 @@ skein_big_core_8way( skein512_8way_context *sc, const void *data,
|
||||
}
|
||||
|
||||
static void
|
||||
skein_big_close_8way( skein512_8way_context *sc, unsigned ub, unsigned n,
|
||||
skein_big_close_8x64( skein512_8x64_context *sc, unsigned ub, unsigned n,
|
||||
void *dst, size_t out_len )
|
||||
{
|
||||
__m512i *buf;
|
||||
@@ -621,7 +621,7 @@ skein_big_close_8way( skein512_8way_context *sc, unsigned ub, unsigned n,
|
||||
memcpy_512( dst, buf, out_len >> 3 );
|
||||
}
|
||||
|
||||
void skein512_8way_full( skein512_8way_context *sc, void *out, const void *data,
|
||||
void skein512_8x64_full( skein512_8x64_context *sc, void *out, const void *data,
|
||||
size_t len )
|
||||
{
|
||||
__m512i h0, h1, h2, h3, h4, h5, h6, h7;
|
||||
@@ -698,7 +698,7 @@ void skein512_8way_full( skein512_8way_context *sc, void *out, const void *data,
|
||||
}
|
||||
|
||||
void
|
||||
skein512_8way_prehash64( skein512_8way_context *sc, const void *data )
|
||||
skein512_8x64_prehash64( skein512_8x64_context *sc, const void *data )
|
||||
{
|
||||
__m512i *vdata = (__m512i*)data;
|
||||
__m512i *buf = sc->buf;
|
||||
@@ -732,7 +732,7 @@ skein512_8way_prehash64( skein512_8way_context *sc, const void *data )
|
||||
}
|
||||
|
||||
void
|
||||
skein512_8way_final16( skein512_8way_context *sc, void *output,
|
||||
skein512_8x64_final16( skein512_8x64_context *sc, void *output,
|
||||
const void *data )
|
||||
{
|
||||
__m512i *in = (__m512i*)data;
|
||||
@@ -778,34 +778,34 @@ skein512_8way_final16( skein512_8way_context *sc, void *output,
|
||||
|
||||
|
||||
void
|
||||
skein256_8way_update(void *cc, const void *data, size_t len)
|
||||
skein256_8x64_update(void *cc, const void *data, size_t len)
|
||||
{
|
||||
skein_big_core_8way(cc, data, len);
|
||||
skein_big_core_8x64(cc, data, len);
|
||||
}
|
||||
|
||||
void
|
||||
skein256_8way_close(void *cc, void *dst)
|
||||
skein256_8x64_close(void *cc, void *dst)
|
||||
{
|
||||
skein_big_close_8way(cc, 0, 0, dst, 32);
|
||||
skein_big_close_8x64(cc, 0, 0, dst, 32);
|
||||
}
|
||||
|
||||
void
|
||||
skein512_8way_update(void *cc, const void *data, size_t len)
|
||||
skein512_8x64_update(void *cc, const void *data, size_t len)
|
||||
{
|
||||
skein_big_core_8way(cc, data, len);
|
||||
skein_big_core_8x64(cc, data, len);
|
||||
}
|
||||
|
||||
void
|
||||
skein512_8way_close(void *cc, void *dst)
|
||||
skein512_8x64_close(void *cc, void *dst)
|
||||
{
|
||||
skein_big_close_8way(cc, 0, 0, dst, 64);
|
||||
skein_big_close_8x64(cc, 0, 0, dst, 64);
|
||||
}
|
||||
|
||||
#endif // AVX512
|
||||
|
||||
#if defined(__AVX2__)
|
||||
|
||||
void skein256_4way_init( skein256_4way_context *sc )
|
||||
void skein256_4x64_init( skein256_4x64_context *sc )
|
||||
{
|
||||
sc->h0 = _mm256_set1_epi64x( 0xCCD044A12FDB3E13 );
|
||||
sc->h1 = _mm256_set1_epi64x( 0xE83590301A79A9EB );
|
||||
@@ -819,7 +819,7 @@ void skein256_4way_init( skein256_4way_context *sc )
|
||||
sc->ptr = 0;
|
||||
}
|
||||
|
||||
void skein512_4way_init( skein512_4way_context *sc )
|
||||
void skein512_4x64_init( skein512_4x64_context *sc )
|
||||
{
|
||||
sc->h0 = _mm256_set1_epi64x( 0x4903ADFF749C51CE );
|
||||
sc->h1 = _mm256_set1_epi64x( 0x0D95DE399746DF03 );
|
||||
@@ -835,7 +835,7 @@ void skein512_4way_init( skein512_4way_context *sc )
|
||||
|
||||
// Do not use for 128 byte data length
|
||||
static void
|
||||
skein_big_core_4way( skein512_4way_context *sc, const void *data,
|
||||
skein_big_core_4x64( skein512_4x64_context *sc, const void *data,
|
||||
size_t len )
|
||||
{
|
||||
__m256i *vdata = (__m256i*)data;
|
||||
@@ -882,7 +882,7 @@ skein_big_core_4way( skein512_4way_context *sc, const void *data,
|
||||
}
|
||||
|
||||
static void
|
||||
skein_big_close_4way( skein512_4way_context *sc, unsigned ub, unsigned n,
|
||||
skein_big_close_4x64( skein512_4x64_context *sc, unsigned ub, unsigned n,
|
||||
void *dst, size_t out_len )
|
||||
{
|
||||
__m256i *buf;
|
||||
@@ -920,7 +920,7 @@ skein_big_close_4way( skein512_4way_context *sc, unsigned ub, unsigned n,
|
||||
}
|
||||
|
||||
void
|
||||
skein512_4way_full( skein512_4way_context *sc, void *out, const void *data,
|
||||
skein512_4x64_full( skein512_4x64_context *sc, void *out, const void *data,
|
||||
size_t len )
|
||||
{
|
||||
__m256i h0, h1, h2, h3, h4, h5, h6, h7;
|
||||
@@ -995,7 +995,7 @@ skein512_4way_full( skein512_4way_context *sc, void *out, const void *data,
|
||||
}
|
||||
|
||||
void
|
||||
skein512_4way_prehash64( skein512_4way_context *sc, const void *data )
|
||||
skein512_4x64_prehash64( skein512_4x64_context *sc, const void *data )
|
||||
{
|
||||
__m256i *vdata = (__m256i*)data;
|
||||
__m256i *buf = sc->buf;
|
||||
@@ -1029,7 +1029,7 @@ skein512_4way_prehash64( skein512_4way_context *sc, const void *data )
|
||||
}
|
||||
|
||||
void
|
||||
skein512_4way_final16( skein512_4way_context *sc, void *out, const void *data )
|
||||
skein512_4x64_final16( skein512_4x64_context *sc, void *out, const void *data )
|
||||
{
|
||||
__m256i *vdata = (__m256i*)data;
|
||||
__m256i *buf = sc->buf;
|
||||
@@ -1073,29 +1073,29 @@ skein512_4way_final16( skein512_4way_context *sc, void *out, const void *data )
|
||||
|
||||
// Broken for 80 bytes, use prehash.
|
||||
void
|
||||
skein256_4way_update(void *cc, const void *data, size_t len)
|
||||
skein256_4x64_update(void *cc, const void *data, size_t len)
|
||||
{
|
||||
skein_big_core_4way(cc, data, len);
|
||||
skein_big_core_4x64(cc, data, len);
|
||||
}
|
||||
|
||||
void
|
||||
skein256_4way_close(void *cc, void *dst)
|
||||
skein256_4x64_close(void *cc, void *dst)
|
||||
{
|
||||
skein_big_close_4way(cc, 0, 0, dst, 32);
|
||||
skein_big_close_4x64(cc, 0, 0, dst, 32);
|
||||
}
|
||||
|
||||
|
||||
// Broken for 80 & 128 bytes, use prehash or full
|
||||
void
|
||||
skein512_4way_update(void *cc, const void *data, size_t len)
|
||||
skein512_4x64_update(void *cc, const void *data, size_t len)
|
||||
{
|
||||
skein_big_core_4way(cc, data, len);
|
||||
skein_big_core_4x64(cc, data, len);
|
||||
}
|
||||
|
||||
void
|
||||
skein512_4way_close(void *cc, void *dst)
|
||||
skein512_4x64_close(void *cc, void *dst)
|
||||
{
|
||||
skein_big_close_4way(cc, 0, 0, dst, 64);
|
||||
skein_big_close_4x64(cc, 0, 0, dst, 64);
|
||||
}
|
||||
|
||||
#endif // AVX2
|
||||
@@ -1231,7 +1231,7 @@ void skein512_2x64_init( skein512_2x64_context *sc )
|
||||
}
|
||||
|
||||
static void
|
||||
skein_big_core_2way( skein512_2x64_context *sc, const void *data,
|
||||
skein_big_core_2x64( skein512_2x64_context *sc, const void *data,
|
||||
size_t len )
|
||||
{
|
||||
v128u64_t *vdata = (v128u64_t*)data;
|
||||
@@ -1278,7 +1278,7 @@ skein_big_core_2way( skein512_2x64_context *sc, const void *data,
|
||||
}
|
||||
|
||||
static void
|
||||
skein_big_close_2way( skein512_2x64_context *sc, unsigned ub, unsigned n,
|
||||
skein_big_close_2x64( skein512_2x64_context *sc, unsigned ub, unsigned n,
|
||||
void *dst, size_t out_len )
|
||||
{
|
||||
v128u64_t *buf;
|
||||
@@ -1471,13 +1471,13 @@ skein512_2x64_final16( skein512_2x64_context *sc, void *out, const void *data )
|
||||
void
|
||||
skein256_2x64_update(void *cc, const void *data, size_t len)
|
||||
{
|
||||
skein_big_core_2way(cc, data, len);
|
||||
skein_big_core_2x64(cc, data, len);
|
||||
}
|
||||
|
||||
void
|
||||
skein256_2x64_close(void *cc, void *dst)
|
||||
{
|
||||
skein_big_close_2way(cc, 0, 0, dst, 32);
|
||||
skein_big_close_2x64(cc, 0, 0, dst, 32);
|
||||
}
|
||||
|
||||
|
||||
@@ -1485,13 +1485,12 @@ skein256_2x64_close(void *cc, void *dst)
|
||||
void
|
||||
skein512_2x64_update(void *cc, const void *data, size_t len)
|
||||
{
|
||||
skein_big_core_2way(cc, data, len);
|
||||
skein_big_core_2x64(cc, data, len);
|
||||
}
|
||||
|
||||
void
|
||||
skein512_2x64_close(void *cc, void *dst)
|
||||
{
|
||||
skein_big_close_2way(cc, 0, 0, dst, 64);
|
||||
skein_big_close_2x64(cc, 0, 0, dst, 64);
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -52,24 +52,36 @@ typedef struct
|
||||
__m512i h0, h1, h2, h3, h4, h5, h6, h7;
|
||||
size_t ptr;
|
||||
uint64_t bcount;
|
||||
} skein_8way_big_context __attribute__ ((aligned (128)));
|
||||
} skein_8x64_big_context __attribute__ ((aligned (128)));
|
||||
|
||||
typedef skein_8way_big_context skein512_8way_context;
|
||||
typedef skein_8way_big_context skein256_8way_context;
|
||||
typedef skein_8x64_big_context skein512_8x64_context;
|
||||
typedef skein_8x64_big_context skein256_8x64_context;
|
||||
|
||||
void skein512_8way_full( skein512_8way_context *sc, void *out,
|
||||
void skein512_8x64_full( skein512_8x64_context *sc, void *out,
|
||||
const void *data, size_t len );
|
||||
void skein512_8way_init( skein512_8way_context *sc );
|
||||
void skein512_8way_update( void *cc, const void *data, size_t len );
|
||||
void skein512_8way_close( void *cc, void *dst );
|
||||
void skein512_8x64_init( skein512_8x64_context *sc );
|
||||
void skein512_8x64_update( void *cc, const void *data, size_t len );
|
||||
void skein512_8x64_close( void *cc, void *dst );
|
||||
|
||||
void skein512_8way_prehash64( skein512_8way_context *sc, const void *data );
|
||||
void skein512_8way_final16( skein512_8way_context *sc, void *out,
|
||||
void skein512_8x64_prehash64( skein512_8x64_context *sc, const void *data );
|
||||
void skein512_8x64_final16( skein512_8x64_context *sc, void *out,
|
||||
const void *data );
|
||||
|
||||
void skein256_8way_init( skein256_8way_context *sc );
|
||||
void skein256_8way_update( void *cc, const void *data, size_t len );
|
||||
void skein256_8way_close( void *cc, void *dst );
|
||||
void skein256_8x64_init( skein256_8x64_context *sc );
|
||||
void skein256_8x64_update( void *cc, const void *data, size_t len );
|
||||
void skein256_8x64_close( void *cc, void *dst );
|
||||
|
||||
// Compatibility aliases: each skein512_8way_* / skein256_8way_* identifier
// below expands to the same-suffixed *_8x64_* name, so code written against
// the 8way spelling resolves to the 8x64 context type and functions.
#define skein512_8way_context skein512_8x64_context
|
||||
#define skein512_8way_full skein512_8x64_full
|
||||
#define skein512_8way_init skein512_8x64_init
|
||||
#define skein512_8way_update skein512_8x64_update
|
||||
#define skein512_8way_close skein512_8x64_close
|
||||
#define skein512_8way_prehash64 skein512_8x64_prehash64
|
||||
#define skein512_8way_final16 skein512_8x64_final16
|
||||
#define skein256_8way_context skein256_8x64_context
|
||||
#define skein256_8way_init skein256_8x64_init
|
||||
#define skein256_8way_update skein256_8x64_update
|
||||
#define skein256_8way_close skein256_8x64_close
|
||||
|
||||
#endif // AVX512
|
||||
|
||||
@@ -81,25 +93,35 @@ typedef struct
|
||||
__m256i h0, h1, h2, h3, h4, h5, h6, h7;
|
||||
size_t ptr;
|
||||
uint64_t bcount;
|
||||
} skein_4way_big_context __attribute__ ((aligned (128)));
|
||||
} skein_4x64_big_context __attribute__ ((aligned (128)));
|
||||
|
||||
typedef skein_4way_big_context skein512_4way_context;
|
||||
typedef skein_4way_big_context skein256_4way_context;
|
||||
typedef skein_4x64_big_context skein512_4x64_context;
|
||||
typedef skein_4x64_big_context skein256_4x64_context;
|
||||
|
||||
void skein512_4way_init( skein512_4way_context *sc );
|
||||
void skein512_4way_full( skein512_4way_context *sc, void *out,
|
||||
void skein512_4x64_init( skein512_4x64_context *sc );
|
||||
void skein512_4x64_full( skein512_4x64_context *sc, void *out,
|
||||
const void *data, size_t len );
|
||||
void skein512_4way_update( void *cc, const void *data, size_t len );
|
||||
void skein512_4way_close( void *cc, void *dst );
|
||||
|
||||
void skein256_4way_init( skein256_4way_context *sc );
|
||||
void skein256_4way_update( void *cc, const void *data, size_t len );
|
||||
void skein256_4way_close( void *cc, void *dst );
|
||||
|
||||
void skein512_4way_prehash64( skein512_4way_context *sc, const void *data );
|
||||
void skein512_4way_final16( skein512_4way_context *sc, void *out,
|
||||
void skein512_4x64_update( void *cc, const void *data, size_t len );
|
||||
void skein512_4x64_close( void *cc, void *dst );
|
||||
void skein512_4x64_prehash64( skein512_4x64_context *sc, const void *data );
|
||||
void skein512_4x64_final16( skein512_4x64_context *sc, void *out,
|
||||
const void *data );
|
||||
|
||||
void skein256_4x64_init( skein256_4x64_context *sc );
|
||||
void skein256_4x64_update( void *cc, const void *data, size_t len );
|
||||
void skein256_4x64_close( void *cc, void *dst );
|
||||
|
||||
// Compatibility aliases: each skein512_4way_* / skein256_4way_* identifier
// below expands to the same-suffixed *_4x64_* name, so code written against
// the 4way spelling resolves to the 4x64 context type and functions.
#define skein512_4way_context skein512_4x64_context
|
||||
#define skein512_4way_full skein512_4x64_full
|
||||
#define skein512_4way_init skein512_4x64_init
|
||||
#define skein512_4way_update skein512_4x64_update
|
||||
#define skein512_4way_close skein512_4x64_close
|
||||
#define skein512_4way_prehash64 skein512_4x64_prehash64
|
||||
#define skein512_4way_final16 skein512_4x64_final16
|
||||
#define skein256_4way_context skein256_4x64_context
|
||||
#define skein256_4way_init skein256_4x64_init
|
||||
#define skein256_4way_update skein256_4x64_update
|
||||
#define skein256_4way_close skein256_4x64_close
|
||||
|
||||
#endif
|
||||
|
||||
@@ -109,10 +131,10 @@ typedef struct
|
||||
v128u64_t h0, h1, h2, h3, h4, h5, h6, h7;
|
||||
size_t ptr;
|
||||
uint64_t bcount;
|
||||
} skein_2way_big_context __attribute__ ((aligned (128)));
|
||||
} skein_2x64_big_context __attribute__ ((aligned (128)));
|
||||
|
||||
typedef skein_2way_big_context skein512_2x64_context;
|
||||
typedef skein_2way_big_context skein256_2x64_context;
|
||||
typedef skein_2x64_big_context skein512_2x64_context;
|
||||
typedef skein_2x64_big_context skein256_2x64_context;
|
||||
|
||||
void skein512_2x64_init( skein512_2x64_context *sc );
|
||||
void skein512_2x64_full( skein512_2x64_context *sc, void *out,
|
||||
|
||||
@@ -21,17 +21,17 @@ int scanhash_skein2_8way( struct work *work, uint32_t max_nonce,
|
||||
__m512i *noncev = (__m512i*)vdata + 9;
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
skein512_8way_context ctx;
|
||||
skein512_8x64_context ctx;
|
||||
|
||||
mm512_bswap32_intrlv80_8x64( vdata, pdata );
|
||||
*noncev = mm512_intrlv_blend_32(
|
||||
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
|
||||
n+3, 0, n+2, 0, n+1, 0, n , 0 ), *noncev );
|
||||
skein512_8way_prehash64( &ctx, vdata );
|
||||
skein512_8x64_prehash64( &ctx, vdata );
|
||||
do
|
||||
{
|
||||
skein512_8way_final16( &ctx, hash, vdata + (16*8) );
|
||||
skein512_8way_full( &ctx, hash, hash, 64 );
|
||||
skein512_8x64_final16( &ctx, hash, vdata + (16*8) );
|
||||
skein512_8x64_full( &ctx, hash, hash, 64 );
|
||||
|
||||
for ( int lane = 0; lane < 8; lane++ )
|
||||
if ( unlikely( hashq3[ lane ] <= targq3 && !bench ) )
|
||||
@@ -71,16 +71,16 @@ int scanhash_skein2_4way( struct work *work, uint32_t max_nonce,
|
||||
__m256i *noncev = (__m256i*)vdata + 9;
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
skein512_4way_context ctx;
|
||||
skein512_4x64_context ctx;
|
||||
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
skein512_4way_prehash64( &ctx, vdata );
|
||||
skein512_4x64_prehash64( &ctx, vdata );
|
||||
*noncev = mm256_intrlv_blend_32(
|
||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
||||
do
|
||||
{
|
||||
skein512_4way_final16( &ctx, hash, vdata + (16*4) );
|
||||
skein512_4way_full( &ctx, hash, hash, 64 );
|
||||
skein512_4x64_final16( &ctx, hash, vdata + (16*4) );
|
||||
skein512_4x64_full( &ctx, hash, hash, 64 );
|
||||
|
||||
for ( int lane = 0; lane < 4; lane++ )
|
||||
if ( hash_q3[ lane ] <= targ_q3 )
|
||||
|
||||
@@ -189,7 +189,7 @@ int scanhash_hex( struct work *work, uint32_t max_nonce,
|
||||
v128_bswap32_80( edata, pdata );
|
||||
|
||||
static __thread uint32_t s_ntime = UINT32_MAX;
|
||||
uint32_t ntime = swab32(pdata[17]);
|
||||
uint32_t ntime = bswap_32(pdata[17]);
|
||||
if ( s_ntime != ntime )
|
||||
{
|
||||
hex_getAlgoString( (const uint32_t*) (&edata[1]), x16r_hash_order );
|
||||
|
||||
@@ -31,18 +31,18 @@ void x16r_8way_prehash( void *vdata, void *pdata, const char *hash_order )
|
||||
{
|
||||
case JH:
|
||||
mm512_bswap32_intrlv80_8x64( vdata, pdata );
|
||||
jh512_8way_init( &x16r_ctx.jh );
|
||||
jh512_8way_update( &x16r_ctx.jh, vdata, 64 );
|
||||
jh512_8x64_init( &x16r_ctx.jh );
|
||||
jh512_8x64_update( &x16r_ctx.jh, vdata, 64 );
|
||||
break;
|
||||
case KECCAK:
|
||||
mm512_bswap32_intrlv80_8x64( vdata, pdata );
|
||||
keccak512_8way_init( &x16r_ctx.keccak );
|
||||
keccak512_8way_update( &x16r_ctx.keccak, vdata, 72 );
|
||||
keccak512_8x64_init( &x16r_ctx.keccak );
|
||||
keccak512_8x64_update( &x16r_ctx.keccak, vdata, 72 );
|
||||
break;
|
||||
case SKEIN:
|
||||
mm512_bswap32_intrlv80_8x64( vdata, pdata );
|
||||
skein512_8way_init( &x16r_ctx.skein );
|
||||
skein512_8way_update( &x16r_ctx.skein, vdata, 64 );
|
||||
skein512_8x64_init( &x16r_ctx.skein );
|
||||
skein512_8x64_update( &x16r_ctx.skein, vdata, 64 );
|
||||
break;
|
||||
case LUFFA:
|
||||
{
|
||||
@@ -78,8 +78,8 @@ void x16r_8way_prehash( void *vdata, void *pdata, const char *hash_order )
|
||||
break;
|
||||
case HAMSI:
|
||||
mm512_bswap32_intrlv80_8x64( vdata, pdata );
|
||||
hamsi512_8way_init( &x16r_ctx.hamsi );
|
||||
hamsi512_8way_update( &x16r_ctx.hamsi, vdata, 72 );
|
||||
hamsi512_8x64_init( &x16r_ctx.hamsi );
|
||||
hamsi512_8x64_update( &x16r_ctx.hamsi, vdata, 72 );
|
||||
break;
|
||||
case FUGUE:
|
||||
v128_bswap32_80( edata, pdata );
|
||||
@@ -90,8 +90,8 @@ void x16r_8way_prehash( void *vdata, void *pdata, const char *hash_order )
|
||||
break;
|
||||
case SHABAL:
|
||||
mm256_bswap32_intrlv80_8x32( vdata2, pdata );
|
||||
shabal512_8way_init( &x16r_ctx.shabal );
|
||||
shabal512_8way_update( &x16r_ctx.shabal, vdata2, 64 );
|
||||
shabal512_8x32_init( &x16r_ctx.shabal );
|
||||
shabal512_8x32_update( &x16r_ctx.shabal, vdata2, 64 );
|
||||
rintrlv_8x32_8x64( vdata, vdata2, 640 );
|
||||
break;
|
||||
case WHIRLPOOL:
|
||||
@@ -146,27 +146,27 @@ int x16r_8way_hash_generic( void* output, const void* input, int thrid,
|
||||
{
|
||||
case BLAKE:
|
||||
if ( i == 0 )
|
||||
blake512_8way_full( &ctx.blake, vhash, input, size );
|
||||
blake512_8x64_full( &ctx.blake, vhash, input, size );
|
||||
else
|
||||
{
|
||||
intrlv_8x64( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
|
||||
size<<3 );
|
||||
blake512_8way_full( &ctx.blake, vhash, vhash, size );
|
||||
blake512_8x64_full( &ctx.blake, vhash, vhash, size );
|
||||
}
|
||||
dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5,
|
||||
hash6, hash7, vhash );
|
||||
break;
|
||||
case BMW:
|
||||
bmw512_8way_init( &ctx.bmw );
|
||||
bmw512_8x64_init( &ctx.bmw );
|
||||
if ( i == 0 )
|
||||
bmw512_8way_update( &ctx.bmw, input, size );
|
||||
bmw512_8x64_update( &ctx.bmw, input, size );
|
||||
else
|
||||
{
|
||||
intrlv_8x64( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
|
||||
size<<3 );
|
||||
bmw512_8way_update( &ctx.bmw, vhash, size );
|
||||
bmw512_8x64_update( &ctx.bmw, vhash, size );
|
||||
}
|
||||
bmw512_8way_close( &ctx.bmw, vhash );
|
||||
bmw512_8x64_close( &ctx.bmw, vhash );
|
||||
dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7, vhash );
|
||||
break;
|
||||
@@ -191,43 +191,43 @@ int x16r_8way_hash_generic( void* output, const void* input, int thrid,
|
||||
break;
|
||||
case JH:
|
||||
if ( i == 0 )
|
||||
jh512_8way_update( &ctx.jh, input + (64<<3), 16 );
|
||||
jh512_8x64_update( &ctx.jh, input + (64<<3), 16 );
|
||||
else
|
||||
{
|
||||
intrlv_8x64( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
|
||||
size<<3 );
|
||||
jh512_8way_init( &ctx.jh );
|
||||
jh512_8way_update( &ctx.jh, vhash, size );
|
||||
jh512_8x64_init( &ctx.jh );
|
||||
jh512_8x64_update( &ctx.jh, vhash, size );
|
||||
}
|
||||
jh512_8way_close( &ctx.jh, vhash );
|
||||
jh512_8x64_close( &ctx.jh, vhash );
|
||||
dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7, vhash );
|
||||
break;
|
||||
case KECCAK:
|
||||
if ( i == 0 )
|
||||
keccak512_8way_update( &ctx.keccak, input + (72<<3), 8 );
|
||||
keccak512_8x64_update( &ctx.keccak, input + (72<<3), 8 );
|
||||
else
|
||||
{
|
||||
intrlv_8x64( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
|
||||
size<<3 );
|
||||
keccak512_8way_init( &ctx.keccak );
|
||||
keccak512_8way_update( &ctx.keccak, vhash, size );
|
||||
keccak512_8x64_init( &ctx.keccak );
|
||||
keccak512_8x64_update( &ctx.keccak, vhash, size );
|
||||
}
|
||||
keccak512_8way_close( &ctx.keccak, vhash );
|
||||
keccak512_8x64_close( &ctx.keccak, vhash );
|
||||
dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7, vhash );
|
||||
break;
|
||||
case SKEIN:
|
||||
if ( i == 0 )
|
||||
skein512_8way_update( &ctx.skein, input + (64<<3), 16 );
|
||||
skein512_8x64_update( &ctx.skein, input + (64<<3), 16 );
|
||||
else
|
||||
{
|
||||
intrlv_8x64( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
|
||||
size<<3 );
|
||||
skein512_8way_init( &ctx.skein );
|
||||
skein512_8way_update( &ctx.skein, vhash, size );
|
||||
skein512_8x64_init( &ctx.skein );
|
||||
skein512_8x64_update( &ctx.skein, vhash, size );
|
||||
}
|
||||
skein512_8way_close( &ctx.skein, vhash );
|
||||
skein512_8x64_close( &ctx.skein, vhash );
|
||||
dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7, vhash );
|
||||
break;
|
||||
@@ -333,15 +333,15 @@ int x16r_8way_hash_generic( void* output, const void* input, int thrid,
|
||||
break;
|
||||
case HAMSI:
|
||||
if ( i == 0 )
|
||||
hamsi512_8way_update( &ctx.hamsi, input + (72<<3), 8 );
|
||||
hamsi512_8x64_update( &ctx.hamsi, input + (72<<3), 8 );
|
||||
else
|
||||
{
|
||||
intrlv_8x64( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
|
||||
size<<3 );
|
||||
hamsi512_8way_init( &ctx.hamsi );
|
||||
hamsi512_8way_update( &ctx.hamsi, vhash, size );
|
||||
hamsi512_8x64_init( &ctx.hamsi );
|
||||
hamsi512_8x64_update( &ctx.hamsi, vhash, size );
|
||||
}
|
||||
hamsi512_8way_close( &ctx.hamsi, vhash );
|
||||
hamsi512_8x64_close( &ctx.hamsi, vhash );
|
||||
dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7, vhash );
|
||||
break;
|
||||
@@ -388,13 +388,13 @@ int x16r_8way_hash_generic( void* output, const void* input, int thrid,
|
||||
intrlv_8x32( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
|
||||
size<<3 );
|
||||
if ( i == 0 )
|
||||
shabal512_8way_update( &ctx.shabal, vhash + (16<<3), 16 );
|
||||
shabal512_8x32_update( &ctx.shabal, vhash + (16<<3), 16 );
|
||||
else
|
||||
{
|
||||
shabal512_8way_init( &ctx.shabal );
|
||||
shabal512_8way_update( &ctx.shabal, vhash, size );
|
||||
shabal512_8x32_init( &ctx.shabal );
|
||||
shabal512_8x32_update( &ctx.shabal, vhash, size );
|
||||
}
|
||||
shabal512_8way_close( &ctx.shabal, vhash );
|
||||
shabal512_8x32_close( &ctx.shabal, vhash );
|
||||
dintrlv_8x32_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7, vhash );
|
||||
break;
|
||||
@@ -438,16 +438,16 @@ int x16r_8way_hash_generic( void* output, const void* input, int thrid,
|
||||
}
|
||||
break;
|
||||
case SHA_512:
|
||||
sha512_8way_init( &ctx.sha512 );
|
||||
sha512_8x64_init( &ctx.sha512 );
|
||||
if ( i == 0 )
|
||||
sha512_8way_update( &ctx.sha512, input, size );
|
||||
sha512_8x64_update( &ctx.sha512, input, size );
|
||||
else
|
||||
{
|
||||
intrlv_8x64( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
|
||||
size<<3 );
|
||||
sha512_8way_update( &ctx.sha512, vhash, size );
|
||||
sha512_8x64_update( &ctx.sha512, vhash, size );
|
||||
}
|
||||
sha512_8way_close( &ctx.sha512, vhash );
|
||||
sha512_8x64_close( &ctx.sha512, vhash );
|
||||
dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7, vhash );
|
||||
break;
|
||||
@@ -556,17 +556,17 @@ void x16r_4way_prehash( void *vdata, void *pdata, const char *hash_order )
|
||||
{
|
||||
case JH:
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
jh512_4way_init( &x16r_ctx.jh );
|
||||
jh512_4way_update( &x16r_ctx.jh, vdata, 64 );
|
||||
jh512_4x64_init( &x16r_ctx.jh );
|
||||
jh512_4x64_update( &x16r_ctx.jh, vdata, 64 );
|
||||
break;
|
||||
case KECCAK:
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
keccak512_4way_init( &x16r_ctx.keccak );
|
||||
keccak512_4way_update( &x16r_ctx.keccak, vdata, 72 );
|
||||
keccak512_4x64_init( &x16r_ctx.keccak );
|
||||
keccak512_4x64_update( &x16r_ctx.keccak, vdata, 72 );
|
||||
break;
|
||||
case SKEIN:
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
skein512_4way_prehash64( &x16r_ctx.skein, vdata );
|
||||
skein512_4x64_prehash64( &x16r_ctx.skein, vdata );
|
||||
break;
|
||||
case LUFFA:
|
||||
{
|
||||
@@ -599,8 +599,8 @@ void x16r_4way_prehash( void *vdata, void *pdata, const char *hash_order )
|
||||
break;
|
||||
case HAMSI:
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
hamsi512_4way_init( &x16r_ctx.hamsi );
|
||||
hamsi512_4way_update( &x16r_ctx.hamsi, vdata, 72 );
|
||||
hamsi512_4x64_init( &x16r_ctx.hamsi );
|
||||
hamsi512_4x64_update( &x16r_ctx.hamsi, vdata, 72 );
|
||||
break;
|
||||
case FUGUE:
|
||||
v128_bswap32_80( edata, pdata );
|
||||
@@ -610,8 +610,8 @@ void x16r_4way_prehash( void *vdata, void *pdata, const char *hash_order )
|
||||
break;
|
||||
case SHABAL:
|
||||
v128_bswap32_intrlv80_4x32( vdata2, pdata );
|
||||
shabal512_4way_init( &x16r_ctx.shabal );
|
||||
shabal512_4way_update( &x16r_ctx.shabal, vdata2, 64 );
|
||||
shabal512_4x32_init( &x16r_ctx.shabal );
|
||||
shabal512_4x32_update( &x16r_ctx.shabal, vdata2, 64 );
|
||||
rintrlv_4x32_4x64( vdata, vdata2, 640 );
|
||||
break;
|
||||
case WHIRLPOOL:
|
||||
@@ -652,24 +652,24 @@ int x16r_4way_hash_generic( void* output, const void* input, int thrid,
|
||||
{
|
||||
case BLAKE:
|
||||
if ( i == 0 )
|
||||
blake512_4way_full( &ctx.blake, vhash, input, size );
|
||||
blake512_4x64_full( &ctx.blake, vhash, input, size );
|
||||
else
|
||||
{
|
||||
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
||||
blake512_4way_full( &ctx.blake, vhash, vhash, size );
|
||||
blake512_4x64_full( &ctx.blake, vhash, vhash, size );
|
||||
}
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
break;
|
||||
case BMW:
|
||||
bmw512_4way_init( &ctx.bmw );
|
||||
bmw512_4x64_init( &ctx.bmw );
|
||||
if ( i == 0 )
|
||||
bmw512_4way_update( &ctx.bmw, input, size );
|
||||
bmw512_4x64_update( &ctx.bmw, input, size );
|
||||
else
|
||||
{
|
||||
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
||||
bmw512_4way_update( &ctx.bmw, vhash, size );
|
||||
bmw512_4x64_update( &ctx.bmw, vhash, size );
|
||||
}
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
bmw512_4x64_close( &ctx.bmw, vhash );
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
break;
|
||||
case GROESTL:
|
||||
@@ -689,35 +689,35 @@ int x16r_4way_hash_generic( void* output, const void* input, int thrid,
|
||||
break;
|
||||
case JH:
|
||||
if ( i == 0 )
|
||||
jh512_4way_update( &ctx.jh, input + (64<<2), 16 );
|
||||
jh512_4x64_update( &ctx.jh, input + (64<<2), 16 );
|
||||
else
|
||||
{
|
||||
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
||||
jh512_4way_init( &ctx.jh );
|
||||
jh512_4way_update( &ctx.jh, vhash, size );
|
||||
jh512_4x64_init( &ctx.jh );
|
||||
jh512_4x64_update( &ctx.jh, vhash, size );
|
||||
}
|
||||
jh512_4way_close( &ctx.jh, vhash );
|
||||
jh512_4x64_close( &ctx.jh, vhash );
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
break;
|
||||
case KECCAK:
|
||||
if ( i == 0 )
|
||||
keccak512_4way_update( &ctx.keccak, input + (72<<2), 8 );
|
||||
keccak512_4x64_update( &ctx.keccak, input + (72<<2), 8 );
|
||||
else
|
||||
{
|
||||
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
||||
keccak512_4way_init( &ctx.keccak );
|
||||
keccak512_4way_update( &ctx.keccak, vhash, size );
|
||||
keccak512_4x64_init( &ctx.keccak );
|
||||
keccak512_4x64_update( &ctx.keccak, vhash, size );
|
||||
}
|
||||
keccak512_4way_close( &ctx.keccak, vhash );
|
||||
keccak512_4x64_close( &ctx.keccak, vhash );
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
break;
|
||||
case SKEIN:
|
||||
if ( i == 0 )
|
||||
skein512_4way_final16( &ctx.skein, vhash, input + (64*4) );
|
||||
skein512_4x64_final16( &ctx.skein, vhash, input + (64*4) );
|
||||
else
|
||||
{
|
||||
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
||||
skein512_4way_full( &ctx.skein, vhash, vhash, size );
|
||||
skein512_4x64_full( &ctx.skein, vhash, vhash, size );
|
||||
}
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
break;
|
||||
@@ -809,14 +809,14 @@ int x16r_4way_hash_generic( void* output, const void* input, int thrid,
|
||||
break;
|
||||
case HAMSI:
|
||||
if ( i == 0 )
|
||||
hamsi512_4way_update( &ctx.hamsi, input + (72<<2), 8 );
|
||||
hamsi512_4x64_update( &ctx.hamsi, input + (72<<2), 8 );
|
||||
else
|
||||
{
|
||||
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
||||
hamsi512_4way_init( &ctx.hamsi );
|
||||
hamsi512_4way_update( &ctx.hamsi, vhash, size );
|
||||
hamsi512_4x64_init( &ctx.hamsi );
|
||||
hamsi512_4x64_update( &ctx.hamsi, vhash, size );
|
||||
}
|
||||
hamsi512_4way_close( &ctx.hamsi, vhash );
|
||||
hamsi512_4x64_close( &ctx.hamsi, vhash );
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
break;
|
||||
case FUGUE:
|
||||
@@ -845,13 +845,13 @@ int x16r_4way_hash_generic( void* output, const void* input, int thrid,
|
||||
case SHABAL:
|
||||
intrlv_4x32( vhash, in0, in1, in2, in3, size<<3 );
|
||||
if ( i == 0 )
|
||||
shabal512_4way_update( &ctx.shabal, vhash + (16<<2), 16 );
|
||||
shabal512_4x32_update( &ctx.shabal, vhash + (16<<2), 16 );
|
||||
else
|
||||
{
|
||||
shabal512_4way_init( &ctx.shabal );
|
||||
shabal512_4way_update( &ctx.shabal, vhash, size );
|
||||
shabal512_4x32_init( &ctx.shabal );
|
||||
shabal512_4x32_update( &ctx.shabal, vhash, size );
|
||||
}
|
||||
shabal512_4way_close( &ctx.shabal, vhash );
|
||||
shabal512_4x32_close( &ctx.shabal, vhash );
|
||||
dintrlv_4x32_512( hash0, hash1, hash2, hash3, vhash );
|
||||
break;
|
||||
case WHIRLPOOL:
|
||||
@@ -878,16 +878,16 @@ int x16r_4way_hash_generic( void* output, const void* input, int thrid,
|
||||
}
|
||||
break;
|
||||
case SHA_512:
|
||||
sha512_4way_init( &ctx.sha512 );
|
||||
sha512_4x64_init( &ctx.sha512 );
|
||||
if ( i == 0 )
|
||||
sha512_4way_update( &ctx.sha512, input, size );
|
||||
sha512_4x64_update( &ctx.sha512, input, size );
|
||||
else
|
||||
{
|
||||
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
||||
sha512_4way_init( &ctx.sha512 );
|
||||
sha512_4way_update( &ctx.sha512, vhash, size );
|
||||
sha512_4x64_init( &ctx.sha512 );
|
||||
sha512_4x64_update( &ctx.sha512, vhash, size );
|
||||
}
|
||||
sha512_4way_close( &ctx.sha512, vhash );
|
||||
sha512_4x64_close( &ctx.sha512, vhash );
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -125,19 +125,19 @@ bool register_x21s__algo( algo_gate_t* gate );
|
||||
|
||||
union _x16r_8way_context_overlay
|
||||
{
|
||||
blake512_8way_context blake;
|
||||
bmw512_8way_context bmw;
|
||||
skein512_8way_context skein;
|
||||
jh512_8way_context jh;
|
||||
keccak512_8way_context keccak;
|
||||
blake512_8x64_context blake;
|
||||
bmw512_8x64_context bmw;
|
||||
skein512_8x64_context skein;
|
||||
jh512_8x64_context jh;
|
||||
keccak512_8x64_context keccak;
|
||||
luffa_4way_context luffa;
|
||||
cube_4way_context cube;
|
||||
simd_4way_context simd;
|
||||
hamsi512_8way_context hamsi;
|
||||
hamsi512_8x64_context hamsi;
|
||||
hashState_fugue fugue;
|
||||
shabal512_8way_context shabal;
|
||||
shabal512_8x32_context shabal;
|
||||
sph_whirlpool_context whirlpool;
|
||||
sha512_8way_context sha512;
|
||||
sha512_8x64_context sha512;
|
||||
#if defined(__VAES__)
|
||||
groestl512_4way_context groestl;
|
||||
shavite512_4way_context shavite;
|
||||
@@ -170,8 +170,8 @@ int scanhash_x16r_8way( struct work *, uint32_t ,
|
||||
|
||||
union _x16r_4way_context_overlay
|
||||
{
|
||||
blake512_4way_context blake;
|
||||
bmw512_4way_context bmw;
|
||||
blake512_4x64_context blake;
|
||||
bmw512_4x64_context bmw;
|
||||
#if defined(__VAES__)
|
||||
groestl512_2way_context groestl;
|
||||
shavite512_2way_context shavite;
|
||||
@@ -181,17 +181,17 @@ union _x16r_4way_context_overlay
|
||||
shavite512_context shavite;
|
||||
hashState_echo echo;
|
||||
#endif
|
||||
skein512_4way_context skein;
|
||||
jh512_4way_context jh;
|
||||
keccak512_4way_context keccak;
|
||||
skein512_4x64_context skein;
|
||||
jh512_4x64_context jh;
|
||||
keccak512_4x64_context keccak;
|
||||
luffa_2way_context luffa;
|
||||
cube_2way_context cube;
|
||||
simd_2way_context simd;
|
||||
hamsi512_4way_context hamsi;
|
||||
hamsi512_4x64_context hamsi;
|
||||
hashState_fugue fugue;
|
||||
shabal512_4way_context shabal;
|
||||
shabal512_4x32_context shabal;
|
||||
sph_whirlpool_context whirlpool;
|
||||
sha512_4way_context sha512;
|
||||
sha512_4x64_context sha512;
|
||||
} __attribute__ ((aligned (64)));
|
||||
#define _x16r_4x64_context_overlay _x16r_4way_context_overlay
|
||||
|
||||
|
||||
@@ -20,7 +20,7 @@ int scanhash_x16rt( struct work *work, uint32_t max_nonce,
|
||||
v128_bswap32_80( edata, pdata );
|
||||
|
||||
static __thread uint32_t s_ntime = UINT32_MAX;
|
||||
uint32_t masked_ntime = swab32( pdata[17] ) & 0xffffff80;
|
||||
uint32_t masked_ntime = bswap_32( pdata[17] ) & 0xffffff80;
|
||||
if ( s_ntime != masked_ntime )
|
||||
{
|
||||
x16rt_getTimeHash( masked_ntime, &timeHash );
|
||||
@@ -28,7 +28,7 @@ int scanhash_x16rt( struct work *work, uint32_t max_nonce,
|
||||
s_ntime = masked_ntime;
|
||||
if ( !thr_id )
|
||||
applog( LOG_INFO, "hash order: %s time: (%08x) time hash: (%08x)",
|
||||
x16r_hash_order, swab32( pdata[17] ), timeHash );
|
||||
x16r_hash_order, bswap_32( pdata[17] ), timeHash );
|
||||
}
|
||||
|
||||
x16r_prehash( edata, pdata, x16r_hash_order );
|
||||
|
||||
@@ -14,19 +14,19 @@
|
||||
|
||||
union _x16rv2_8way_context_overlay
|
||||
{
|
||||
blake512_8way_context blake;
|
||||
bmw512_8way_context bmw;
|
||||
skein512_8way_context skein;
|
||||
jh512_8way_context jh;
|
||||
keccak512_8way_context keccak;
|
||||
blake512_8x64_context blake;
|
||||
bmw512_8x64_context bmw;
|
||||
skein512_8x64_context skein;
|
||||
jh512_8x64_context jh;
|
||||
keccak512_8x64_context keccak;
|
||||
luffa_4way_context luffa;
|
||||
cubehashParam cube;
|
||||
simd_4way_context simd;
|
||||
hamsi512_8way_context hamsi;
|
||||
hamsi512_8x64_context hamsi;
|
||||
hashState_fugue fugue;
|
||||
shabal512_8way_context shabal;
|
||||
shabal512_8x32_context shabal;
|
||||
sph_whirlpool_context whirlpool;
|
||||
sha512_8way_context sha512;
|
||||
sha512_8x64_context sha512;
|
||||
sph_tiger_context tiger;
|
||||
#if defined(__VAES__)
|
||||
groestl512_4way_context groestl;
|
||||
@@ -76,29 +76,29 @@ int x16rv2_8way_hash( void* output, const void* input, int thrid )
|
||||
switch ( algo )
|
||||
{
|
||||
case BLAKE:
|
||||
blake512_8way_init( &ctx.blake );
|
||||
blake512_8x64_init( &ctx.blake );
|
||||
if ( i == 0 )
|
||||
blake512_8way_full( &ctx.blake, vhash, input, size );
|
||||
blake512_8x64_full( &ctx.blake, vhash, input, size );
|
||||
else
|
||||
{
|
||||
intrlv_8x64( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
|
||||
size<<3 );
|
||||
blake512_8way_full( &ctx.blake, vhash, vhash, size );
|
||||
blake512_8x64_full( &ctx.blake, vhash, vhash, size );
|
||||
}
|
||||
dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5,
|
||||
hash6, hash7, vhash );
|
||||
break;
|
||||
case BMW:
|
||||
bmw512_8way_init( &ctx.bmw );
|
||||
bmw512_8x64_init( &ctx.bmw );
|
||||
if ( i == 0 )
|
||||
bmw512_8way_update( &ctx.bmw, input, size );
|
||||
bmw512_8x64_update( &ctx.bmw, input, size );
|
||||
else
|
||||
{
|
||||
intrlv_8x64( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
|
||||
size<<3 );
|
||||
bmw512_8way_update( &ctx.bmw, vhash, size );
|
||||
bmw512_8x64_update( &ctx.bmw, vhash, size );
|
||||
}
|
||||
bmw512_8way_close( &ctx.bmw, vhash );
|
||||
bmw512_8x64_close( &ctx.bmw, vhash );
|
||||
dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7, vhash );
|
||||
break;
|
||||
@@ -123,15 +123,15 @@ int x16rv2_8way_hash( void* output, const void* input, int thrid )
|
||||
break;
|
||||
case JH:
|
||||
if ( i == 0 )
|
||||
jh512_8way_update( &ctx.jh, input + (64<<3), 16 );
|
||||
jh512_8x64_update( &ctx.jh, input + (64<<3), 16 );
|
||||
else
|
||||
{
|
||||
intrlv_8x64( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
|
||||
size<<3 );
|
||||
jh512_8way_init( &ctx.jh );
|
||||
jh512_8way_update( &ctx.jh, vhash, size );
|
||||
jh512_8x64_init( &ctx.jh );
|
||||
jh512_8x64_update( &ctx.jh, vhash, size );
|
||||
}
|
||||
jh512_8way_close( &ctx.jh, vhash );
|
||||
jh512_8x64_close( &ctx.jh, vhash );
|
||||
dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7, vhash );
|
||||
break;
|
||||
@@ -165,30 +165,30 @@ int x16rv2_8way_hash( void* output, const void* input, int thrid )
|
||||
}
|
||||
else
|
||||
{
|
||||
sph_tiger_init( &ctx.tiger );
|
||||
sph_tiger( &ctx.tiger, in0, size );
|
||||
sph_tiger_close( &ctx.tiger, hash0 );
|
||||
sph_tiger_init( &ctx.tiger );
|
||||
sph_tiger( &ctx.tiger, in1, size );
|
||||
sph_tiger_close( &ctx.tiger, hash1 );
|
||||
sph_tiger_init( &ctx.tiger );
|
||||
sph_tiger( &ctx.tiger, in2, size );
|
||||
sph_tiger_close( &ctx.tiger, hash2 );
|
||||
sph_tiger_init( &ctx.tiger );
|
||||
sph_tiger( &ctx.tiger, in3, size );
|
||||
sph_tiger_close( &ctx.tiger, hash3 );
|
||||
sph_tiger_init( &ctx.tiger );
|
||||
sph_tiger( &ctx.tiger, in4, size );
|
||||
sph_tiger_close( &ctx.tiger, hash4 );
|
||||
sph_tiger_init( &ctx.tiger );
|
||||
sph_tiger( &ctx.tiger, in5, size );
|
||||
sph_tiger_close( &ctx.tiger, hash5 );
|
||||
sph_tiger_init( &ctx.tiger );
|
||||
sph_tiger( &ctx.tiger, in6, size );
|
||||
sph_tiger_close( &ctx.tiger, hash6 );
|
||||
sph_tiger_init( &ctx.tiger );
|
||||
sph_tiger( &ctx.tiger, in7, size );
|
||||
sph_tiger_close( &ctx.tiger, hash7 );
|
||||
sph_tiger_init( &ctx.tiger );
|
||||
sph_tiger( &ctx.tiger, in0, size );
|
||||
sph_tiger_close( &ctx.tiger, hash0 );
|
||||
sph_tiger_init( &ctx.tiger );
|
||||
sph_tiger( &ctx.tiger, in1, size );
|
||||
sph_tiger_close( &ctx.tiger, hash1 );
|
||||
sph_tiger_init( &ctx.tiger );
|
||||
sph_tiger( &ctx.tiger, in2, size );
|
||||
sph_tiger_close( &ctx.tiger, hash2 );
|
||||
sph_tiger_init( &ctx.tiger );
|
||||
sph_tiger( &ctx.tiger, in3, size );
|
||||
sph_tiger_close( &ctx.tiger, hash3 );
|
||||
sph_tiger_init( &ctx.tiger );
|
||||
sph_tiger( &ctx.tiger, in4, size );
|
||||
sph_tiger_close( &ctx.tiger, hash4 );
|
||||
sph_tiger_init( &ctx.tiger );
|
||||
sph_tiger( &ctx.tiger, in5, size );
|
||||
sph_tiger_close( &ctx.tiger, hash5 );
|
||||
sph_tiger_init( &ctx.tiger );
|
||||
sph_tiger( &ctx.tiger, in6, size );
|
||||
sph_tiger_close( &ctx.tiger, hash6 );
|
||||
sph_tiger_init( &ctx.tiger );
|
||||
sph_tiger( &ctx.tiger, in7, size );
|
||||
sph_tiger_close( &ctx.tiger, hash7 );
|
||||
}
|
||||
|
||||
for ( int i = (24/4); i < (64/4); i++ )
|
||||
@@ -197,23 +197,23 @@ int x16rv2_8way_hash( void* output, const void* input, int thrid )
|
||||
|
||||
intrlv_8x64_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5,
|
||||
hash6, hash7 );
|
||||
keccak512_8way_init( &ctx.keccak );
|
||||
keccak512_8way_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_8way_close( &ctx.keccak, vhash );
|
||||
keccak512_8x64_init( &ctx.keccak );
|
||||
keccak512_8x64_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_8x64_close( &ctx.keccak, vhash );
|
||||
dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7, vhash );
|
||||
break;
|
||||
case SKEIN:
|
||||
if ( i == 0 )
|
||||
skein512_8way_update( &ctx.skein, input + (64<<3), 16 );
|
||||
skein512_8x64_update( &ctx.skein, input + (64<<3), 16 );
|
||||
else
|
||||
{
|
||||
intrlv_8x64( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
|
||||
size<<3 );
|
||||
skein512_8way_init( &ctx.skein );
|
||||
skein512_8way_update( &ctx.skein, vhash, size );
|
||||
skein512_8x64_init( &ctx.skein );
|
||||
skein512_8x64_update( &ctx.skein, vhash, size );
|
||||
}
|
||||
skein512_8way_close( &ctx.skein, vhash );
|
||||
skein512_8x64_close( &ctx.skein, vhash );
|
||||
dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7, vhash );
|
||||
break;
|
||||
@@ -395,16 +395,16 @@ int x16rv2_8way_hash( void* output, const void* input, int thrid )
|
||||
break;
|
||||
case HAMSI:
|
||||
if ( i == 0 )
|
||||
hamsi512_8way_update( &ctx.hamsi, input + (72<<3), 8 );
|
||||
hamsi512_8x64_update( &ctx.hamsi, input + (72<<3), 8 );
|
||||
else
|
||||
{
|
||||
intrlv_8x64( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
|
||||
size<<3 );
|
||||
|
||||
hamsi512_8way_init( &ctx.hamsi );
|
||||
hamsi512_8way_update( &ctx.hamsi, vhash, size );
|
||||
hamsi512_8x64_init( &ctx.hamsi );
|
||||
hamsi512_8x64_update( &ctx.hamsi, vhash, size );
|
||||
}
|
||||
hamsi512_8way_close( &ctx.hamsi, vhash );
|
||||
hamsi512_8x64_close( &ctx.hamsi, vhash );
|
||||
dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7, vhash );
|
||||
break;
|
||||
@@ -451,13 +451,13 @@ int x16rv2_8way_hash( void* output, const void* input, int thrid )
|
||||
intrlv_8x32( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
|
||||
size<<3 );
|
||||
if ( i == 0 )
|
||||
shabal512_8way_update( &ctx.shabal, vhash + (16<<3), 16 );
|
||||
shabal512_8x32_update( &ctx.shabal, vhash + (16<<3), 16 );
|
||||
else
|
||||
{
|
||||
shabal512_8way_init( &ctx.shabal );
|
||||
shabal512_8way_update( &ctx.shabal, vhash, size );
|
||||
shabal512_8x32_init( &ctx.shabal );
|
||||
shabal512_8x32_update( &ctx.shabal, vhash, size );
|
||||
}
|
||||
shabal512_8way_close( &ctx.shabal, vhash );
|
||||
shabal512_8x32_close( &ctx.shabal, vhash );
|
||||
dintrlv_8x32_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7, vhash );
|
||||
break;
|
||||
@@ -562,9 +562,9 @@ int x16rv2_8way_hash( void* output, const void* input, int thrid )
|
||||
|
||||
intrlv_8x64_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5,
|
||||
hash6, hash7 );
|
||||
sha512_8way_init( &ctx.sha512 );
|
||||
sha512_8way_update( &ctx.sha512, vhash, 64 );
|
||||
sha512_8way_close( &ctx.sha512, vhash );
|
||||
sha512_8x64_init( &ctx.sha512 );
|
||||
sha512_8x64_update( &ctx.sha512, vhash, 64 );
|
||||
sha512_8x64_close( &ctx.sha512, vhash );
|
||||
dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7, vhash );
|
||||
break;
|
||||
@@ -623,8 +623,8 @@ int scanhash_x16rv2_8way( struct work *work, uint32_t max_nonce,
|
||||
{
|
||||
case JH:
|
||||
mm512_bswap32_intrlv80_8x64( vdata, pdata );
|
||||
jh512_8way_init( &x16rv2_ctx.jh );
|
||||
jh512_8way_update( &x16rv2_ctx.jh, vdata, 64 );
|
||||
jh512_8x64_init( &x16rv2_ctx.jh );
|
||||
jh512_8x64_update( &x16rv2_ctx.jh, vdata, 64 );
|
||||
break;
|
||||
case KECCAK:
|
||||
case LUFFA:
|
||||
@@ -637,8 +637,8 @@ int scanhash_x16rv2_8way( struct work *work, uint32_t max_nonce,
|
||||
break;
|
||||
case SKEIN:
|
||||
mm512_bswap32_intrlv80_8x64( vdata, pdata );
|
||||
skein512_8way_init( &x16rv2_ctx.skein );
|
||||
skein512_8way_update( &x16rv2_ctx.skein, vdata, 64 );
|
||||
skein512_8x64_init( &x16rv2_ctx.skein );
|
||||
skein512_8x64_update( &x16rv2_ctx.skein, vdata, 64 );
|
||||
break;
|
||||
case CUBEHASH:
|
||||
v128_bswap32_80( edata, pdata );
|
||||
@@ -649,8 +649,8 @@ int scanhash_x16rv2_8way( struct work *work, uint32_t max_nonce,
|
||||
break;
|
||||
case HAMSI:
|
||||
mm512_bswap32_intrlv80_8x64( vdata, pdata );
|
||||
hamsi512_8way_init( &x16rv2_ctx.hamsi );
|
||||
hamsi512_8way_update( &x16rv2_ctx.hamsi, vdata, 72 );
|
||||
hamsi512_8x64_init( &x16rv2_ctx.hamsi );
|
||||
hamsi512_8x64_update( &x16rv2_ctx.hamsi, vdata, 72 );
|
||||
break;
|
||||
case FUGUE:
|
||||
v128_bswap32_80( edata, pdata );
|
||||
@@ -661,8 +661,8 @@ int scanhash_x16rv2_8way( struct work *work, uint32_t max_nonce,
|
||||
break;
|
||||
case SHABAL:
|
||||
mm256_bswap32_intrlv80_8x32( vdata2, pdata );
|
||||
shabal512_8way_init( &x16rv2_ctx.shabal );
|
||||
shabal512_8way_update( &x16rv2_ctx.shabal, vdata2, 64 );
|
||||
shabal512_8x32_init( &x16rv2_ctx.shabal );
|
||||
shabal512_8x32_update( &x16rv2_ctx.shabal, vdata2, 64 );
|
||||
rintrlv_8x32_8x64( vdata, vdata2, 640 );
|
||||
break;
|
||||
case WHIRLPOOL:
|
||||
@@ -701,8 +701,8 @@ int scanhash_x16rv2_8way( struct work *work, uint32_t max_nonce,
|
||||
|
||||
union _x16rv2_4way_context_overlay
|
||||
{
|
||||
blake512_4way_context blake;
|
||||
bmw512_4way_context bmw;
|
||||
blake512_4x64_context blake;
|
||||
bmw512_4x64_context bmw;
|
||||
#if defined(__VAES__)
|
||||
groestl512_2way_context groestl;
|
||||
shavite512_2way_context shavite;
|
||||
@@ -712,17 +712,17 @@ union _x16rv2_4way_context_overlay
|
||||
shavite512_context shavite;
|
||||
hashState_echo echo;
|
||||
#endif
|
||||
skein512_4way_context skein;
|
||||
jh512_4way_context jh;
|
||||
keccak512_4way_context keccak;
|
||||
skein512_4x64_context skein;
|
||||
jh512_4x64_context jh;
|
||||
keccak512_4x64_context keccak;
|
||||
luffa_2way_context luffa;
|
||||
cubehashParam cube;
|
||||
simd_2way_context simd;
|
||||
hamsi512_4way_context hamsi;
|
||||
hamsi512_4x64_context hamsi;
|
||||
hashState_fugue fugue;
|
||||
shabal512_4way_context shabal;
|
||||
shabal512_4x32_context shabal;
|
||||
sph_whirlpool_context whirlpool;
|
||||
sha512_4way_context sha512;
|
||||
sha512_4x64_context sha512;
|
||||
sph_tiger_context tiger;
|
||||
};
|
||||
typedef union _x16rv2_4way_context_overlay x16rv2_4way_context_overlay;
|
||||
@@ -761,24 +761,24 @@ int x16rv2_4way_hash( void* output, const void* input, int thrid )
|
||||
{
|
||||
case BLAKE:
|
||||
if ( i == 0 )
|
||||
blake512_4way_full( &ctx.blake, vhash, input, size );
|
||||
blake512_4x64_full( &ctx.blake, vhash, input, size );
|
||||
else
|
||||
{
|
||||
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
||||
blake512_4way_full( &ctx.blake, vhash, vhash, size );
|
||||
blake512_4x64_full( &ctx.blake, vhash, vhash, size );
|
||||
}
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
break;
|
||||
case BMW:
|
||||
bmw512_4way_init( &ctx.bmw );
|
||||
bmw512_4x64_init( &ctx.bmw );
|
||||
if ( i == 0 )
|
||||
bmw512_4way_update( &ctx.bmw, input, size );
|
||||
bmw512_4x64_update( &ctx.bmw, input, size );
|
||||
else
|
||||
{
|
||||
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
||||
bmw512_4way_update( &ctx.bmw, vhash, size );
|
||||
bmw512_4x64_update( &ctx.bmw, vhash, size );
|
||||
}
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
bmw512_4x64_close( &ctx.bmw, vhash );
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
break;
|
||||
case GROESTL:
|
||||
@@ -798,14 +798,14 @@ int x16rv2_4way_hash( void* output, const void* input, int thrid )
|
||||
break;
|
||||
case JH:
|
||||
if ( i == 0 )
|
||||
jh512_4way_update( &ctx.jh, input + (64<<2), 16 );
|
||||
jh512_4x64_update( &ctx.jh, input + (64<<2), 16 );
|
||||
else
|
||||
{
|
||||
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
||||
jh512_4way_init( &ctx.jh );
|
||||
jh512_4way_update( &ctx.jh, vhash, size );
|
||||
jh512_4x64_init( &ctx.jh );
|
||||
jh512_4x64_update( &ctx.jh, vhash, size );
|
||||
}
|
||||
jh512_4way_close( &ctx.jh, vhash );
|
||||
jh512_4x64_close( &ctx.jh, vhash );
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
break;
|
||||
case KECCAK:
|
||||
@@ -842,20 +842,20 @@ int x16rv2_4way_hash( void* output, const void* input, int thrid )
|
||||
hash0[i] = hash1[i] = hash2[i] = hash3[i] = 0;
|
||||
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
keccak512_4way_init( &ctx.keccak );
|
||||
keccak512_4way_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_close( &ctx.keccak, vhash );
|
||||
keccak512_4x64_init( &ctx.keccak );
|
||||
keccak512_4x64_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4x64_close( &ctx.keccak, vhash );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
break;
|
||||
case SKEIN:
|
||||
if ( i == 0 )
|
||||
skein512_4way_final16( &ctx.skein, vhash, input + (64*4) );
|
||||
skein512_4x64_final16( &ctx.skein, vhash, input + (64*4) );
|
||||
else
|
||||
{
|
||||
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
||||
skein512_4way_init( &ctx.skein );
|
||||
skein512_4way_update( &ctx.skein, vhash, size );
|
||||
skein512_4way_close( &ctx.skein, vhash );
|
||||
skein512_4x64_init( &ctx.skein );
|
||||
skein512_4x64_update( &ctx.skein, vhash, size );
|
||||
skein512_4x64_close( &ctx.skein, vhash );
|
||||
}
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
break;
|
||||
@@ -976,14 +976,14 @@ int x16rv2_4way_hash( void* output, const void* input, int thrid )
|
||||
break;
|
||||
case HAMSI:
|
||||
if ( i == 0 )
|
||||
hamsi512_4way_update( &ctx.hamsi, input + (72<<2), 8 );
|
||||
hamsi512_4x64_update( &ctx.hamsi, input + (72<<2), 8 );
|
||||
else
|
||||
{
|
||||
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
||||
hamsi512_4way_init( &ctx.hamsi );
|
||||
hamsi512_4way_update( &ctx.hamsi, vhash, size );
|
||||
hamsi512_4x64_init( &ctx.hamsi );
|
||||
hamsi512_4x64_update( &ctx.hamsi, vhash, size );
|
||||
}
|
||||
hamsi512_4way_close( &ctx.hamsi, vhash );
|
||||
hamsi512_4x64_close( &ctx.hamsi, vhash );
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
break;
|
||||
case FUGUE:
|
||||
@@ -1012,13 +1012,13 @@ int x16rv2_4way_hash( void* output, const void* input, int thrid )
|
||||
case SHABAL:
|
||||
intrlv_4x32( vhash, in0, in1, in2, in3, size<<3 );
|
||||
if ( i == 0 )
|
||||
shabal512_4way_update( &ctx.shabal, vhash + (16<<2), 16 );
|
||||
shabal512_4x32_update( &ctx.shabal, vhash + (16<<2), 16 );
|
||||
else
|
||||
{
|
||||
shabal512_4way_init( &ctx.shabal );
|
||||
shabal512_4way_update( &ctx.shabal, vhash, size );
|
||||
shabal512_4x32_init( &ctx.shabal );
|
||||
shabal512_4x32_update( &ctx.shabal, vhash, size );
|
||||
}
|
||||
shabal512_4way_close( &ctx.shabal, vhash );
|
||||
shabal512_4x32_close( &ctx.shabal, vhash );
|
||||
dintrlv_4x32_512( hash0, hash1, hash2, hash3, vhash );
|
||||
break;
|
||||
case WHIRLPOOL:
|
||||
@@ -1078,9 +1078,9 @@ int x16rv2_4way_hash( void* output, const void* input, int thrid )
|
||||
hash0[i] = hash1[i] = hash2[i] = hash3[i] = 0;
|
||||
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
sha512_4way_init( &ctx.sha512 );
|
||||
sha512_4way_update( &ctx.sha512, vhash, 64 );
|
||||
sha512_4way_close( &ctx.sha512, vhash );
|
||||
sha512_4x64_init( &ctx.sha512 );
|
||||
sha512_4x64_update( &ctx.sha512, vhash, 64 );
|
||||
sha512_4x64_close( &ctx.sha512, vhash );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
break;
|
||||
}
|
||||
@@ -1133,8 +1133,8 @@ int scanhash_x16rv2_4way( struct work *work, uint32_t max_nonce,
|
||||
{
|
||||
case JH:
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
jh512_4way_init( &x16rv2_ctx.jh );
|
||||
jh512_4way_update( &x16rv2_ctx.jh, vdata, 64 );
|
||||
jh512_4x64_init( &x16rv2_ctx.jh );
|
||||
jh512_4x64_update( &x16rv2_ctx.jh, vdata, 64 );
|
||||
break;
|
||||
case KECCAK:
|
||||
case LUFFA:
|
||||
@@ -1146,7 +1146,7 @@ int scanhash_x16rv2_4way( struct work *work, uint32_t max_nonce,
|
||||
break;
|
||||
case SKEIN:
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
skein512_4way_prehash64( &x16rv2_ctx.skein, vdata );
|
||||
skein512_4x64_prehash64( &x16rv2_ctx.skein, vdata );
|
||||
break;
|
||||
case CUBEHASH:
|
||||
v128_bswap32_80( edata, pdata );
|
||||
@@ -1156,8 +1156,8 @@ int scanhash_x16rv2_4way( struct work *work, uint32_t max_nonce,
|
||||
break;
|
||||
case HAMSI:
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
hamsi512_4way_init( &x16rv2_ctx.hamsi );
|
||||
hamsi512_4way_update( &x16rv2_ctx.hamsi, vdata, 72 );
|
||||
hamsi512_4x64_init( &x16rv2_ctx.hamsi );
|
||||
hamsi512_4x64_update( &x16rv2_ctx.hamsi, vdata, 72 );
|
||||
break;
|
||||
case FUGUE:
|
||||
v128_bswap32_80( edata, pdata );
|
||||
@@ -1167,8 +1167,8 @@ int scanhash_x16rv2_4way( struct work *work, uint32_t max_nonce,
|
||||
break;
|
||||
case SHABAL:
|
||||
v128_bswap32_intrlv80_4x32( vdata32, pdata );
|
||||
shabal512_4way_init( &x16rv2_ctx.shabal );
|
||||
shabal512_4way_update( &x16rv2_ctx.shabal, vdata32, 64 );
|
||||
shabal512_4x32_init( &x16rv2_ctx.shabal );
|
||||
shabal512_4x32_update( &x16rv2_ctx.shabal, vdata32, 64 );
|
||||
rintrlv_4x32_4x64( vdata, vdata32, 640 );
|
||||
break;
|
||||
case WHIRLPOOL:
|
||||
|
||||
@@ -168,7 +168,7 @@ int scanhash_x16rv2( struct work *work, uint32_t max_nonce,
|
||||
static __thread uint32_t s_ntime = UINT32_MAX;
|
||||
if ( s_ntime != pdata[17] )
|
||||
{
|
||||
uint32_t ntime = swab32(pdata[17]);
|
||||
uint32_t ntime = bswap_32(pdata[17]);
|
||||
x16_r_s_getAlgoString( (const uint8_t*) (&edata[1]), x16r_hash_order );
|
||||
s_ntime = ntime;
|
||||
if ( opt_debug && !thr_id )
|
||||
|
||||
@@ -21,10 +21,10 @@ static __thread uint64_t* x21s_8way_matrix;
|
||||
|
||||
union _x21s_8way_context_overlay
|
||||
{
|
||||
haval256_5_8way_context haval;
|
||||
haval256_8x32_context haval;
|
||||
sph_tiger_context tiger;
|
||||
sph_gost512_context gost;
|
||||
sha256_8way_context sha256;
|
||||
sha256_8x32_context sha256;
|
||||
} __attribute__ ((aligned (64)));
|
||||
|
||||
typedef union _x21s_8way_context_overlay x21s_8way_context_overlay;
|
||||
@@ -50,9 +50,9 @@ int x21s_8way_hash( void* output, const void* input, int thrid )
|
||||
intrlv_8x32_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7 );
|
||||
|
||||
haval256_5_8way_init( &ctx.haval );
|
||||
haval256_5_8way_update( &ctx.haval, vhash, 64 );
|
||||
haval256_5_8way_close( &ctx.haval, vhash );
|
||||
haval256_8x32_init( &ctx.haval );
|
||||
haval256_8x32_update( &ctx.haval, vhash, 64 );
|
||||
haval256_8x32_close( &ctx.haval, vhash );
|
||||
|
||||
dintrlv_8x32_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7, vhash );
|
||||
@@ -122,9 +122,9 @@ int x21s_8way_hash( void* output, const void* input, int thrid )
|
||||
|
||||
intrlv_8x32_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7 );
|
||||
sha256_8way_init( &ctx.sha256 );
|
||||
sha256_8way_update( &ctx.sha256, vhash, 64 );
|
||||
sha256_8way_close( &ctx.sha256, output );
|
||||
sha256_8x32_init( &ctx.sha256 );
|
||||
sha256_8x32_update( &ctx.sha256, vhash, 64 );
|
||||
sha256_8x32_close( &ctx.sha256, output );
|
||||
|
||||
return 1;
|
||||
}
|
||||
@@ -202,11 +202,11 @@ static __thread uint64_t* x21s_4way_matrix;
|
||||
|
||||
union _x21s_4way_context_overlay
|
||||
{
|
||||
haval256_5_4way_context haval;
|
||||
haval256_4x32_context haval;
|
||||
sph_tiger_context tiger;
|
||||
sph_gost512_context gost;
|
||||
#if !defined(__SHA__)
|
||||
sha256_4way_context sha256;
|
||||
sha256_4x32_context sha256;
|
||||
#endif
|
||||
} __attribute__ ((aligned (64)));
|
||||
|
||||
@@ -228,9 +228,9 @@ int x21s_4way_hash( void* output, const void* input, int thrid )
|
||||
|
||||
intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
|
||||
haval256_5_4way_init( &ctx.haval );
|
||||
haval256_5_4way_update( &ctx.haval, vhash, 64 );
|
||||
haval256_5_4way_close( &ctx.haval, vhash );
|
||||
haval256_4x32_init( &ctx.haval );
|
||||
haval256_4x32_update( &ctx.haval, vhash, 64 );
|
||||
haval256_4x32_close( &ctx.haval, vhash );
|
||||
|
||||
dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
@@ -279,9 +279,9 @@ int x21s_4way_hash( void* output, const void* input, int thrid )
|
||||
#else
|
||||
|
||||
intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
sha256_4way_init( &ctx.sha256 );
|
||||
sha256_4way_update( &ctx.sha256, vhash, 64 );
|
||||
sha256_4way_close( &ctx.sha256, vhash );
|
||||
sha256_4x32_init( &ctx.sha256 );
|
||||
sha256_4x32_update( &ctx.sha256, vhash, 64 );
|
||||
sha256_4x32_close( &ctx.sha256, vhash );
|
||||
dintrlv_4x32( output, output+32, output+64,output+96, vhash, 256 );
|
||||
|
||||
#endif
|
||||
|
||||
@@ -78,7 +78,7 @@ int scanhash_x21s( struct work *work, uint32_t max_nonce,
|
||||
static __thread uint32_t s_ntime = UINT32_MAX;
|
||||
if ( s_ntime != pdata[17] )
|
||||
{
|
||||
uint32_t ntime = swab32(pdata[17]);
|
||||
uint32_t ntime = bswap_32(pdata[17]);
|
||||
x16_r_s_getAlgoString( (const uint8_t*)(&edata[1]), x16r_hash_order );
|
||||
s_ntime = ntime;
|
||||
if ( opt_debug && !thr_id )
|
||||
|
||||
@@ -31,20 +31,20 @@
|
||||
|
||||
union _sonoa_8way_context_overlay
|
||||
{
|
||||
blake512_8way_context blake;
|
||||
bmw512_8way_context bmw;
|
||||
skein512_8way_context skein;
|
||||
jh512_8way_context jh;
|
||||
keccak512_8way_context keccak;
|
||||
blake512_8x64_context blake;
|
||||
bmw512_8x64_context bmw;
|
||||
skein512_8x64_context skein;
|
||||
jh512_8x64_context jh;
|
||||
keccak512_8x64_context keccak;
|
||||
luffa_4way_context luffa;
|
||||
cube_4way_context cube;
|
||||
simd_4way_context simd;
|
||||
hamsi512_8way_context hamsi;
|
||||
hamsi512_8x64_context hamsi;
|
||||
hashState_fugue fugue;
|
||||
shabal512_8way_context shabal;
|
||||
shabal512_8x32_context shabal;
|
||||
sph_whirlpool_context whirlpool;
|
||||
sha512_8way_context sha512;
|
||||
haval256_5_8way_context haval;
|
||||
sha512_8x64_context sha512;
|
||||
haval256_8x32_context haval;
|
||||
#if defined(__VAES__)
|
||||
groestl512_4way_context groestl;
|
||||
shavite512_4way_context shavite;
|
||||
@@ -75,9 +75,9 @@ int sonoa_8way_hash( void *state, const void *input, int thr_id )
|
||||
|
||||
// 1
|
||||
|
||||
blake512_8way_full( &ctx.blake, vhash, input, 80 );
|
||||
blake512_8x64_full( &ctx.blake, vhash, input, 80 );
|
||||
|
||||
bmw512_8way_full( &ctx.bmw, vhash, vhash, 64 );
|
||||
bmw512_8x64_full( &ctx.bmw, vhash, vhash, 64 );
|
||||
|
||||
#if defined(__VAES__)
|
||||
|
||||
@@ -107,15 +107,15 @@ int sonoa_8way_hash( void *state, const void *input, int thr_id )
|
||||
|
||||
#endif
|
||||
|
||||
skein512_8way_full( &ctx.skein, vhash, vhash, 64 );
|
||||
skein512_8x64_full( &ctx.skein, vhash, vhash, 64 );
|
||||
|
||||
jh512_8way_init( &ctx.jh );
|
||||
jh512_8way_update( &ctx.jh, vhash, 64 );
|
||||
jh512_8way_close( &ctx.jh, vhash );
|
||||
jh512_8x64_init( &ctx.jh );
|
||||
jh512_8x64_update( &ctx.jh, vhash, 64 );
|
||||
jh512_8x64_close( &ctx.jh, vhash );
|
||||
|
||||
keccak512_8way_init( &ctx.keccak );
|
||||
keccak512_8way_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_8way_close( &ctx.keccak, vhash );
|
||||
keccak512_8x64_init( &ctx.keccak );
|
||||
keccak512_8x64_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_8x64_close( &ctx.keccak, vhash );
|
||||
|
||||
rintrlv_8x64_4x128( vhashA, vhashB, vhash, 512 );
|
||||
|
||||
@@ -189,7 +189,7 @@ int sonoa_8way_hash( void *state, const void *input, int thr_id )
|
||||
if ( work_restart[thr_id].restart ) return 0;
|
||||
// 2
|
||||
|
||||
bmw512_8way_full( &ctx.bmw, vhash, vhash, 64 );
|
||||
bmw512_8x64_full( &ctx.bmw, vhash, vhash, 64 );
|
||||
|
||||
#if defined(__VAES__)
|
||||
|
||||
@@ -219,15 +219,15 @@ int sonoa_8way_hash( void *state, const void *input, int thr_id )
|
||||
|
||||
#endif
|
||||
|
||||
skein512_8way_full( &ctx.skein, vhash, vhash, 64 );
|
||||
skein512_8x64_full( &ctx.skein, vhash, vhash, 64 );
|
||||
|
||||
jh512_8way_init( &ctx.jh );
|
||||
jh512_8way_update( &ctx.jh, vhash, 64 );
|
||||
jh512_8way_close( &ctx.jh, vhash );
|
||||
jh512_8x64_init( &ctx.jh );
|
||||
jh512_8x64_update( &ctx.jh, vhash, 64 );
|
||||
jh512_8x64_close( &ctx.jh, vhash );
|
||||
|
||||
keccak512_8way_init( &ctx.keccak );
|
||||
keccak512_8way_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_8way_close( &ctx.keccak, vhash );
|
||||
keccak512_8x64_init( &ctx.keccak );
|
||||
keccak512_8x64_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_8x64_close( &ctx.keccak, vhash );
|
||||
|
||||
rintrlv_8x64_4x128( vhashA, vhashB, vhash, 512 );
|
||||
|
||||
@@ -298,14 +298,14 @@ int sonoa_8way_hash( void *state, const void *input, int thr_id )
|
||||
|
||||
#endif
|
||||
|
||||
hamsi512_8way_init( &ctx.hamsi );
|
||||
hamsi512_8way_update( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_8way_close( &ctx.hamsi, vhash );
|
||||
hamsi512_8x64_init( &ctx.hamsi );
|
||||
hamsi512_8x64_update( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_8x64_close( &ctx.hamsi, vhash );
|
||||
|
||||
if ( work_restart[thr_id].restart ) return 0;
|
||||
// 3
|
||||
|
||||
bmw512_8way_full( &ctx.bmw, vhash, vhash, 64 );
|
||||
bmw512_8x64_full( &ctx.bmw, vhash, vhash, 64 );
|
||||
|
||||
#if defined(__VAES__)
|
||||
|
||||
@@ -335,17 +335,17 @@ int sonoa_8way_hash( void *state, const void *input, int thr_id )
|
||||
|
||||
#endif
|
||||
|
||||
skein512_8way_init( &ctx.skein );
|
||||
skein512_8way_update( &ctx.skein, vhash, 64 );
|
||||
skein512_8way_close( &ctx.skein, vhash );
|
||||
skein512_8x64_init( &ctx.skein );
|
||||
skein512_8x64_update( &ctx.skein, vhash, 64 );
|
||||
skein512_8x64_close( &ctx.skein, vhash );
|
||||
|
||||
jh512_8way_init( &ctx.jh );
|
||||
jh512_8way_update( &ctx.jh, vhash, 64 );
|
||||
jh512_8way_close( &ctx.jh, vhash );
|
||||
jh512_8x64_init( &ctx.jh );
|
||||
jh512_8x64_update( &ctx.jh, vhash, 64 );
|
||||
jh512_8x64_close( &ctx.jh, vhash );
|
||||
|
||||
keccak512_8way_init( &ctx.keccak );
|
||||
keccak512_8way_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_8way_close( &ctx.keccak, vhash );
|
||||
keccak512_8x64_init( &ctx.keccak );
|
||||
keccak512_8x64_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_8x64_close( &ctx.keccak, vhash );
|
||||
|
||||
rintrlv_8x64_4x128( vhashA, vhashB, vhash, 512 );
|
||||
|
||||
@@ -416,9 +416,9 @@ int sonoa_8way_hash( void *state, const void *input, int thr_id )
|
||||
|
||||
#endif
|
||||
|
||||
hamsi512_8way_init( &ctx.hamsi );
|
||||
hamsi512_8way_update( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_8way_close( &ctx.hamsi, vhash );
|
||||
hamsi512_8x64_init( &ctx.hamsi );
|
||||
hamsi512_8x64_update( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_8x64_close( &ctx.hamsi, vhash );
|
||||
|
||||
dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
|
||||
vhash );
|
||||
@@ -438,7 +438,7 @@ int sonoa_8way_hash( void *state, const void *input, int thr_id )
|
||||
intrlv_8x64_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7 );
|
||||
|
||||
bmw512_8way_full( &ctx.bmw, vhash, vhash, 64 );
|
||||
bmw512_8x64_full( &ctx.bmw, vhash, vhash, 64 );
|
||||
|
||||
#if defined(__VAES__)
|
||||
|
||||
@@ -468,15 +468,15 @@ int sonoa_8way_hash( void *state, const void *input, int thr_id )
|
||||
|
||||
#endif
|
||||
|
||||
skein512_8way_full( &ctx.skein, vhash, vhash, 64 );
|
||||
skein512_8x64_full( &ctx.skein, vhash, vhash, 64 );
|
||||
|
||||
jh512_8way_init( &ctx.jh );
|
||||
jh512_8way_update( &ctx.jh, vhash, 64 );
|
||||
jh512_8way_close( &ctx.jh, vhash );
|
||||
jh512_8x64_init( &ctx.jh );
|
||||
jh512_8x64_update( &ctx.jh, vhash, 64 );
|
||||
jh512_8x64_close( &ctx.jh, vhash );
|
||||
|
||||
keccak512_8way_init( &ctx.keccak );
|
||||
keccak512_8way_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_8way_close( &ctx.keccak, vhash );
|
||||
keccak512_8x64_init( &ctx.keccak );
|
||||
keccak512_8x64_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_8x64_close( &ctx.keccak, vhash );
|
||||
|
||||
rintrlv_8x64_4x128( vhashA, vhashB, vhash, 512 );
|
||||
|
||||
@@ -547,9 +547,9 @@ int sonoa_8way_hash( void *state, const void *input, int thr_id )
|
||||
|
||||
#endif
|
||||
|
||||
hamsi512_8way_init( &ctx.hamsi );
|
||||
hamsi512_8way_update( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_8way_close( &ctx.hamsi, vhash );
|
||||
hamsi512_8x64_init( &ctx.hamsi );
|
||||
hamsi512_8x64_update( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_8x64_close( &ctx.hamsi, vhash );
|
||||
|
||||
dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
|
||||
vhash );
|
||||
@@ -566,15 +566,15 @@ int sonoa_8way_hash( void *state, const void *input, int thr_id )
|
||||
intrlv_8x32_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7 );
|
||||
|
||||
shabal512_8way_init( &ctx.shabal );
|
||||
shabal512_8way_update( &ctx.shabal, vhash, 64 );
|
||||
shabal512_8way_close( &ctx.shabal, vhash );
|
||||
shabal512_8x32_init( &ctx.shabal );
|
||||
shabal512_8x32_update( &ctx.shabal, vhash, 64 );
|
||||
shabal512_8x32_close( &ctx.shabal, vhash );
|
||||
|
||||
rintrlv_8x32_8x64( vhashA, vhash, 512 );
|
||||
|
||||
hamsi512_8way_init( &ctx.hamsi );
|
||||
hamsi512_8way_update( &ctx.hamsi, vhashA, 64 );
|
||||
hamsi512_8way_close( &ctx.hamsi, vhash );
|
||||
hamsi512_8x64_init( &ctx.hamsi );
|
||||
hamsi512_8x64_update( &ctx.hamsi, vhashA, 64 );
|
||||
hamsi512_8x64_close( &ctx.hamsi, vhash );
|
||||
|
||||
#if defined(__VAES__)
|
||||
|
||||
@@ -633,13 +633,13 @@ int sonoa_8way_hash( void *state, const void *input, int thr_id )
|
||||
if ( work_restart[thr_id].restart ) return 0;
|
||||
// 5
|
||||
|
||||
bmw512_8way_full( &ctx.bmw, vhash, vhash, 64 );
|
||||
bmw512_8x64_full( &ctx.bmw, vhash, vhash, 64 );
|
||||
|
||||
rintrlv_8x64_8x32( vhashA, vhash, 512 );
|
||||
|
||||
shabal512_8way_init( &ctx.shabal );
|
||||
shabal512_8way_update( &ctx.shabal, vhashA, 64 );
|
||||
shabal512_8way_close( &ctx.shabal, vhash );
|
||||
shabal512_8x32_init( &ctx.shabal );
|
||||
shabal512_8x32_update( &ctx.shabal, vhashA, 64 );
|
||||
shabal512_8x32_close( &ctx.shabal, vhash );
|
||||
|
||||
#if defined(__VAES__)
|
||||
|
||||
@@ -669,15 +669,15 @@ int sonoa_8way_hash( void *state, const void *input, int thr_id )
|
||||
|
||||
#endif
|
||||
|
||||
skein512_8way_full( &ctx.skein, vhash, vhash, 64 );
|
||||
skein512_8x64_full( &ctx.skein, vhash, vhash, 64 );
|
||||
|
||||
jh512_8way_init( &ctx.jh );
|
||||
jh512_8way_update( &ctx.jh, vhash, 64 );
|
||||
jh512_8way_close( &ctx.jh, vhash );
|
||||
jh512_8x64_init( &ctx.jh );
|
||||
jh512_8x64_update( &ctx.jh, vhash, 64 );
|
||||
jh512_8x64_close( &ctx.jh, vhash );
|
||||
|
||||
keccak512_8way_init( &ctx.keccak );
|
||||
keccak512_8way_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_8way_close( &ctx.keccak, vhash );
|
||||
keccak512_8x64_init( &ctx.keccak );
|
||||
keccak512_8x64_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_8x64_close( &ctx.keccak, vhash );
|
||||
|
||||
rintrlv_8x64_4x128( vhashA, vhashB, vhash, 512 );
|
||||
|
||||
@@ -748,9 +748,9 @@ int sonoa_8way_hash( void *state, const void *input, int thr_id )
|
||||
|
||||
#endif
|
||||
|
||||
hamsi512_8way_init( &ctx.hamsi );
|
||||
hamsi512_8way_update( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_8way_close( &ctx.hamsi, vhash );
|
||||
hamsi512_8x64_init( &ctx.hamsi );
|
||||
hamsi512_8x64_update( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_8x64_close( &ctx.hamsi, vhash );
|
||||
|
||||
dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
|
||||
vhash );
|
||||
@@ -767,9 +767,9 @@ int sonoa_8way_hash( void *state, const void *input, int thr_id )
|
||||
intrlv_8x32_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7 );
|
||||
|
||||
shabal512_8way_init( &ctx.shabal );
|
||||
shabal512_8way_update( &ctx.shabal, vhash, 64 );
|
||||
shabal512_8way_close( &ctx.shabal, vhash );
|
||||
shabal512_8x32_init( &ctx.shabal );
|
||||
shabal512_8x32_update( &ctx.shabal, vhash, 64 );
|
||||
shabal512_8x32_close( &ctx.shabal, vhash );
|
||||
|
||||
dintrlv_8x32_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
|
||||
vhash );
|
||||
@@ -789,7 +789,7 @@ int sonoa_8way_hash( void *state, const void *input, int thr_id )
|
||||
intrlv_8x64_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7 );
|
||||
|
||||
bmw512_8way_full( &ctx.bmw, vhash, vhash, 64 );
|
||||
bmw512_8x64_full( &ctx.bmw, vhash, vhash, 64 );
|
||||
|
||||
#if defined(__VAES__)
|
||||
|
||||
@@ -819,15 +819,15 @@ int sonoa_8way_hash( void *state, const void *input, int thr_id )
|
||||
|
||||
#endif
|
||||
|
||||
skein512_8way_full( &ctx.skein, vhash, vhash, 64 );
|
||||
skein512_8x64_full( &ctx.skein, vhash, vhash, 64 );
|
||||
|
||||
jh512_8way_init( &ctx.jh );
|
||||
jh512_8way_update( &ctx.jh, vhash, 64 );
|
||||
jh512_8way_close( &ctx.jh, vhash );
|
||||
jh512_8x64_init( &ctx.jh );
|
||||
jh512_8x64_update( &ctx.jh, vhash, 64 );
|
||||
jh512_8x64_close( &ctx.jh, vhash );
|
||||
|
||||
keccak512_8way_init( &ctx.keccak );
|
||||
keccak512_8way_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_8way_close( &ctx.keccak, vhash );
|
||||
keccak512_8x64_init( &ctx.keccak );
|
||||
keccak512_8x64_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_8x64_close( &ctx.keccak, vhash );
|
||||
|
||||
rintrlv_8x64_4x128( vhashA, vhashB, vhash, 512 );
|
||||
|
||||
@@ -898,9 +898,9 @@ int sonoa_8way_hash( void *state, const void *input, int thr_id )
|
||||
|
||||
#endif
|
||||
|
||||
hamsi512_8way_init( &ctx.hamsi );
|
||||
hamsi512_8way_update( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_8way_close( &ctx.hamsi, vhash );
|
||||
hamsi512_8x64_init( &ctx.hamsi );
|
||||
hamsi512_8x64_update( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_8x64_close( &ctx.hamsi, vhash );
|
||||
|
||||
dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
|
||||
vhash );
|
||||
@@ -917,9 +917,9 @@ int sonoa_8way_hash( void *state, const void *input, int thr_id )
|
||||
intrlv_8x32_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7 );
|
||||
|
||||
shabal512_8way_init( &ctx.shabal );
|
||||
shabal512_8way_update( &ctx.shabal, vhash, 64 );
|
||||
shabal512_8way_close( &ctx.shabal, vhash );
|
||||
shabal512_8x32_init( &ctx.shabal );
|
||||
shabal512_8x32_update( &ctx.shabal, vhash, 64 );
|
||||
shabal512_8x32_close( &ctx.shabal, vhash );
|
||||
|
||||
dintrlv_8x32_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
|
||||
vhash );
|
||||
@@ -936,9 +936,9 @@ int sonoa_8way_hash( void *state, const void *input, int thr_id )
|
||||
intrlv_8x64_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7 );
|
||||
|
||||
sha512_8way_init( &ctx.sha512 );
|
||||
sha512_8way_update( &ctx.sha512, vhash, 64 );
|
||||
sha512_8way_close( &ctx.sha512, vhash );
|
||||
sha512_8x64_init( &ctx.sha512 );
|
||||
sha512_8x64_update( &ctx.sha512, vhash, 64 );
|
||||
sha512_8x64_close( &ctx.sha512, vhash );
|
||||
|
||||
dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
|
||||
vhash );
|
||||
@@ -958,7 +958,7 @@ int sonoa_8way_hash( void *state, const void *input, int thr_id )
|
||||
intrlv_8x64_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7 );
|
||||
|
||||
bmw512_8way_full( &ctx.bmw, vhash, vhash, 64 );
|
||||
bmw512_8x64_full( &ctx.bmw, vhash, vhash, 64 );
|
||||
|
||||
#if defined(__VAES__)
|
||||
|
||||
@@ -988,15 +988,15 @@ int sonoa_8way_hash( void *state, const void *input, int thr_id )
|
||||
|
||||
#endif
|
||||
|
||||
skein512_8way_full( &ctx.skein, vhash, vhash, 64 );
|
||||
skein512_8x64_full( &ctx.skein, vhash, vhash, 64 );
|
||||
|
||||
jh512_8way_init( &ctx.jh );
|
||||
jh512_8way_update( &ctx.jh, vhash, 64 );
|
||||
jh512_8way_close( &ctx.jh, vhash );
|
||||
jh512_8x64_init( &ctx.jh );
|
||||
jh512_8x64_update( &ctx.jh, vhash, 64 );
|
||||
jh512_8x64_close( &ctx.jh, vhash );
|
||||
|
||||
keccak512_8way_init( &ctx.keccak );
|
||||
keccak512_8way_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_8way_close( &ctx.keccak, vhash );
|
||||
keccak512_8x64_init( &ctx.keccak );
|
||||
keccak512_8x64_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_8x64_close( &ctx.keccak, vhash );
|
||||
|
||||
rintrlv_8x64_4x128( vhashA, vhashB, vhash, 512 );
|
||||
|
||||
@@ -1067,9 +1067,9 @@ int sonoa_8way_hash( void *state, const void *input, int thr_id )
|
||||
|
||||
#endif
|
||||
|
||||
hamsi512_8way_init( &ctx.hamsi );
|
||||
hamsi512_8way_update( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_8way_close( &ctx.hamsi, vhash );
|
||||
hamsi512_8x64_init( &ctx.hamsi );
|
||||
hamsi512_8x64_update( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_8x64_close( &ctx.hamsi, vhash );
|
||||
|
||||
dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
|
||||
vhash );
|
||||
@@ -1086,9 +1086,9 @@ int sonoa_8way_hash( void *state, const void *input, int thr_id )
|
||||
intrlv_8x32_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7 );
|
||||
|
||||
shabal512_8way_init( &ctx.shabal );
|
||||
shabal512_8way_update( &ctx.shabal, vhash, 64 );
|
||||
shabal512_8way_close( &ctx.shabal, vhash );
|
||||
shabal512_8x32_init( &ctx.shabal );
|
||||
shabal512_8x32_update( &ctx.shabal, vhash, 64 );
|
||||
shabal512_8x32_close( &ctx.shabal, vhash );
|
||||
|
||||
dintrlv_8x32_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
|
||||
vhash );
|
||||
@@ -1105,15 +1105,15 @@ int sonoa_8way_hash( void *state, const void *input, int thr_id )
|
||||
intrlv_8x64_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7 );
|
||||
|
||||
sha512_8way_init( &ctx.sha512 );
|
||||
sha512_8way_update( &ctx.sha512, vhash, 64 );
|
||||
sha512_8way_close( &ctx.sha512, vhash );
|
||||
sha512_8x64_init( &ctx.sha512 );
|
||||
sha512_8x64_update( &ctx.sha512, vhash, 64 );
|
||||
sha512_8x64_close( &ctx.sha512, vhash );
|
||||
|
||||
rintrlv_8x64_8x32( vhashA, vhash, 512 );
|
||||
|
||||
haval256_5_8way_init( &ctx.haval );
|
||||
haval256_5_8way_update( &ctx.haval, vhashA, 64 );
|
||||
haval256_5_8way_close( &ctx.haval, state );
|
||||
haval256_8x32_init( &ctx.haval );
|
||||
haval256_8x32_update( &ctx.haval, vhashA, 64 );
|
||||
haval256_8x32_close( &ctx.haval, state );
|
||||
|
||||
return 1;
|
||||
}
|
||||
@@ -1122,8 +1122,8 @@ int sonoa_8way_hash( void *state, const void *input, int thr_id )
|
||||
|
||||
union _sonoa_4way_context_overlay
|
||||
{
|
||||
blake512_4way_context blake;
|
||||
bmw512_4way_context bmw;
|
||||
blake512_4x64_context blake;
|
||||
bmw512_4x64_context bmw;
|
||||
#if defined(__VAES__)
|
||||
groestl512_2way_context groestl;
|
||||
echo512_2way_context echo;
|
||||
@@ -1131,19 +1131,19 @@ union _sonoa_4way_context_overlay
|
||||
hashState_groestl groestl;
|
||||
hashState_echo echo;
|
||||
#endif
|
||||
skein512_4way_context skein;
|
||||
jh512_4way_context jh;
|
||||
keccak512_4way_context keccak;
|
||||
skein512_4x64_context skein;
|
||||
jh512_4x64_context jh;
|
||||
keccak512_4x64_context keccak;
|
||||
luffa_2way_context luffa;
|
||||
cube_2way_context cube;
|
||||
shavite512_2way_context shavite;
|
||||
simd_2way_context simd;
|
||||
hamsi512_4way_context hamsi;
|
||||
hamsi512_4x64_context hamsi;
|
||||
hashState_fugue fugue;
|
||||
shabal512_4way_context shabal;
|
||||
shabal512_4x32_context shabal;
|
||||
sph_whirlpool_context whirlpool;
|
||||
sha512_4way_context sha512;
|
||||
haval256_5_4way_context haval;
|
||||
sha512_4x64_context sha512;
|
||||
haval256_4x32_context haval;
|
||||
};
|
||||
|
||||
typedef union _sonoa_4way_context_overlay sonoa_4way_context_overlay;
|
||||
@@ -1161,11 +1161,11 @@ int sonoa_4way_hash( void *state, const void *input, int thr_id )
|
||||
|
||||
// 1
|
||||
|
||||
blake512_4way_full( &ctx.blake, vhash, input, 80 );
|
||||
blake512_4x64_full( &ctx.blake, vhash, input, 80 );
|
||||
|
||||
bmw512_4way_init( &ctx.bmw );
|
||||
bmw512_4way_update( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
bmw512_4x64_init( &ctx.bmw );
|
||||
bmw512_4x64_update( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4x64_close( &ctx.bmw, vhash );
|
||||
|
||||
#if defined(__VAES__)
|
||||
|
||||
@@ -1189,15 +1189,15 @@ int sonoa_4way_hash( void *state, const void *input, int thr_id )
|
||||
|
||||
#endif
|
||||
|
||||
skein512_4way_full( &ctx.skein, vhash, vhash, 64 );
|
||||
skein512_4x64_full( &ctx.skein, vhash, vhash, 64 );
|
||||
|
||||
jh512_4way_init( &ctx.jh );
|
||||
jh512_4way_update( &ctx.jh, vhash, 64 );
|
||||
jh512_4way_close( &ctx.jh, vhash );
|
||||
jh512_4x64_init( &ctx.jh );
|
||||
jh512_4x64_update( &ctx.jh, vhash, 64 );
|
||||
jh512_4x64_close( &ctx.jh, vhash );
|
||||
|
||||
keccak512_4way_init( &ctx.keccak );
|
||||
keccak512_4way_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_close( &ctx.keccak, vhash );
|
||||
keccak512_4x64_init( &ctx.keccak );
|
||||
keccak512_4x64_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4x64_close( &ctx.keccak, vhash );
|
||||
|
||||
rintrlv_4x64_2x128( vhashA, vhashB, vhash, 512 );
|
||||
|
||||
@@ -1241,9 +1241,9 @@ int sonoa_4way_hash( void *state, const void *input, int thr_id )
|
||||
if ( work_restart[thr_id].restart ) return 0;
|
||||
// 2
|
||||
|
||||
bmw512_4way_init( &ctx.bmw );
|
||||
bmw512_4way_update( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
bmw512_4x64_init( &ctx.bmw );
|
||||
bmw512_4x64_update( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4x64_close( &ctx.bmw, vhash );
|
||||
|
||||
#if defined(__VAES__)
|
||||
|
||||
@@ -1267,15 +1267,15 @@ int sonoa_4way_hash( void *state, const void *input, int thr_id )
|
||||
|
||||
#endif
|
||||
|
||||
skein512_4way_full( &ctx.skein, vhash, vhash, 64 );
|
||||
skein512_4x64_full( &ctx.skein, vhash, vhash, 64 );
|
||||
|
||||
jh512_4way_init( &ctx.jh );
|
||||
jh512_4way_update( &ctx.jh, vhash, 64 );
|
||||
jh512_4way_close( &ctx.jh, vhash );
|
||||
jh512_4x64_init( &ctx.jh );
|
||||
jh512_4x64_update( &ctx.jh, vhash, 64 );
|
||||
jh512_4x64_close( &ctx.jh, vhash );
|
||||
|
||||
keccak512_4way_init( &ctx.keccak );
|
||||
keccak512_4way_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_close( &ctx.keccak, vhash );
|
||||
keccak512_4x64_init( &ctx.keccak );
|
||||
keccak512_4x64_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4x64_close( &ctx.keccak, vhash );
|
||||
|
||||
rintrlv_4x64_2x128( vhashA, vhashB, vhash, 512 );
|
||||
|
||||
@@ -1316,16 +1316,16 @@ int sonoa_4way_hash( void *state, const void *input, int thr_id )
|
||||
|
||||
#endif
|
||||
|
||||
hamsi512_4way_init( &ctx.hamsi );
|
||||
hamsi512_4way_update( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_4way_close( &ctx.hamsi, vhash );
|
||||
hamsi512_4x64_init( &ctx.hamsi );
|
||||
hamsi512_4x64_update( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_4x64_close( &ctx.hamsi, vhash );
|
||||
|
||||
if ( work_restart[thr_id].restart ) return 0;
|
||||
// 3
|
||||
|
||||
bmw512_4way_init( &ctx.bmw );
|
||||
bmw512_4way_update( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
bmw512_4x64_init( &ctx.bmw );
|
||||
bmw512_4x64_update( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4x64_close( &ctx.bmw, vhash );
|
||||
|
||||
#if defined(__VAES__)
|
||||
|
||||
@@ -1349,15 +1349,15 @@ int sonoa_4way_hash( void *state, const void *input, int thr_id )
|
||||
|
||||
#endif
|
||||
|
||||
skein512_4way_full( &ctx.skein, vhash, vhash, 64 );
|
||||
skein512_4x64_full( &ctx.skein, vhash, vhash, 64 );
|
||||
|
||||
jh512_4way_init( &ctx.jh );
|
||||
jh512_4way_update( &ctx.jh, vhash, 64 );
|
||||
jh512_4way_close( &ctx.jh, vhash );
|
||||
jh512_4x64_init( &ctx.jh );
|
||||
jh512_4x64_update( &ctx.jh, vhash, 64 );
|
||||
jh512_4x64_close( &ctx.jh, vhash );
|
||||
|
||||
keccak512_4way_init( &ctx.keccak );
|
||||
keccak512_4way_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_close( &ctx.keccak, vhash );
|
||||
keccak512_4x64_init( &ctx.keccak );
|
||||
keccak512_4x64_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4x64_close( &ctx.keccak, vhash );
|
||||
|
||||
rintrlv_4x64_2x128( vhashA, vhashB, vhash, 512 );
|
||||
|
||||
@@ -1398,9 +1398,9 @@ int sonoa_4way_hash( void *state, const void *input, int thr_id )
|
||||
|
||||
#endif
|
||||
|
||||
hamsi512_4way_init( &ctx.hamsi );
|
||||
hamsi512_4way_update( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_4way_close( &ctx.hamsi, vhash );
|
||||
hamsi512_4x64_init( &ctx.hamsi );
|
||||
hamsi512_4x64_update( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_4x64_close( &ctx.hamsi, vhash );
|
||||
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
|
||||
@@ -1413,9 +1413,9 @@ int sonoa_4way_hash( void *state, const void *input, int thr_id )
|
||||
// 4
|
||||
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
bmw512_4way_init( &ctx.bmw );
|
||||
bmw512_4way_update( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
bmw512_4x64_init( &ctx.bmw );
|
||||
bmw512_4x64_update( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4x64_close( &ctx.bmw, vhash );
|
||||
|
||||
#if defined(__VAES__)
|
||||
|
||||
@@ -1439,15 +1439,15 @@ int sonoa_4way_hash( void *state, const void *input, int thr_id )
|
||||
|
||||
#endif
|
||||
|
||||
skein512_4way_full( &ctx.skein, vhash, vhash, 64 );
|
||||
skein512_4x64_full( &ctx.skein, vhash, vhash, 64 );
|
||||
|
||||
jh512_4way_init( &ctx.jh );
|
||||
jh512_4way_update( &ctx.jh, vhash, 64 );
|
||||
jh512_4way_close( &ctx.jh, vhash );
|
||||
jh512_4x64_init( &ctx.jh );
|
||||
jh512_4x64_update( &ctx.jh, vhash, 64 );
|
||||
jh512_4x64_close( &ctx.jh, vhash );
|
||||
|
||||
keccak512_4way_init( &ctx.keccak );
|
||||
keccak512_4way_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_close( &ctx.keccak, vhash );
|
||||
keccak512_4x64_init( &ctx.keccak );
|
||||
keccak512_4x64_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4x64_close( &ctx.keccak, vhash );
|
||||
|
||||
rintrlv_4x64_2x128( vhashA, vhashB, vhash, 512 );
|
||||
|
||||
@@ -1488,9 +1488,9 @@ int sonoa_4way_hash( void *state, const void *input, int thr_id )
|
||||
|
||||
#endif
|
||||
|
||||
hamsi512_4way_init( &ctx.hamsi );
|
||||
hamsi512_4way_update( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_4way_close( &ctx.hamsi, vhash );
|
||||
hamsi512_4x64_init( &ctx.hamsi );
|
||||
hamsi512_4x64_update( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_4x64_close( &ctx.hamsi, vhash );
|
||||
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
|
||||
@@ -1501,15 +1501,15 @@ int sonoa_4way_hash( void *state, const void *input, int thr_id )
|
||||
|
||||
intrlv_4x32_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
shabal512_4way_init( &ctx.shabal );
|
||||
shabal512_4way_update( &ctx.shabal, vhash, 64 );
|
||||
shabal512_4way_close( &ctx.shabal, vhash );
|
||||
shabal512_4x32_init( &ctx.shabal );
|
||||
shabal512_4x32_update( &ctx.shabal, vhash, 64 );
|
||||
shabal512_4x32_close( &ctx.shabal, vhash );
|
||||
|
||||
rintrlv_4x32_4x64( vhashB, vhash, 512 );
|
||||
|
||||
hamsi512_4way_init( &ctx.hamsi );
|
||||
hamsi512_4way_update( &ctx.hamsi, vhashB, 64 );
|
||||
hamsi512_4way_close( &ctx.hamsi, vhash );
|
||||
hamsi512_4x64_init( &ctx.hamsi );
|
||||
hamsi512_4x64_update( &ctx.hamsi, vhashB, 64 );
|
||||
hamsi512_4x64_close( &ctx.hamsi, vhash );
|
||||
|
||||
#if defined(__VAES__)
|
||||
|
||||
@@ -1545,15 +1545,15 @@ int sonoa_4way_hash( void *state, const void *input, int thr_id )
|
||||
// 5
|
||||
rintrlv_2x128_4x64( vhash, vhashA, vhashB, 512 );
|
||||
|
||||
bmw512_4way_init( &ctx.bmw );
|
||||
bmw512_4way_update( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
bmw512_4x64_init( &ctx.bmw );
|
||||
bmw512_4x64_update( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4x64_close( &ctx.bmw, vhash );
|
||||
|
||||
rintrlv_4x64_4x32( vhashB, vhash, 512 );
|
||||
|
||||
shabal512_4way_init( &ctx.shabal );
|
||||
shabal512_4way_update( &ctx.shabal, vhashB, 64 );
|
||||
shabal512_4way_close( &ctx.shabal, vhash );
|
||||
shabal512_4x32_init( &ctx.shabal );
|
||||
shabal512_4x32_update( &ctx.shabal, vhashB, 64 );
|
||||
shabal512_4x32_close( &ctx.shabal, vhash );
|
||||
|
||||
#if defined(__VAES__)
|
||||
|
||||
@@ -1580,15 +1580,15 @@ int sonoa_4way_hash( void *state, const void *input, int thr_id )
|
||||
|
||||
#endif
|
||||
|
||||
skein512_4way_full( &ctx.skein, vhash, vhash, 64 );
|
||||
skein512_4x64_full( &ctx.skein, vhash, vhash, 64 );
|
||||
|
||||
jh512_4way_init( &ctx.jh );
|
||||
jh512_4way_update( &ctx.jh, vhash, 64 );
|
||||
jh512_4way_close( &ctx.jh, vhash );
|
||||
jh512_4x64_init( &ctx.jh );
|
||||
jh512_4x64_update( &ctx.jh, vhash, 64 );
|
||||
jh512_4x64_close( &ctx.jh, vhash );
|
||||
|
||||
keccak512_4way_init( &ctx.keccak );
|
||||
keccak512_4way_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_close( &ctx.keccak, vhash );
|
||||
keccak512_4x64_init( &ctx.keccak );
|
||||
keccak512_4x64_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4x64_close( &ctx.keccak, vhash );
|
||||
|
||||
rintrlv_4x64_2x128( vhashA, vhashB, vhash, 512 );
|
||||
|
||||
@@ -1629,9 +1629,9 @@ int sonoa_4way_hash( void *state, const void *input, int thr_id )
|
||||
|
||||
#endif
|
||||
|
||||
hamsi512_4way_init( &ctx.hamsi );
|
||||
hamsi512_4way_update( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_4way_close( &ctx.hamsi, vhash );
|
||||
hamsi512_4x64_init( &ctx.hamsi );
|
||||
hamsi512_4x64_update( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_4x64_close( &ctx.hamsi, vhash );
|
||||
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
|
||||
@@ -1642,9 +1642,9 @@ int sonoa_4way_hash( void *state, const void *input, int thr_id )
|
||||
|
||||
intrlv_4x32_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
shabal512_4way_init( &ctx.shabal );
|
||||
shabal512_4way_update( &ctx.shabal, vhash, 64 );
|
||||
shabal512_4way_close( &ctx.shabal, vhash );
|
||||
shabal512_4x32_init( &ctx.shabal );
|
||||
shabal512_4x32_update( &ctx.shabal, vhash, 64 );
|
||||
shabal512_4x32_close( &ctx.shabal, vhash );
|
||||
|
||||
dintrlv_4x32_512( hash0, hash1, hash2, hash3, vhash );
|
||||
|
||||
@@ -1658,9 +1658,9 @@ int sonoa_4way_hash( void *state, const void *input, int thr_id )
|
||||
|
||||
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
bmw512_4way_init( &ctx.bmw );
|
||||
bmw512_4way_update( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
bmw512_4x64_init( &ctx.bmw );
|
||||
bmw512_4x64_update( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4x64_close( &ctx.bmw, vhash );
|
||||
|
||||
#if defined(__VAES__)
|
||||
|
||||
@@ -1684,15 +1684,15 @@ int sonoa_4way_hash( void *state, const void *input, int thr_id )
|
||||
|
||||
#endif
|
||||
|
||||
skein512_4way_full( &ctx.skein, vhash, vhash, 64 );
|
||||
skein512_4x64_full( &ctx.skein, vhash, vhash, 64 );
|
||||
|
||||
jh512_4way_init( &ctx.jh );
|
||||
jh512_4way_update( &ctx.jh, vhash, 64 );
|
||||
jh512_4way_close( &ctx.jh, vhash );
|
||||
jh512_4x64_init( &ctx.jh );
|
||||
jh512_4x64_update( &ctx.jh, vhash, 64 );
|
||||
jh512_4x64_close( &ctx.jh, vhash );
|
||||
|
||||
keccak512_4way_init( &ctx.keccak );
|
||||
keccak512_4way_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_close( &ctx.keccak, vhash );
|
||||
keccak512_4x64_init( &ctx.keccak );
|
||||
keccak512_4x64_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4x64_close( &ctx.keccak, vhash );
|
||||
|
||||
rintrlv_4x64_2x128( vhashA, vhashB, vhash, 512 );
|
||||
|
||||
@@ -1733,9 +1733,9 @@ int sonoa_4way_hash( void *state, const void *input, int thr_id )
|
||||
|
||||
#endif
|
||||
|
||||
hamsi512_4way_init( &ctx.hamsi );
|
||||
hamsi512_4way_update( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_4way_close( &ctx.hamsi, vhash );
|
||||
hamsi512_4x64_init( &ctx.hamsi );
|
||||
hamsi512_4x64_update( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_4x64_close( &ctx.hamsi, vhash );
|
||||
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
|
||||
@@ -1746,9 +1746,9 @@ int sonoa_4way_hash( void *state, const void *input, int thr_id )
|
||||
|
||||
intrlv_4x32_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
shabal512_4way_init( &ctx.shabal );
|
||||
shabal512_4way_update( &ctx.shabal, vhash, 64 );
|
||||
shabal512_4way_close( &ctx.shabal, vhash );
|
||||
shabal512_4x32_init( &ctx.shabal );
|
||||
shabal512_4x32_update( &ctx.shabal, vhash, 64 );
|
||||
shabal512_4x32_close( &ctx.shabal, vhash );
|
||||
|
||||
dintrlv_4x32_512( hash0, hash1, hash2, hash3, vhash );
|
||||
|
||||
@@ -1759,9 +1759,9 @@ int sonoa_4way_hash( void *state, const void *input, int thr_id )
|
||||
|
||||
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
sha512_4way_init( &ctx.sha512 );
|
||||
sha512_4way_update( &ctx.sha512, vhash, 64 );
|
||||
sha512_4way_close( &ctx.sha512, vhash );
|
||||
sha512_4x64_init( &ctx.sha512 );
|
||||
sha512_4x64_update( &ctx.sha512, vhash, 64 );
|
||||
sha512_4x64_close( &ctx.sha512, vhash );
|
||||
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
|
||||
@@ -1775,9 +1775,9 @@ int sonoa_4way_hash( void *state, const void *input, int thr_id )
|
||||
|
||||
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
bmw512_4way_init( &ctx.bmw );
|
||||
bmw512_4way_update( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
bmw512_4x64_init( &ctx.bmw );
|
||||
bmw512_4x64_update( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4x64_close( &ctx.bmw, vhash );
|
||||
|
||||
#if defined(__VAES__)
|
||||
|
||||
@@ -1801,15 +1801,15 @@ int sonoa_4way_hash( void *state, const void *input, int thr_id )
|
||||
|
||||
#endif
|
||||
|
||||
skein512_4way_full( &ctx.skein, vhash, vhash, 64 );
|
||||
skein512_4x64_full( &ctx.skein, vhash, vhash, 64 );
|
||||
|
||||
jh512_4way_init( &ctx.jh );
|
||||
jh512_4way_update( &ctx.jh, vhash, 64 );
|
||||
jh512_4way_close( &ctx.jh, vhash );
|
||||
jh512_4x64_init( &ctx.jh );
|
||||
jh512_4x64_update( &ctx.jh, vhash, 64 );
|
||||
jh512_4x64_close( &ctx.jh, vhash );
|
||||
|
||||
keccak512_4way_init( &ctx.keccak );
|
||||
keccak512_4way_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_close( &ctx.keccak, vhash );
|
||||
keccak512_4x64_init( &ctx.keccak );
|
||||
keccak512_4x64_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4x64_close( &ctx.keccak, vhash );
|
||||
|
||||
rintrlv_4x64_2x128( vhashA, vhashB, vhash, 512 );
|
||||
|
||||
@@ -1850,9 +1850,9 @@ int sonoa_4way_hash( void *state, const void *input, int thr_id )
|
||||
|
||||
#endif
|
||||
|
||||
hamsi512_4way_init( &ctx.hamsi );
|
||||
hamsi512_4way_update( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_4way_close( &ctx.hamsi, vhash );
|
||||
hamsi512_4x64_init( &ctx.hamsi );
|
||||
hamsi512_4x64_update( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_4x64_close( &ctx.hamsi, vhash );
|
||||
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
|
||||
@@ -1863,9 +1863,9 @@ int sonoa_4way_hash( void *state, const void *input, int thr_id )
|
||||
|
||||
intrlv_4x32_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
shabal512_4way_init( &ctx.shabal );
|
||||
shabal512_4way_update( &ctx.shabal, vhash, 64 );
|
||||
shabal512_4way_close( &ctx.shabal, vhash );
|
||||
shabal512_4x32_init( &ctx.shabal );
|
||||
shabal512_4x32_update( &ctx.shabal, vhash, 64 );
|
||||
shabal512_4x32_close( &ctx.shabal, vhash );
|
||||
|
||||
dintrlv_4x32_512( hash0, hash1, hash2, hash3, vhash );
|
||||
|
||||
@@ -1876,15 +1876,15 @@ int sonoa_4way_hash( void *state, const void *input, int thr_id )
|
||||
|
||||
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
sha512_4way_init( &ctx.sha512 );
|
||||
sha512_4way_update( &ctx.sha512, vhash, 64 );
|
||||
sha512_4way_close( &ctx.sha512, vhash );
|
||||
sha512_4x64_init( &ctx.sha512 );
|
||||
sha512_4x64_update( &ctx.sha512, vhash, 64 );
|
||||
sha512_4x64_close( &ctx.sha512, vhash );
|
||||
|
||||
rintrlv_4x64_4x32( vhashB, vhash, 512 );
|
||||
|
||||
haval256_5_4way_init( &ctx.haval );
|
||||
haval256_5_4way_update( &ctx.haval, vhashB, 64 );
|
||||
haval256_5_4way_close( &ctx.haval, state );
|
||||
haval256_4x32_init( &ctx.haval );
|
||||
haval256_4x32_update( &ctx.haval, vhashB, 64 );
|
||||
haval256_4x32_close( &ctx.haval, state );
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
@@ -31,11 +31,11 @@
|
||||
|
||||
union _x17_16way_context_overlay
|
||||
{
|
||||
blake512_8way_context blake;
|
||||
blake512_8x64_context blake;
|
||||
bmw512_8x64_context bmw;
|
||||
skein512_8way_context skein;
|
||||
jh512_8way_context jh;
|
||||
keccak512_8way_context keccak;
|
||||
skein512_8x64_context skein;
|
||||
jh512_8x64_context jh;
|
||||
keccak512_8x64_context keccak;
|
||||
luffa_4way_context luffa;
|
||||
cube_4way_2buf_context cube;
|
||||
#if defined(__VAES__)
|
||||
@@ -48,17 +48,17 @@ union _x17_16way_context_overlay
|
||||
hashState_echo echo;
|
||||
#endif
|
||||
simd_4way_context simd;
|
||||
hamsi512_8way_context hamsi;
|
||||
hamsi512_8x64_context hamsi;
|
||||
hashState_fugue fugue;
|
||||
shabal512_16way_context shabal;
|
||||
shabal512_16x32_context shabal;
|
||||
sph_whirlpool_context whirlpool;
|
||||
sha512_8way_context sha512;
|
||||
haval256_5_16way_context haval;
|
||||
sha512_8x64_context sha512;
|
||||
haval256_16x32_context haval;
|
||||
} __attribute__ ((aligned (64)));
|
||||
typedef union _x17_16way_context_overlay x17_16way_context_overlay;
|
||||
|
||||
static __thread __m512i x17_16way_midstate[16] __attribute__((aligned(64)));
|
||||
static __thread blake512_8way_context blake512_8way_ctx __attribute__((aligned(64)));
|
||||
static __thread blake512_8x64_context blake512_8x64_ctx __attribute__((aligned(64)));
|
||||
|
||||
int x17_16x64_hash( void *state, const __m512i nonceA, const __m512i nonceB,
|
||||
int thr_id )
|
||||
@@ -85,13 +85,10 @@ int x17_16x64_hash( void *state, const __m512i nonceA, const __m512i nonceB,
|
||||
uint64_t hash15[8] __attribute__ ((aligned (32)));
|
||||
x17_16way_context_overlay ctx;
|
||||
|
||||
|
||||
|
||||
|
||||
memcpy( &ctx.blake, &blake512_8way_ctx, sizeof (blake512_8way_ctx) );
|
||||
blake512_8way_final_le( &blake512_8way_ctx, vhashA, nonceA,
|
||||
memcpy( &ctx.blake, &blake512_8x64_ctx, sizeof (blake512_8x64_ctx) );
|
||||
blake512_8x64_final_le( &blake512_8x64_ctx, vhashA, nonceA,
|
||||
x17_16way_midstate );
|
||||
blake512_8way_final_le( &ctx.blake, vhashB, nonceB,
|
||||
blake512_8x64_final_le( &ctx.blake, vhashB, nonceB,
|
||||
x17_16way_midstate );
|
||||
|
||||
bmw512_8x64_full( &ctx.bmw, vhashA, vhashA, 64 );
|
||||
@@ -140,22 +137,22 @@ int x17_16x64_hash( void *state, const __m512i nonceA, const __m512i nonceB,
|
||||
|
||||
#endif
|
||||
|
||||
skein512_8way_full( &ctx.skein, vhashA, vhashA, 64 );
|
||||
skein512_8way_full( &ctx.skein, vhashB, vhashB, 64 );
|
||||
skein512_8x64_full( &ctx.skein, vhashA, vhashA, 64 );
|
||||
skein512_8x64_full( &ctx.skein, vhashB, vhashB, 64 );
|
||||
|
||||
jh512_8way_init( &ctx.jh );
|
||||
jh512_8way_update( &ctx.jh, vhashA, 64 );
|
||||
jh512_8way_close( &ctx.jh, vhashA );
|
||||
jh512_8way_init( &ctx.jh );
|
||||
jh512_8way_update( &ctx.jh, vhashB, 64 );
|
||||
jh512_8way_close( &ctx.jh, vhashB );
|
||||
jh512_8x64_init( &ctx.jh );
|
||||
jh512_8x64_update( &ctx.jh, vhashA, 64 );
|
||||
jh512_8x64_close( &ctx.jh, vhashA );
|
||||
jh512_8x64_init( &ctx.jh );
|
||||
jh512_8x64_update( &ctx.jh, vhashB, 64 );
|
||||
jh512_8x64_close( &ctx.jh, vhashB );
|
||||
|
||||
keccak512_8way_init( &ctx.keccak );
|
||||
keccak512_8way_update( &ctx.keccak, vhashA, 64 );
|
||||
keccak512_8way_close( &ctx.keccak, vhashA );
|
||||
keccak512_8way_init( &ctx.keccak );
|
||||
keccak512_8way_update( &ctx.keccak, vhashB, 64 );
|
||||
keccak512_8way_close( &ctx.keccak, vhashB );
|
||||
keccak512_8x64_init( &ctx.keccak );
|
||||
keccak512_8x64_update( &ctx.keccak, vhashA, 64 );
|
||||
keccak512_8x64_close( &ctx.keccak, vhashA );
|
||||
keccak512_8x64_init( &ctx.keccak );
|
||||
keccak512_8x64_update( &ctx.keccak, vhashB, 64 );
|
||||
keccak512_8x64_close( &ctx.keccak, vhashB );
|
||||
|
||||
//
|
||||
rintrlv_8x64_4x128( vhashC, vhashD, vhashA, 512 );
|
||||
@@ -310,18 +307,17 @@ int x17_16x64_hash( void *state, const __m512i nonceA, const __m512i nonceB,
|
||||
*/
|
||||
|
||||
|
||||
hamsi512_8way_init( &ctx.hamsi );
|
||||
hamsi512_8way_update( &ctx.hamsi, vhashA, 64 );
|
||||
hamsi512_8way_close( &ctx.hamsi, vhashA );
|
||||
hamsi512_8x64_init( &ctx.hamsi );
|
||||
hamsi512_8x64_update( &ctx.hamsi, vhashA, 64 );
|
||||
hamsi512_8x64_close( &ctx.hamsi, vhashA );
|
||||
dintrlv_8x64_512( hash00, hash01, hash02, hash03,
|
||||
hash04, hash05, hash06, hash07, vhashA );
|
||||
hamsi512_8way_init( &ctx.hamsi );
|
||||
hamsi512_8way_update( &ctx.hamsi, vhashB, 64 );
|
||||
hamsi512_8way_close( &ctx.hamsi, vhashB );
|
||||
hamsi512_8x64_init( &ctx.hamsi );
|
||||
hamsi512_8x64_update( &ctx.hamsi, vhashB, 64 );
|
||||
hamsi512_8x64_close( &ctx.hamsi, vhashB );
|
||||
dintrlv_8x64_512( hash08, hash09, hash10, hash11,
|
||||
hash12, hash13, hash14, hash15, vhashB );
|
||||
|
||||
|
||||
fugue512_full( &ctx.fugue, hash00, hash00, 64 );
|
||||
fugue512_full( &ctx.fugue, hash01, hash01, 64 );
|
||||
fugue512_full( &ctx.fugue, hash02, hash02, 64 );
|
||||
@@ -344,9 +340,9 @@ int x17_16x64_hash( void *state, const __m512i nonceA, const __m512i nonceB,
|
||||
hash08, hash09, hash10, hash11,
|
||||
hash12, hash13, hash14, hash15 );
|
||||
|
||||
shabal512_16way_init( &ctx.shabal );
|
||||
shabal512_16way_update( &ctx.shabal, vhashA, 64 );
|
||||
shabal512_16way_close( &ctx.shabal, vhashA );
|
||||
shabal512_16x32_init( &ctx.shabal );
|
||||
shabal512_16x32_update( &ctx.shabal, vhashA, 64 );
|
||||
shabal512_16x32_close( &ctx.shabal, vhashA );
|
||||
|
||||
dintrlv_16x32_512( hash00, hash01, hash02, hash03,
|
||||
hash04, hash05, hash06, hash07,
|
||||
@@ -375,12 +371,12 @@ int x17_16x64_hash( void *state, const __m512i nonceA, const __m512i nonceB,
|
||||
intrlv_8x64_512( vhashB, hash08, hash09, hash10, hash11,
|
||||
hash12, hash13, hash14, hash15 );
|
||||
|
||||
sha512_8way_init( &ctx.sha512 );
|
||||
sha512_8way_update( &ctx.sha512, vhashA, 64 );
|
||||
sha512_8way_close( &ctx.sha512, vhashA );
|
||||
sha512_8way_init( &ctx.sha512 );
|
||||
sha512_8way_update( &ctx.sha512, vhashB, 64 );
|
||||
sha512_8way_close( &ctx.sha512, vhashB );
|
||||
sha512_8x64_init( &ctx.sha512 );
|
||||
sha512_8x64_update( &ctx.sha512, vhashA, 64 );
|
||||
sha512_8x64_close( &ctx.sha512, vhashA );
|
||||
sha512_8x64_init( &ctx.sha512 );
|
||||
sha512_8x64_update( &ctx.sha512, vhashB, 64 );
|
||||
sha512_8x64_close( &ctx.sha512, vhashB );
|
||||
|
||||
dintrlv_8x64_512( hash00, hash01, hash02, hash03,
|
||||
hash04, hash05, hash06, hash07, vhashA );
|
||||
@@ -391,9 +387,9 @@ int x17_16x64_hash( void *state, const __m512i nonceA, const __m512i nonceB,
|
||||
hash08, hash09, hash10, hash11,
|
||||
hash12, hash13, hash14, hash15 );
|
||||
|
||||
haval256_5_16way_init( &ctx.haval );
|
||||
haval256_5_16way_update( &ctx.haval, vhashA, 64 );
|
||||
haval256_5_16way_close( &ctx.haval, state );
|
||||
haval256_16x32_init( &ctx.haval );
|
||||
haval256_16x32_update( &ctx.haval, vhashA, 64 );
|
||||
haval256_16x32_close( &ctx.haval, state );
|
||||
|
||||
return 1;
|
||||
}
|
||||
@@ -425,7 +421,7 @@ int scanhash_x17_16x32( struct work *work, uint32_t max_nonce,
|
||||
edata[4] = v128_swap64_32( casti_v128u32( pdata, 4 ) );
|
||||
|
||||
mm512_intrlv80_8x64( vdata, edata );
|
||||
blake512_8way_prehash_le( &blake512_8way_ctx, x17_16way_midstate, vdata );
|
||||
blake512_8x64_prehash_le( &blake512_8x64_ctx, x17_16way_midstate, vdata );
|
||||
|
||||
nonceA = _mm512_add_epi32( casti_m512i( vdata, 9 ),
|
||||
_mm512_set_epi64( 7, 6, 5, 4, 3, 2, 1, 0 ) );
|
||||
@@ -456,11 +452,11 @@ int scanhash_x17_16x32( struct work *work, uint32_t max_nonce,
|
||||
|
||||
union _x17_8way_context_overlay
|
||||
{
|
||||
blake512_8way_context blake;
|
||||
blake512_8x64_context blake;
|
||||
bmw512_8x64_context bmw;
|
||||
skein512_8way_context skein;
|
||||
jh512_8way_context jh;
|
||||
keccak512_8way_context keccak;
|
||||
skein512_8x64_context skein;
|
||||
jh512_8x64_context jh;
|
||||
keccak512_8x64_context keccak;
|
||||
luffa_4way_context luffa;
|
||||
cube_4way_2buf_context cube;
|
||||
#if defined(__VAES__)
|
||||
@@ -473,17 +469,17 @@ union _x17_8way_context_overlay
|
||||
hashState_echo echo;
|
||||
#endif
|
||||
simd_4way_context simd;
|
||||
hamsi512_8way_context hamsi;
|
||||
hamsi512_8x64_context hamsi;
|
||||
hashState_fugue fugue;
|
||||
shabal512_8way_context shabal;
|
||||
shabal512_8x32_context shabal;
|
||||
sph_whirlpool_context whirlpool;
|
||||
sha512_8way_context sha512;
|
||||
haval256_5_8way_context haval;
|
||||
sha512_8x64_context sha512;
|
||||
haval256_8x32_context haval;
|
||||
} __attribute__ ((aligned (64)));
|
||||
typedef union _x17_8way_context_overlay x17_8way_context_overlay;
|
||||
|
||||
static __thread __m512i x17_8way_midstate[16] __attribute__((aligned(64)));
|
||||
static __thread blake512_8way_context blake512_8way_ctx __attribute__((aligned(64)));
|
||||
static __thread blake512_8x64_context blake512_8x64_ctx __attribute__((aligned(64)));
|
||||
|
||||
int x17_8x64_hash( void *state, const void *input, int thr_id )
|
||||
{
|
||||
@@ -500,7 +496,7 @@ int x17_8x64_hash( void *state, const void *input, int thr_id )
|
||||
uint64_t hash7[8] __attribute__ ((aligned (32)));
|
||||
x17_8way_context_overlay ctx;
|
||||
|
||||
blake512_8way_final_le( &blake512_8way_ctx, vhash, casti_m512i( input, 9 ),
|
||||
blake512_8x64_final_le( &blake512_8x64_ctx, vhash, casti_m512i( input, 9 ),
|
||||
x17_8way_midstate );
|
||||
|
||||
bmw512_8x64_full( &ctx.bmw, vhash, vhash, 64 );
|
||||
@@ -533,15 +529,15 @@ int x17_8x64_hash( void *state, const void *input, int thr_id )
|
||||
|
||||
#endif
|
||||
|
||||
skein512_8way_full( &ctx.skein, vhash, vhash, 64 );
|
||||
skein512_8x64_full( &ctx.skein, vhash, vhash, 64 );
|
||||
|
||||
jh512_8way_init( &ctx.jh );
|
||||
jh512_8way_update( &ctx.jh, vhash, 64 );
|
||||
jh512_8way_close( &ctx.jh, vhash );
|
||||
jh512_8x64_init( &ctx.jh );
|
||||
jh512_8x64_update( &ctx.jh, vhash, 64 );
|
||||
jh512_8x64_close( &ctx.jh, vhash );
|
||||
|
||||
keccak512_8way_init( &ctx.keccak );
|
||||
keccak512_8way_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_8way_close( &ctx.keccak, vhash );
|
||||
keccak512_8x64_init( &ctx.keccak );
|
||||
keccak512_8x64_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_8x64_close( &ctx.keccak, vhash );
|
||||
|
||||
rintrlv_8x64_4x128( vhashA, vhashB, vhash, 512 );
|
||||
|
||||
@@ -611,9 +607,9 @@ int x17_8x64_hash( void *state, const void *input, int thr_id )
|
||||
|
||||
#endif
|
||||
|
||||
hamsi512_8way_init( &ctx.hamsi );
|
||||
hamsi512_8way_update( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_8way_close( &ctx.hamsi, vhash );
|
||||
hamsi512_8x64_init( &ctx.hamsi );
|
||||
hamsi512_8x64_update( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_8x64_close( &ctx.hamsi, vhash );
|
||||
dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
|
||||
vhash );
|
||||
|
||||
@@ -629,9 +625,9 @@ int x17_8x64_hash( void *state, const void *input, int thr_id )
|
||||
intrlv_8x32_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7 );
|
||||
|
||||
shabal512_8way_init( &ctx.shabal );
|
||||
shabal512_8way_update( &ctx.shabal, vhash, 64 );
|
||||
shabal512_8way_close( &ctx.shabal, vhash );
|
||||
shabal512_8x32_init( &ctx.shabal );
|
||||
shabal512_8x32_update( &ctx.shabal, vhash, 64 );
|
||||
shabal512_8x32_close( &ctx.shabal, vhash );
|
||||
|
||||
dintrlv_8x32_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
|
||||
vhash );
|
||||
@@ -648,15 +644,15 @@ int x17_8x64_hash( void *state, const void *input, int thr_id )
|
||||
intrlv_8x64_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7 );
|
||||
|
||||
sha512_8way_init( &ctx.sha512 );
|
||||
sha512_8way_update( &ctx.sha512, vhash, 64 );
|
||||
sha512_8way_close( &ctx.sha512, vhash );
|
||||
sha512_8x64_init( &ctx.sha512 );
|
||||
sha512_8x64_update( &ctx.sha512, vhash, 64 );
|
||||
sha512_8x64_close( &ctx.sha512, vhash );
|
||||
|
||||
rintrlv_8x64_8x32( vhashA, vhash, 512 );
|
||||
|
||||
haval256_5_8way_init( &ctx.haval );
|
||||
haval256_5_8way_update( &ctx.haval, vhashA, 64 );
|
||||
haval256_5_8way_close( &ctx.haval, state );
|
||||
haval256_8x32_init( &ctx.haval );
|
||||
haval256_8x32_update( &ctx.haval, vhashA, 64 );
|
||||
haval256_8x32_close( &ctx.haval, state );
|
||||
|
||||
return 1;
|
||||
}
|
||||
@@ -690,7 +686,7 @@ int scanhash_x17_8x64( struct work *work, uint32_t max_nonce,
|
||||
mm512_intrlv80_8x64( vdata, edata );
|
||||
*noncev = _mm512_add_epi32( *noncev, _mm512_set_epi32(
|
||||
0,7, 0,6, 0,5, 0,4, 0,3, 0,2, 0,1, 0,0 ) );
|
||||
blake512_8way_prehash_le( &blake512_8way_ctx, x17_8way_midstate, vdata );
|
||||
blake512_8x64_prehash_le( &blake512_8x64_ctx, x17_8way_midstate, vdata );
|
||||
|
||||
do
|
||||
{
|
||||
@@ -717,7 +713,7 @@ int scanhash_x17_8x64( struct work *work, uint32_t max_nonce,
|
||||
|
||||
union _x17_4way_context_overlay
|
||||
{
|
||||
blake512_4way_context blake;
|
||||
blake512_4x64_context blake;
|
||||
bmw512_4x64_context bmw;
|
||||
#if defined(__VAES__)
|
||||
groestl512_2way_context groestl;
|
||||
@@ -726,24 +722,24 @@ union _x17_4way_context_overlay
|
||||
hashState_groestl groestl;
|
||||
hashState_echo echo;
|
||||
#endif
|
||||
skein512_4way_context skein;
|
||||
jh512_4way_context jh;
|
||||
keccak512_4way_context keccak;
|
||||
skein512_4x64_context skein;
|
||||
jh512_4x64_context jh;
|
||||
keccak512_4x64_context keccak;
|
||||
luffa_2way_context luffa;
|
||||
cube_2way_context cube;
|
||||
shavite512_2way_context shavite;
|
||||
simd_2way_context simd;
|
||||
hamsi512_4way_context hamsi;
|
||||
hamsi512_4x64_context hamsi;
|
||||
hashState_fugue fugue;
|
||||
shabal512_4way_context shabal;
|
||||
shabal512_4x32_context shabal;
|
||||
sph_whirlpool_context whirlpool;
|
||||
sha512_4way_context sha512;
|
||||
haval256_5_4way_context haval;
|
||||
sha512_4x64_context sha512;
|
||||
haval256_4x32_context haval;
|
||||
};
|
||||
typedef union _x17_4way_context_overlay x17_4way_context_overlay;
|
||||
|
||||
static __thread __m256i x17_4way_midstate[16] __attribute__((aligned(64)));
|
||||
static __thread blake512_4way_context blake512_4way_ctx __attribute__((aligned(64)));
|
||||
static __thread blake512_4x64_context blake512_4x64_ctx __attribute__((aligned(64)));
|
||||
|
||||
int x17_4x64_hash( void *state, const void *input, int thr_id )
|
||||
{
|
||||
@@ -756,11 +752,9 @@ int x17_4x64_hash( void *state, const void *input, int thr_id )
|
||||
uint64_t hash3[8] __attribute__ ((aligned (32)));
|
||||
x17_4way_context_overlay ctx;
|
||||
|
||||
blake512_4way_final_le( &blake512_4way_ctx, vhash, casti_m256i( input, 9 ),
|
||||
blake512_4x64_final_le( &blake512_4x64_ctx, vhash, casti_m256i( input, 9 ),
|
||||
x17_4way_midstate );
|
||||
|
||||
// blake512_4way_full( &ctx.blake, vhash, input, 80 );
|
||||
|
||||
bmw512_4x64_init( &ctx.bmw );
|
||||
bmw512_4x64_update( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4x64_close( &ctx.bmw, vhash );
|
||||
@@ -789,13 +783,13 @@ int x17_4x64_hash( void *state, const void *input, int thr_id )
|
||||
|
||||
skein512_4way_full( &ctx.skein, vhash, vhash, 64 );
|
||||
|
||||
jh512_4way_init( &ctx.jh );
|
||||
jh512_4way_update( &ctx.jh, vhash, 64 );
|
||||
jh512_4way_close( &ctx.jh, vhash );
|
||||
jh512_4x64_init( &ctx.jh );
|
||||
jh512_4x64_update( &ctx.jh, vhash, 64 );
|
||||
jh512_4x64_close( &ctx.jh, vhash );
|
||||
|
||||
keccak512_4way_init( &ctx.keccak );
|
||||
keccak512_4way_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_close( &ctx.keccak, vhash );
|
||||
keccak512_4x64_init( &ctx.keccak );
|
||||
keccak512_4x64_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4x64_close( &ctx.keccak, vhash );
|
||||
|
||||
rintrlv_4x64_2x128( vhashA, vhashB, vhash, 512 );
|
||||
|
||||
@@ -836,9 +830,9 @@ int x17_4x64_hash( void *state, const void *input, int thr_id )
|
||||
|
||||
#endif
|
||||
|
||||
hamsi512_4way_init( &ctx.hamsi );
|
||||
hamsi512_4way_update( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_4way_close( &ctx.hamsi, vhash );
|
||||
hamsi512_4x64_init( &ctx.hamsi );
|
||||
hamsi512_4x64_update( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_4x64_close( &ctx.hamsi, vhash );
|
||||
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
|
||||
@@ -849,9 +843,9 @@ int x17_4x64_hash( void *state, const void *input, int thr_id )
|
||||
|
||||
intrlv_4x32_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
shabal512_4way_init( &ctx.shabal );
|
||||
shabal512_4way_update( &ctx.shabal, vhash, 64 );
|
||||
shabal512_4way_close( &ctx.shabal, vhash );
|
||||
shabal512_4x32_init( &ctx.shabal );
|
||||
shabal512_4x32_update( &ctx.shabal, vhash, 64 );
|
||||
shabal512_4x32_close( &ctx.shabal, vhash );
|
||||
|
||||
dintrlv_4x32_512( hash0, hash1, hash2, hash3, vhash );
|
||||
|
||||
@@ -862,15 +856,15 @@ int x17_4x64_hash( void *state, const void *input, int thr_id )
|
||||
|
||||
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
sha512_4way_init( &ctx.sha512 );
|
||||
sha512_4way_update( &ctx.sha512, vhash, 64 );
|
||||
sha512_4way_close( &ctx.sha512, vhash );
|
||||
sha512_4x64_init( &ctx.sha512 );
|
||||
sha512_4x64_update( &ctx.sha512, vhash, 64 );
|
||||
sha512_4x64_close( &ctx.sha512, vhash );
|
||||
|
||||
rintrlv_4x64_4x32( vhashB, vhash, 512 );
|
||||
|
||||
haval256_5_4way_init( &ctx.haval );
|
||||
haval256_5_4way_update( &ctx.haval, vhashB, 64 );
|
||||
haval256_5_4way_close( &ctx.haval, state );
|
||||
haval256_4x32_init( &ctx.haval );
|
||||
haval256_4x32_update( &ctx.haval, vhashB, 64 );
|
||||
haval256_4x32_close( &ctx.haval, state );
|
||||
|
||||
return 1;
|
||||
}
|
||||
@@ -903,7 +897,7 @@ int scanhash_x17_4x64( struct work *work, uint32_t max_nonce,
|
||||
|
||||
mm256_intrlv80_4x64( vdata, edata );
|
||||
*noncev = _mm256_add_epi32( *noncev, _mm256_set_epi32( 0,3,0,2, 0,1,0,0 ) );
|
||||
blake512_4way_prehash_le( &blake512_4way_ctx, x17_4way_midstate, vdata );
|
||||
blake512_4x64_prehash_le( &blake512_4x64_ctx, x17_4way_midstate, vdata );
|
||||
|
||||
do
|
||||
{
|
||||
|
||||
@@ -6,10 +6,8 @@
|
||||
|
||||
#if defined(SIMD512)
|
||||
#define X17_8WAY 1
|
||||
// #define X17_16X32 1
|
||||
#elif defined(__AVX2__) && defined(__AES__)
|
||||
#define X17_4WAY 1
|
||||
#define X17_8X32 1
|
||||
#elif defined(__SSE2__) || defined(__ARM_NEON)
|
||||
#define X17_2X64 1
|
||||
#endif
|
||||
|
||||
@@ -31,20 +31,20 @@
|
||||
|
||||
union _xevan_8way_context_overlay
|
||||
{
|
||||
blake512_8way_context blake;
|
||||
bmw512_8way_context bmw;
|
||||
skein512_8way_context skein;
|
||||
jh512_8way_context jh;
|
||||
keccak512_8way_context keccak;
|
||||
blake512_8x64_context blake;
|
||||
bmw512_8x64_context bmw;
|
||||
skein512_8x64_context skein;
|
||||
jh512_8x64_context jh;
|
||||
keccak512_8x64_context keccak;
|
||||
luffa_4way_context luffa;
|
||||
cube_4way_context cube;
|
||||
simd_4way_context simd;
|
||||
hamsi512_8way_context hamsi;
|
||||
hamsi512_8x64_context hamsi;
|
||||
hashState_fugue fugue;
|
||||
shabal512_8way_context shabal;
|
||||
shabal512_8x32_context shabal;
|
||||
sph_whirlpool_context whirlpool;
|
||||
sha512_8way_context sha512;
|
||||
haval256_5_8way_context haval;
|
||||
sha512_8x64_context sha512;
|
||||
haval256_8x32_context haval;
|
||||
#if defined(__VAES__)
|
||||
groestl512_4way_context groestl;
|
||||
shavite512_4way_context shavite;
|
||||
@@ -73,10 +73,10 @@ int xevan_8way_hash( void *output, const void *input, int thr_id )
|
||||
const int dataLen = 128;
|
||||
xevan_8way_context_overlay ctx __attribute__ ((aligned (64)));
|
||||
|
||||
blake512_8way_full( &ctx.blake, vhash, input, 80 );
|
||||
blake512_8x64_full( &ctx.blake, vhash, input, 80 );
|
||||
memset( &vhash[8<<3], 0, 64<<3 );
|
||||
|
||||
bmw512_8way_full( &ctx.bmw, vhash, vhash, dataLen );
|
||||
bmw512_8x64_full( &ctx.bmw, vhash, vhash, dataLen );
|
||||
|
||||
#if defined(__VAES__)
|
||||
|
||||
@@ -106,15 +106,15 @@ int xevan_8way_hash( void *output, const void *input, int thr_id )
|
||||
|
||||
#endif
|
||||
|
||||
skein512_8way_full( &ctx.skein, vhash, vhash, dataLen );
|
||||
skein512_8x64_full( &ctx.skein, vhash, vhash, dataLen );
|
||||
|
||||
jh512_8way_init( &ctx.jh );
|
||||
jh512_8way_update( &ctx.jh, vhash, dataLen );
|
||||
jh512_8way_close( &ctx.jh, vhash );
|
||||
jh512_8x64_init( &ctx.jh );
|
||||
jh512_8x64_update( &ctx.jh, vhash, dataLen );
|
||||
jh512_8x64_close( &ctx.jh, vhash );
|
||||
|
||||
keccak512_8way_init( &ctx.keccak );
|
||||
keccak512_8way_update( &ctx.keccak, vhash, dataLen );
|
||||
keccak512_8way_close( &ctx.keccak, vhash );
|
||||
keccak512_8x64_init( &ctx.keccak );
|
||||
keccak512_8x64_update( &ctx.keccak, vhash, dataLen );
|
||||
keccak512_8x64_close( &ctx.keccak, vhash );
|
||||
|
||||
rintrlv_8x64_4x128( vhashA, vhashB, vhash, dataLen<<3 );
|
||||
|
||||
@@ -185,9 +185,9 @@ int xevan_8way_hash( void *output, const void *input, int thr_id )
|
||||
|
||||
#endif
|
||||
|
||||
hamsi512_8way_init( &ctx.hamsi );
|
||||
hamsi512_8way_update( &ctx.hamsi, vhash, dataLen );
|
||||
hamsi512_8way_close( &ctx.hamsi, vhash );
|
||||
hamsi512_8x64_init( &ctx.hamsi );
|
||||
hamsi512_8x64_update( &ctx.hamsi, vhash, dataLen );
|
||||
hamsi512_8x64_close( &ctx.hamsi, vhash );
|
||||
|
||||
dintrlv_8x64( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
|
||||
vhash, dataLen<<3 );
|
||||
@@ -204,9 +204,9 @@ int xevan_8way_hash( void *output, const void *input, int thr_id )
|
||||
intrlv_8x32( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7, dataLen<<3 );
|
||||
|
||||
shabal512_8way_init( &ctx.shabal );
|
||||
shabal512_8way_update( &ctx.shabal, vhash, dataLen );
|
||||
shabal512_8way_close( &ctx.shabal, vhash );
|
||||
shabal512_8x32_init( &ctx.shabal );
|
||||
shabal512_8x32_update( &ctx.shabal, vhash, dataLen );
|
||||
shabal512_8x32_close( &ctx.shabal, vhash );
|
||||
|
||||
dintrlv_8x32( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
|
||||
vhash, dataLen<<3 );
|
||||
@@ -223,23 +223,23 @@ int xevan_8way_hash( void *output, const void *input, int thr_id )
|
||||
intrlv_8x64( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7, dataLen<<3 );
|
||||
|
||||
sha512_8way_init( &ctx.sha512 );
|
||||
sha512_8way_update( &ctx.sha512, vhash, dataLen );
|
||||
sha512_8way_close( &ctx.sha512, vhash );
|
||||
sha512_8x64_init( &ctx.sha512 );
|
||||
sha512_8x64_update( &ctx.sha512, vhash, dataLen );
|
||||
sha512_8x64_close( &ctx.sha512, vhash );
|
||||
|
||||
rintrlv_8x64_8x32( vhashA, vhash, dataLen<<3 );
|
||||
|
||||
haval256_5_8way_init( &ctx.haval );
|
||||
haval256_5_8way_update( &ctx.haval, vhashA, dataLen );
|
||||
haval256_5_8way_close( &ctx.haval, vhashA );
|
||||
haval256_8x32_init( &ctx.haval );
|
||||
haval256_8x32_update( &ctx.haval, vhashA, dataLen );
|
||||
haval256_8x32_close( &ctx.haval, vhashA );
|
||||
|
||||
rintrlv_8x32_8x64( vhash, vhashA, dataLen<<3 );
|
||||
|
||||
memset( &vhash[ 4<<3 ], 0, (dataLen-32) << 3 );
|
||||
|
||||
blake512_8way_full( &ctx.blake, vhash, vhash, dataLen );
|
||||
blake512_8x64_full( &ctx.blake, vhash, vhash, dataLen );
|
||||
|
||||
bmw512_8way_full( &ctx.bmw, vhash, vhash, dataLen );
|
||||
bmw512_8x64_full( &ctx.bmw, vhash, vhash, dataLen );
|
||||
|
||||
#if defined(__VAES__)
|
||||
|
||||
@@ -269,15 +269,15 @@ int xevan_8way_hash( void *output, const void *input, int thr_id )
|
||||
|
||||
#endif
|
||||
|
||||
skein512_8way_full( &ctx.skein, vhash, vhash, dataLen );
|
||||
skein512_8x64_full( &ctx.skein, vhash, vhash, dataLen );
|
||||
|
||||
jh512_8way_init( &ctx.jh );
|
||||
jh512_8way_update( &ctx.jh, vhash, dataLen );
|
||||
jh512_8way_close( &ctx.jh, vhash );
|
||||
jh512_8x64_init( &ctx.jh );
|
||||
jh512_8x64_update( &ctx.jh, vhash, dataLen );
|
||||
jh512_8x64_close( &ctx.jh, vhash );
|
||||
|
||||
keccak512_8way_init( &ctx.keccak );
|
||||
keccak512_8way_update( &ctx.keccak, vhash, dataLen );
|
||||
keccak512_8way_close( &ctx.keccak, vhash );
|
||||
keccak512_8x64_init( &ctx.keccak );
|
||||
keccak512_8x64_update( &ctx.keccak, vhash, dataLen );
|
||||
keccak512_8x64_close( &ctx.keccak, vhash );
|
||||
|
||||
rintrlv_8x64_4x128( vhashA, vhashB, vhash, dataLen<<3 );
|
||||
|
||||
@@ -348,9 +348,9 @@ int xevan_8way_hash( void *output, const void *input, int thr_id )
|
||||
|
||||
#endif
|
||||
|
||||
hamsi512_8way_init( &ctx.hamsi );
|
||||
hamsi512_8way_update( &ctx.hamsi, vhash, dataLen );
|
||||
hamsi512_8way_close( &ctx.hamsi, vhash );
|
||||
hamsi512_8x64_init( &ctx.hamsi );
|
||||
hamsi512_8x64_update( &ctx.hamsi, vhash, dataLen );
|
||||
hamsi512_8x64_close( &ctx.hamsi, vhash );
|
||||
|
||||
dintrlv_8x64( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
|
||||
vhash, dataLen<<3 );
|
||||
@@ -367,9 +367,9 @@ int xevan_8way_hash( void *output, const void *input, int thr_id )
|
||||
intrlv_8x32( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7, dataLen<<3 );
|
||||
|
||||
shabal512_8way_init( &ctx.shabal );
|
||||
shabal512_8way_update( &ctx.shabal, vhash, dataLen );
|
||||
shabal512_8way_close( &ctx.shabal, vhash );
|
||||
shabal512_8x32_init( &ctx.shabal );
|
||||
shabal512_8x32_update( &ctx.shabal, vhash, dataLen );
|
||||
shabal512_8x32_close( &ctx.shabal, vhash );
|
||||
|
||||
dintrlv_8x32( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
|
||||
vhash, dataLen<<3 );
|
||||
@@ -386,15 +386,15 @@ int xevan_8way_hash( void *output, const void *input, int thr_id )
|
||||
intrlv_8x64( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7, dataLen<<3 );
|
||||
|
||||
sha512_8way_init( &ctx.sha512 );
|
||||
sha512_8way_update( &ctx.sha512, vhash, dataLen );
|
||||
sha512_8way_close( &ctx.sha512, vhash );
|
||||
sha512_8x64_init( &ctx.sha512 );
|
||||
sha512_8x64_update( &ctx.sha512, vhash, dataLen );
|
||||
sha512_8x64_close( &ctx.sha512, vhash );
|
||||
|
||||
rintrlv_8x64_8x32( vhashA, vhash, dataLen<<3 );
|
||||
|
||||
haval256_5_8way_init( &ctx.haval );
|
||||
haval256_5_8way_update( &ctx.haval, vhashA, dataLen );
|
||||
haval256_5_8way_close( &ctx.haval, output );
|
||||
haval256_8x32_init( &ctx.haval );
|
||||
haval256_8x32_update( &ctx.haval, vhashA, dataLen );
|
||||
haval256_8x32_close( &ctx.haval, output );
|
||||
|
||||
return 1;
|
||||
}
|
||||
@@ -403,28 +403,28 @@ int xevan_8way_hash( void *output, const void *input, int thr_id )
|
||||
|
||||
union _xevan_4way_context_overlay
|
||||
{
|
||||
blake512_4way_context blake;
|
||||
bmw512_4way_context bmw;
|
||||
blake512_4x64_context blake;
|
||||
bmw512_4x64_context bmw;
|
||||
#if defined(__VAES__)
|
||||
groestl512_2way_context groestl;
|
||||
echo_2way_context echo;
|
||||
#else
|
||||
hashState_groestl groestl;
|
||||
hashState_groestl groestl;
|
||||
hashState_echo echo;
|
||||
#endif
|
||||
skein512_4way_context skein;
|
||||
jh512_4way_context jh;
|
||||
keccak512_4way_context keccak;
|
||||
skein512_4x64_context skein;
|
||||
jh512_4x64_context jh;
|
||||
keccak512_4x64_context keccak;
|
||||
luffa_2way_context luffa;
|
||||
cube_2way_context cube;
|
||||
shavite512_2way_context shavite;
|
||||
simd_2way_context simd;
|
||||
hamsi512_4way_context hamsi;
|
||||
hamsi512_4x64_context hamsi;
|
||||
hashState_fugue fugue;
|
||||
shabal512_4way_context shabal;
|
||||
shabal512_4x32_context shabal;
|
||||
sph_whirlpool_context whirlpool;
|
||||
sha512_4way_context sha512;
|
||||
haval256_5_4way_context haval;
|
||||
sha512_4x64_context sha512;
|
||||
haval256_4x32_context haval;
|
||||
};
|
||||
typedef union _xevan_4way_context_overlay xevan_4way_context_overlay;
|
||||
|
||||
@@ -440,12 +440,12 @@ int xevan_4way_hash( void *output, const void *input, int thr_id )
|
||||
const int dataLen = 128;
|
||||
xevan_4way_context_overlay ctx __attribute__ ((aligned (64)));
|
||||
|
||||
blake512_4way_full( &ctx.blake, vhash, input, 80 );
|
||||
blake512_4x64_full( &ctx.blake, vhash, input, 80 );
|
||||
memset( &vhash[8<<2], 0, 64<<2 );
|
||||
|
||||
bmw512_4way_init( &ctx.bmw );
|
||||
bmw512_4way_update( &ctx.bmw, vhash, dataLen );
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
bmw512_4x64_init( &ctx.bmw );
|
||||
bmw512_4x64_update( &ctx.bmw, vhash, dataLen );
|
||||
bmw512_4x64_close( &ctx.bmw, vhash );
|
||||
|
||||
#if defined(__VAES__)
|
||||
|
||||
@@ -469,15 +469,15 @@ int xevan_4way_hash( void *output, const void *input, int thr_id )
|
||||
|
||||
#endif
|
||||
|
||||
skein512_4way_full( &ctx.skein, vhash, vhash, dataLen );
|
||||
skein512_4x64_full( &ctx.skein, vhash, vhash, dataLen );
|
||||
|
||||
jh512_4way_init( &ctx.jh );
|
||||
jh512_4way_update( &ctx.jh, vhash, dataLen );
|
||||
jh512_4way_close( &ctx.jh, vhash );
|
||||
jh512_4x64_init( &ctx.jh );
|
||||
jh512_4x64_update( &ctx.jh, vhash, dataLen );
|
||||
jh512_4x64_close( &ctx.jh, vhash );
|
||||
|
||||
keccak512_4way_init( &ctx.keccak );
|
||||
keccak512_4way_update( &ctx.keccak, vhash, dataLen );
|
||||
keccak512_4way_close( &ctx.keccak, vhash );
|
||||
keccak512_4x64_init( &ctx.keccak );
|
||||
keccak512_4x64_update( &ctx.keccak, vhash, dataLen );
|
||||
keccak512_4x64_close( &ctx.keccak, vhash );
|
||||
|
||||
rintrlv_4x64_2x128( vhashA, vhashB, vhash, dataLen<<3 );
|
||||
|
||||
@@ -518,9 +518,9 @@ int xevan_4way_hash( void *output, const void *input, int thr_id )
|
||||
|
||||
#endif
|
||||
|
||||
hamsi512_4way_init( &ctx.hamsi );
|
||||
hamsi512_4way_update( &ctx.hamsi, vhash, dataLen );
|
||||
hamsi512_4way_close( &ctx.hamsi, vhash );
|
||||
hamsi512_4x64_init( &ctx.hamsi );
|
||||
hamsi512_4x64_update( &ctx.hamsi, vhash, dataLen );
|
||||
hamsi512_4x64_close( &ctx.hamsi, vhash );
|
||||
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, dataLen<<3 );
|
||||
|
||||
@@ -532,9 +532,9 @@ int xevan_4way_hash( void *output, const void *input, int thr_id )
|
||||
// Parallel 4way 32 bit
|
||||
intrlv_4x32( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );
|
||||
|
||||
shabal512_4way_init( &ctx.shabal );
|
||||
shabal512_4way_update( &ctx.shabal, vhash, dataLen );
|
||||
shabal512_4way_close( &ctx.shabal, vhash );
|
||||
shabal512_4x32_init( &ctx.shabal );
|
||||
shabal512_4x32_update( &ctx.shabal, vhash, dataLen );
|
||||
shabal512_4x32_close( &ctx.shabal, vhash );
|
||||
|
||||
dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, dataLen<<3 );
|
||||
|
||||
@@ -546,27 +546,27 @@ int xevan_4way_hash( void *output, const void *input, int thr_id )
|
||||
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );
|
||||
|
||||
sha512_4way_init( &ctx.sha512 );
|
||||
sha512_4way_update( &ctx.sha512, vhash, dataLen );
|
||||
sha512_4way_close( &ctx.sha512, vhash );
|
||||
sha512_4x64_init( &ctx.sha512 );
|
||||
sha512_4x64_update( &ctx.sha512, vhash, dataLen );
|
||||
sha512_4x64_close( &ctx.sha512, vhash );
|
||||
|
||||
rintrlv_4x64_4x32( vhashA, vhash, dataLen<<3 );
|
||||
|
||||
haval256_5_4way_init( &ctx.haval );
|
||||
haval256_5_4way_update( &ctx.haval, vhashA, dataLen );
|
||||
haval256_5_4way_close( &ctx.haval, vhashA );
|
||||
haval256_4x32_init( &ctx.haval );
|
||||
haval256_4x32_update( &ctx.haval, vhashA, dataLen );
|
||||
haval256_4x32_close( &ctx.haval, vhashA );
|
||||
|
||||
rintrlv_4x32_4x64( vhash, vhashA, dataLen<<3 );
|
||||
|
||||
memset( &vhash[ 4<<2 ], 0, (dataLen-32) << 2 );
|
||||
|
||||
blake512_4way_init( &ctx.blake );
|
||||
blake512_4way_update( &ctx.blake, vhash, dataLen );
|
||||
blake512_4way_close(&ctx.blake, vhash);
|
||||
blake512_4x64_init( &ctx.blake );
|
||||
blake512_4x64_update( &ctx.blake, vhash, dataLen );
|
||||
blake512_4x64_close(&ctx.blake, vhash);
|
||||
|
||||
bmw512_4way_init( &ctx.bmw );
|
||||
bmw512_4way_update( &ctx.bmw, vhash, dataLen );
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
bmw512_4x64_init( &ctx.bmw );
|
||||
bmw512_4x64_update( &ctx.bmw, vhash, dataLen );
|
||||
bmw512_4x64_close( &ctx.bmw, vhash );
|
||||
|
||||
#if defined(__VAES__)
|
||||
|
||||
@@ -590,15 +590,15 @@ int xevan_4way_hash( void *output, const void *input, int thr_id )
|
||||
|
||||
#endif
|
||||
|
||||
skein512_4way_full( &ctx.skein, vhash, vhash, dataLen );
|
||||
skein512_4x64_full( &ctx.skein, vhash, vhash, dataLen );
|
||||
|
||||
jh512_4way_init( &ctx.jh );
|
||||
jh512_4way_update( &ctx.jh, vhash, dataLen );
|
||||
jh512_4way_close( &ctx.jh, vhash );
|
||||
jh512_4x64_init( &ctx.jh );
|
||||
jh512_4x64_update( &ctx.jh, vhash, dataLen );
|
||||
jh512_4x64_close( &ctx.jh, vhash );
|
||||
|
||||
keccak512_4way_init( &ctx.keccak );
|
||||
keccak512_4way_update( &ctx.keccak, vhash, dataLen );
|
||||
keccak512_4way_close( &ctx.keccak, vhash );
|
||||
keccak512_4x64_init( &ctx.keccak );
|
||||
keccak512_4x64_update( &ctx.keccak, vhash, dataLen );
|
||||
keccak512_4x64_close( &ctx.keccak, vhash );
|
||||
|
||||
rintrlv_4x64_2x128( vhashA, vhashB, vhash, dataLen<<3 );
|
||||
|
||||
@@ -639,9 +639,9 @@ int xevan_4way_hash( void *output, const void *input, int thr_id )
|
||||
|
||||
#endif
|
||||
|
||||
hamsi512_4way_init( &ctx.hamsi );
|
||||
hamsi512_4way_update( &ctx.hamsi, vhash, dataLen );
|
||||
hamsi512_4way_close( &ctx.hamsi, vhash );
|
||||
hamsi512_4x64_init( &ctx.hamsi );
|
||||
hamsi512_4x64_update( &ctx.hamsi, vhash, dataLen );
|
||||
hamsi512_4x64_close( &ctx.hamsi, vhash );
|
||||
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, dataLen<<3 );
|
||||
|
||||
@@ -652,9 +652,9 @@ int xevan_4way_hash( void *output, const void *input, int thr_id )
|
||||
|
||||
intrlv_4x32( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );
|
||||
|
||||
shabal512_4way_init( &ctx.shabal );
|
||||
shabal512_4way_update( &ctx.shabal, vhash, dataLen );
|
||||
shabal512_4way_close( &ctx.shabal, vhash );
|
||||
shabal512_4x32_init( &ctx.shabal );
|
||||
shabal512_4x32_update( &ctx.shabal, vhash, dataLen );
|
||||
shabal512_4x32_close( &ctx.shabal, vhash );
|
||||
|
||||
dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, dataLen<<3 );
|
||||
|
||||
@@ -665,15 +665,15 @@ int xevan_4way_hash( void *output, const void *input, int thr_id )
|
||||
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );
|
||||
|
||||
sha512_4way_init( &ctx.sha512 );
|
||||
sha512_4way_update( &ctx.sha512, vhash, dataLen );
|
||||
sha512_4way_close( &ctx.sha512, vhash );
|
||||
sha512_4x64_init( &ctx.sha512 );
|
||||
sha512_4x64_update( &ctx.sha512, vhash, dataLen );
|
||||
sha512_4x64_close( &ctx.sha512, vhash );
|
||||
|
||||
rintrlv_4x64_4x32( vhashA, vhash, dataLen<<3 );
|
||||
|
||||
haval256_5_4way_init( &ctx.haval );
|
||||
haval256_5_4way_update( &ctx.haval, vhashA, dataLen );
|
||||
haval256_5_4way_close( &ctx.haval, output );
|
||||
haval256_4x32_init( &ctx.haval );
|
||||
haval256_4x32_update( &ctx.haval, vhashA, dataLen );
|
||||
haval256_4x32_close( &ctx.haval, output );
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
@@ -32,24 +32,24 @@
|
||||
|
||||
union _x22i_8way_ctx_overlay
|
||||
{
|
||||
blake512_8way_context blake;
|
||||
bmw512_8way_context bmw;
|
||||
skein512_8way_context skein;
|
||||
jh512_8way_context jh;
|
||||
keccak512_8way_context keccak;
|
||||
blake512_8x64_context blake;
|
||||
bmw512_8x64_context bmw;
|
||||
skein512_8x64_context skein;
|
||||
jh512_8x64_context jh;
|
||||
keccak512_8x64_context keccak;
|
||||
luffa_4way_context luffa;
|
||||
cube_4way_context cube;
|
||||
simd_4way_context simd;
|
||||
hamsi512_8way_context hamsi;
|
||||
hamsi512_8x64_context hamsi;
|
||||
hashState_fugue fugue;
|
||||
shabal512_8way_context shabal;
|
||||
shabal512_8x32_context shabal;
|
||||
sph_whirlpool_context whirlpool;
|
||||
sha512_8way_context sha512;
|
||||
haval256_5_8way_context haval;
|
||||
sha512_8x64_context sha512;
|
||||
haval256_8x32_context haval;
|
||||
sph_tiger_context tiger;
|
||||
sph_gost512_context gost;
|
||||
#if !defined(X22I_8WAY_SHA)
|
||||
sha256_8way_context sha256;
|
||||
sha256_8x32_context sha256;
|
||||
#endif
|
||||
#if defined(__VAES__)
|
||||
groestl512_4way_context groestl;
|
||||
@@ -88,9 +88,9 @@ int x22i_8way_hash( void *output, const void *input, int thrid )
|
||||
unsigned char hashA7[64] __attribute__((aligned(32))) = {0};
|
||||
x22i_8way_ctx_overlay ctx;
|
||||
|
||||
blake512_8way_full( &ctx.blake, vhash, input, 80 );
|
||||
blake512_8x64_full( &ctx.blake, vhash, input, 80 );
|
||||
|
||||
bmw512_8way_full( &ctx.bmw, vhash, vhash, 64 );
|
||||
bmw512_8x64_full( &ctx.bmw, vhash, vhash, 64 );
|
||||
|
||||
#if defined(__VAES__)
|
||||
|
||||
@@ -120,15 +120,15 @@ int x22i_8way_hash( void *output, const void *input, int thrid )
|
||||
|
||||
#endif
|
||||
|
||||
skein512_8way_full( &ctx.skein, vhash, vhash, 64 );
|
||||
skein512_8x64_full( &ctx.skein, vhash, vhash, 64 );
|
||||
|
||||
jh512_8way_init( &ctx.jh );
|
||||
jh512_8way_update( &ctx.jh, vhash, 64 );
|
||||
jh512_8way_close( &ctx.jh, vhash );
|
||||
jh512_8x64_init( &ctx.jh );
|
||||
jh512_8x64_update( &ctx.jh, vhash, 64 );
|
||||
jh512_8x64_close( &ctx.jh, vhash );
|
||||
|
||||
keccak512_8way_init( &ctx.keccak );
|
||||
keccak512_8way_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_8way_close( &ctx.keccak, vhash );
|
||||
keccak512_8x64_init( &ctx.keccak );
|
||||
keccak512_8x64_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_8x64_close( &ctx.keccak, vhash );
|
||||
|
||||
if ( work_restart[thrid].restart ) return 0;
|
||||
|
||||
@@ -219,9 +219,9 @@ int x22i_8way_hash( void *output, const void *input, int thrid )
|
||||
|
||||
if ( work_restart[thrid].restart ) return 0;
|
||||
|
||||
hamsi512_8way_init( &ctx.hamsi );
|
||||
hamsi512_8way_update( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_8way_close( &ctx.hamsi, vhash );
|
||||
hamsi512_8x64_init( &ctx.hamsi );
|
||||
hamsi512_8x64_update( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_8x64_close( &ctx.hamsi, vhash );
|
||||
|
||||
dintrlv_8x64_512( hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7, vhash );
|
||||
@@ -238,9 +238,9 @@ int x22i_8way_hash( void *output, const void *input, int thrid )
|
||||
intrlv_8x32_512( vhash, hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7 );
|
||||
|
||||
shabal512_8way_init( &ctx.shabal );
|
||||
shabal512_8way_update( &ctx.shabal, vhash, 64 );
|
||||
shabal512_8way_close( &ctx.shabal, vhash );
|
||||
shabal512_8x32_init( &ctx.shabal );
|
||||
shabal512_8x32_update( &ctx.shabal, vhash, 64 );
|
||||
shabal512_8x32_close( &ctx.shabal, vhash );
|
||||
|
||||
dintrlv_8x32_512( &hash0[8], &hash1[8], &hash2[8], &hash3[8],
|
||||
&hash4[8], &hash5[8], &hash6[8], &hash7[8], vhash );
|
||||
@@ -273,9 +273,9 @@ int x22i_8way_hash( void *output, const void *input, int thrid )
|
||||
intrlv_8x64_512( vhash, &hash0[16], &hash1[16], &hash2[16], &hash3[16],
|
||||
&hash4[16], &hash5[16], &hash6[16], &hash7[16] );
|
||||
|
||||
sha512_8way_init( &ctx.sha512 );
|
||||
sha512_8way_update( &ctx.sha512, vhash, 64 );
|
||||
sha512_8way_close( &ctx.sha512, vhash );
|
||||
sha512_8x64_init( &ctx.sha512 );
|
||||
sha512_8x64_update( &ctx.sha512, vhash, 64 );
|
||||
sha512_8x64_close( &ctx.sha512, vhash );
|
||||
|
||||
dintrlv_8x64_512( &hash0[24], &hash1[24], &hash2[24], &hash3[24],
|
||||
&hash4[24], &hash5[24], &hash6[24], &hash7[24], vhash );
|
||||
@@ -294,9 +294,9 @@ int x22i_8way_hash( void *output, const void *input, int thrid )
|
||||
|
||||
memset( vhash, 0, 64*8 );
|
||||
|
||||
haval256_5_8way_init( &ctx.haval );
|
||||
haval256_5_8way_update( &ctx.haval, vhashA, 64 );
|
||||
haval256_5_8way_close( &ctx.haval, vhash );
|
||||
haval256_8x32_init( &ctx.haval );
|
||||
haval256_8x32_update( &ctx.haval, vhashA, 64 );
|
||||
haval256_8x32_close( &ctx.haval, vhash );
|
||||
|
||||
dintrlv_8x32_512( hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7, vhash );
|
||||
@@ -400,9 +400,9 @@ int x22i_8way_hash( void *output, const void *input, int thrid )
|
||||
intrlv_8x32_512( vhash, hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7 );
|
||||
|
||||
sha256_8way_init( &ctx.sha256 );
|
||||
sha256_8way_update( &ctx.sha256, vhash, 64 );
|
||||
sha256_8way_close( &ctx.sha256, output );
|
||||
sha256_8x32_init( &ctx.sha256 );
|
||||
sha256_8x32_update( &ctx.sha256, vhash, 64 );
|
||||
sha256_8x32_close( &ctx.sha256, output );
|
||||
|
||||
#endif
|
||||
|
||||
@@ -427,8 +427,6 @@ int scanhash_x22i_8way_sha( struct work *work, uint32_t max_nonce,
|
||||
|
||||
if ( bench ) ptarget[7] = 0x08ff;
|
||||
|
||||
InitializeSWIFFTX();
|
||||
|
||||
mm512_bswap32_intrlv80_8x64( vdata, pdata );
|
||||
*noncev = mm512_intrlv_blend_32(
|
||||
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
|
||||
@@ -472,8 +470,6 @@ int scanhash_x22i_8way( struct work *work, uint32_t max_nonce,
|
||||
|
||||
if ( bench ) ptarget[7] = 0x08ff;
|
||||
|
||||
InitializeSWIFFTX();
|
||||
|
||||
mm512_bswap32_intrlv80_8x64( vdata, pdata );
|
||||
*noncev = mm512_intrlv_blend_32(
|
||||
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
|
||||
@@ -506,8 +502,8 @@ int scanhash_x22i_8way( struct work *work, uint32_t max_nonce,
|
||||
|
||||
union _x22i_4way_ctx_overlay
|
||||
{
|
||||
blake512_4way_context blake;
|
||||
bmw512_4way_context bmw;
|
||||
blake512_4x64_context blake;
|
||||
bmw512_4x64_context bmw;
|
||||
#if defined(__VAES__)
|
||||
groestl512_2way_context groestl;
|
||||
echo_2way_context echo;
|
||||
@@ -516,22 +512,22 @@ union _x22i_4way_ctx_overlay
|
||||
hashState_echo echo;
|
||||
#endif
|
||||
shavite512_2way_context shavite;
|
||||
skein512_4way_context skein;
|
||||
jh512_4way_context jh;
|
||||
keccak512_4way_context keccak;
|
||||
skein512_4x64_context skein;
|
||||
jh512_4x64_context jh;
|
||||
keccak512_4x64_context keccak;
|
||||
luffa_2way_context luffa;
|
||||
cube_2way_context cube;
|
||||
simd_2way_context simd;
|
||||
hamsi512_4way_context hamsi;
|
||||
hamsi512_4x64_context hamsi;
|
||||
hashState_fugue fugue;
|
||||
shabal512_4way_context shabal;
|
||||
shabal512_4x32_context shabal;
|
||||
sph_whirlpool_context whirlpool;
|
||||
sha512_4way_context sha512;
|
||||
haval256_5_4way_context haval;
|
||||
sha512_4x64_context sha512;
|
||||
haval256_4x32_context haval;
|
||||
sph_tiger_context tiger;
|
||||
sph_gost512_context gost;
|
||||
#if !defined(X22I_4WAY_SHA)
|
||||
sha256_4way_context sha256;
|
||||
sha256_4x32_context sha256;
|
||||
#endif
|
||||
};
|
||||
typedef union _x22i_4way_ctx_overlay x22i_ctx_overlay;
|
||||
@@ -551,11 +547,11 @@ int x22i_4way_hash( void *output, const void *input, int thrid )
|
||||
unsigned char hashA3[64] __attribute__((aligned(32))) = {0};
|
||||
x22i_ctx_overlay ctx;
|
||||
|
||||
blake512_4way_full( &ctx.blake, vhash, input, 80 );
|
||||
blake512_4x64_full( &ctx.blake, vhash, input, 80 );
|
||||
|
||||
bmw512_4way_init( &ctx.bmw );
|
||||
bmw512_4way_update( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
bmw512_4x64_init( &ctx.bmw );
|
||||
bmw512_4x64_update( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4x64_close( &ctx.bmw, vhash );
|
||||
|
||||
#if defined(__VAES__)
|
||||
|
||||
@@ -579,15 +575,15 @@ int x22i_4way_hash( void *output, const void *input, int thrid )
|
||||
|
||||
#endif
|
||||
|
||||
skein512_4way_full( &ctx.skein, vhash, vhash, 64 );
|
||||
skein512_4x64_full( &ctx.skein, vhash, vhash, 64 );
|
||||
|
||||
jh512_4way_init( &ctx.jh );
|
||||
jh512_4way_update( &ctx.jh, vhash, 64 );
|
||||
jh512_4way_close( &ctx.jh, vhash );
|
||||
jh512_4x64_init( &ctx.jh );
|
||||
jh512_4x64_update( &ctx.jh, vhash, 64 );
|
||||
jh512_4x64_close( &ctx.jh, vhash );
|
||||
|
||||
keccak512_4way_init( &ctx.keccak );
|
||||
keccak512_4way_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_close( &ctx.keccak, vhash );
|
||||
keccak512_4x64_init( &ctx.keccak );
|
||||
keccak512_4x64_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4x64_close( &ctx.keccak, vhash );
|
||||
|
||||
if ( work_restart[thrid].restart ) return false;
|
||||
|
||||
@@ -632,9 +628,9 @@ int x22i_4way_hash( void *output, const void *input, int thrid )
|
||||
|
||||
if ( work_restart[thrid].restart ) return false;
|
||||
|
||||
hamsi512_4way_init( &ctx.hamsi );
|
||||
hamsi512_4way_update( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_4way_close( &ctx.hamsi, vhash );
|
||||
hamsi512_4x64_init( &ctx.hamsi );
|
||||
hamsi512_4x64_update( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_4x64_close( &ctx.hamsi, vhash );
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
|
||||
fugue512_full( &ctx.fugue, hash0, hash0, 64 );
|
||||
@@ -644,9 +640,9 @@ int x22i_4way_hash( void *output, const void *input, int thrid )
|
||||
|
||||
intrlv_4x32_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
shabal512_4way_init( &ctx.shabal );
|
||||
shabal512_4way_update( &ctx.shabal, vhash, 64 );
|
||||
shabal512_4way_close( &ctx.shabal, vhash );
|
||||
shabal512_4x32_init( &ctx.shabal );
|
||||
shabal512_4x32_update( &ctx.shabal, vhash, 64 );
|
||||
shabal512_4x32_close( &ctx.shabal, vhash );
|
||||
dintrlv_4x32_512( &hash0[8], &hash1[8], &hash2[8], &hash3[8], vhash );
|
||||
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
@@ -664,9 +660,9 @@ int x22i_4way_hash( void *output, const void *input, int thrid )
|
||||
|
||||
intrlv_4x64_512( vhash, &hash0[16], &hash1[16], &hash2[16], &hash3[16] );
|
||||
|
||||
sha512_4way_init( &ctx.sha512 );
|
||||
sha512_4way_update( &ctx.sha512, vhash, 64 );
|
||||
sha512_4way_close( &ctx.sha512, vhash );
|
||||
sha512_4x64_init( &ctx.sha512 );
|
||||
sha512_4x64_update( &ctx.sha512, vhash, 64 );
|
||||
sha512_4x64_close( &ctx.sha512, vhash );
|
||||
dintrlv_4x64_512( &hash0[24], &hash1[24], &hash2[24], &hash3[24], vhash );
|
||||
|
||||
if ( work_restart[thrid].restart ) return false;
|
||||
@@ -680,9 +676,9 @@ int x22i_4way_hash( void *output, const void *input, int thrid )
|
||||
|
||||
memset( vhash, 0, 64*4 );
|
||||
|
||||
haval256_5_4way_init( &ctx.haval );
|
||||
haval256_5_4way_update( &ctx.haval, vhashA, 64 );
|
||||
haval256_5_4way_close( &ctx.haval, vhash );
|
||||
haval256_4x32_init( &ctx.haval );
|
||||
haval256_4x32_update( &ctx.haval, vhashA, 64 );
|
||||
haval256_4x32_close( &ctx.haval, vhash );
|
||||
dintrlv_4x32_512( hash0, hash1, hash2, hash3, vhash );
|
||||
|
||||
memset( hashA0, 0, 64 );
|
||||
@@ -743,9 +739,9 @@ int x22i_4way_hash( void *output, const void *input, int thrid )
|
||||
|
||||
intrlv_4x32_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
sha256_4way_init( &ctx.sha256 );
|
||||
sha256_4way_update( &ctx.sha256, vhash, 64 );
|
||||
sha256_4way_close( &ctx.sha256, output );
|
||||
sha256_4x32_init( &ctx.sha256 );
|
||||
sha256_4x32_update( &ctx.sha256, vhash, 64 );
|
||||
sha256_4x32_close( &ctx.sha256, output );
|
||||
|
||||
#endif
|
||||
|
||||
@@ -770,8 +766,6 @@ int scanhash_x22i_4way_sha( struct work* work, uint32_t max_nonce,
|
||||
|
||||
if ( bench ) ptarget[7] = 0x08ff;
|
||||
|
||||
InitializeSWIFFTX();
|
||||
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
*noncev = mm256_intrlv_blend_32(
|
||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
||||
@@ -814,8 +808,6 @@ int scanhash_x22i_4way( struct work* work, uint32_t max_nonce,
|
||||
|
||||
if ( bench ) ptarget[7] = 0x08ff;
|
||||
|
||||
InitializeSWIFFTX();
|
||||
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
*noncev = mm256_intrlv_blend_32(
|
||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
||||
|
||||
@@ -33,6 +33,7 @@ bool register_x22i_algo( algo_gate_t* gate )
|
||||
|
||||
gate->optimizations = SSE2_OPT | SSE42_OPT | AES_OPT | AVX2_OPT | SHA256_OPT
|
||||
| AVX512_OPT | VAES_OPT | NEON_OPT;
|
||||
InitializeSWIFFTX();
|
||||
return true;
|
||||
};
|
||||
|
||||
|
||||
@@ -63,29 +63,29 @@ void x25x_shuffle( void *hash )
|
||||
|
||||
union _x25x_8way_ctx_overlay
|
||||
{
|
||||
blake512_8way_context blake;
|
||||
bmw512_8way_context bmw;
|
||||
skein512_8way_context skein;
|
||||
jh512_8way_context jh;
|
||||
keccak512_8way_context keccak;
|
||||
blake512_8x64_context blake;
|
||||
bmw512_8x64_context bmw;
|
||||
skein512_8x64_context skein;
|
||||
jh512_8x64_context jh;
|
||||
keccak512_8x64_context keccak;
|
||||
luffa_4way_context luffa;
|
||||
cube_4way_context cube;
|
||||
simd_4way_context simd;
|
||||
hamsi512_8way_context hamsi;
|
||||
hamsi512_8x64_context hamsi;
|
||||
hashState_fugue fugue;
|
||||
shabal512_8way_context shabal;
|
||||
shabal512_8x32_context shabal;
|
||||
sph_whirlpool_context whirlpool;
|
||||
sha512_8way_context sha512;
|
||||
haval256_5_8way_context haval;
|
||||
sha512_8x64_context sha512;
|
||||
haval256_8x32_context haval;
|
||||
sph_tiger_context tiger;
|
||||
sph_gost512_context gost;
|
||||
#if defined(X25X_8WAY_SHA)
|
||||
sha256_context sha256;
|
||||
#else
|
||||
sha256_8way_context sha256;
|
||||
sha256_8x32_context sha256;
|
||||
#endif
|
||||
panama_8way_context panama;
|
||||
blake2s_8way_state blake2s;
|
||||
panama_8x32_context panama;
|
||||
blake2s_8x32_state blake2s;
|
||||
#if defined(__VAES__)
|
||||
groestl512_4way_context groestl;
|
||||
shavite512_4way_context shavite;
|
||||
@@ -99,7 +99,7 @@ union _x25x_8way_ctx_overlay
|
||||
typedef union _x25x_8way_ctx_overlay x25x_8way_ctx_overlay;
|
||||
|
||||
static __thread __m512i x25x_8way_midstate[16] __attribute__((aligned(64)));
|
||||
static __thread blake512_8way_context blake512_8way_ctx __attribute__((aligned(64)));
|
||||
static __thread blake512_8x64_context blake512_8x64_ctx __attribute__((aligned(64)));
|
||||
|
||||
int x25x_8way_hash( void *output, const void *input, int thrid )
|
||||
{
|
||||
@@ -117,15 +117,15 @@ int x25x_8way_hash( void *output, const void *input, int thrid )
|
||||
uint64_t vhashB[8*8] __attribute__ ((aligned (64)));
|
||||
x25x_8way_ctx_overlay ctx __attribute__ ((aligned (64)));
|
||||
|
||||
blake512_8way_final_le( &blake512_8way_ctx, vhash, casti_m512i( input, 9 ),
|
||||
blake512_8x64_final_le( &blake512_8x64_ctx, vhash, casti_m512i( input, 9 ),
|
||||
x25x_8way_midstate );
|
||||
|
||||
dintrlv_8x64_512( hash0[0], hash1[0], hash2[0], hash3[0],
|
||||
hash4[0], hash5[0], hash6[0], hash7[0], vhash );
|
||||
|
||||
bmw512_8way_init( &ctx.bmw );
|
||||
bmw512_8way_update( &ctx.bmw, vhash, 64 );
|
||||
bmw512_8way_close( &ctx.bmw, vhash );
|
||||
bmw512_8x64_init( &ctx.bmw );
|
||||
bmw512_8x64_update( &ctx.bmw, vhash, 64 );
|
||||
bmw512_8x64_close( &ctx.bmw, vhash );
|
||||
dintrlv_8x64_512( hash0[1], hash1[1], hash2[1], hash3[1],
|
||||
hash4[1], hash5[1], hash6[1], hash7[1], vhash );
|
||||
|
||||
@@ -175,21 +175,19 @@ int x25x_8way_hash( void *output, const void *input, int thrid )
|
||||
|
||||
#endif
|
||||
|
||||
skein512_8way_init( &ctx.skein );
|
||||
skein512_8way_update( &ctx.skein, vhash, 64 );
|
||||
skein512_8way_close( &ctx.skein, vhash );
|
||||
skein512_8x64_full( &ctx.skein, vhash, vhash, 64 );
|
||||
dintrlv_8x64_512( hash0[3], hash1[3], hash2[3], hash3[3],
|
||||
hash4[3], hash5[3], hash6[3], hash7[3], vhash );
|
||||
|
||||
jh512_8way_init( &ctx.jh );
|
||||
jh512_8way_update( &ctx.jh, vhash, 64 );
|
||||
jh512_8way_close( &ctx.jh, vhash );
|
||||
jh512_8x64_init( &ctx.jh );
|
||||
jh512_8x64_update( &ctx.jh, vhash, 64 );
|
||||
jh512_8x64_close( &ctx.jh, vhash );
|
||||
dintrlv_8x64_512( hash0[4], hash1[4], hash2[4], hash3[4],
|
||||
hash4[4], hash5[4], hash6[4], hash7[4], vhash );
|
||||
|
||||
keccak512_8way_init( &ctx.keccak );
|
||||
keccak512_8way_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_8way_close( &ctx.keccak, vhash );
|
||||
keccak512_8x64_init( &ctx.keccak );
|
||||
keccak512_8x64_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_8x64_close( &ctx.keccak, vhash );
|
||||
dintrlv_8x64_512( hash0[5], hash1[5], hash2[5], hash3[5],
|
||||
hash4[5], hash5[5], hash6[5], hash7[5], vhash );
|
||||
|
||||
@@ -303,9 +301,9 @@ int x25x_8way_hash( void *output, const void *input, int thrid )
|
||||
|
||||
if ( work_restart[thrid].restart ) return 0;
|
||||
|
||||
hamsi512_8way_init( &ctx.hamsi );
|
||||
hamsi512_8way_update( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_8way_close( &ctx.hamsi, vhash );
|
||||
hamsi512_8x64_init( &ctx.hamsi );
|
||||
hamsi512_8x64_update( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_8x64_close( &ctx.hamsi, vhash );
|
||||
dintrlv_8x64_512( hash0[11], hash1[11], hash2[11], hash3[11],
|
||||
hash4[11], hash5[11], hash6[11], hash7[11], vhash );
|
||||
|
||||
@@ -321,9 +319,9 @@ int x25x_8way_hash( void *output, const void *input, int thrid )
|
||||
intrlv_8x32_512( vhash, hash0[12], hash1[12], hash2[12], hash3[12],
|
||||
hash4[12], hash5[12], hash6[12], hash7[12] );
|
||||
|
||||
shabal512_8way_init( &ctx.shabal );
|
||||
shabal512_8way_update( &ctx.shabal, vhash, 64 );
|
||||
shabal512_8way_close( &ctx.shabal, vhash );
|
||||
shabal512_8x32_init( &ctx.shabal );
|
||||
shabal512_8x32_update( &ctx.shabal, vhash, 64 );
|
||||
shabal512_8x32_close( &ctx.shabal, vhash );
|
||||
dintrlv_8x32_512( hash0[13], hash1[13], hash2[13], hash3[13],
|
||||
hash4[13], hash5[13], hash6[13], hash7[13], vhash );
|
||||
|
||||
@@ -354,9 +352,9 @@ int x25x_8way_hash( void *output, const void *input, int thrid )
|
||||
intrlv_8x64_512( vhash, hash0[14], hash1[14], hash2[14], hash3[14],
|
||||
hash4[14], hash5[14], hash6[14], hash7[14] );
|
||||
|
||||
sha512_8way_init( &ctx.sha512 );
|
||||
sha512_8way_update( &ctx.sha512, vhash, 64 );
|
||||
sha512_8way_close( &ctx.sha512, vhash );
|
||||
sha512_8x64_init( &ctx.sha512 );
|
||||
sha512_8x64_update( &ctx.sha512, vhash, 64 );
|
||||
sha512_8x64_close( &ctx.sha512, vhash );
|
||||
dintrlv_8x64_512( hash0[15], hash1[15], hash2[15], hash3[15],
|
||||
hash4[15], hash5[15], hash6[15], hash7[15], vhash );
|
||||
|
||||
@@ -372,9 +370,9 @@ int x25x_8way_hash( void *output, const void *input, int thrid )
|
||||
hash4[16], hash5[16], hash6[16], hash7[16] );
|
||||
memset( vhash, 0, 64*8 );
|
||||
|
||||
haval256_5_8way_init( &ctx.haval );
|
||||
haval256_5_8way_update( &ctx.haval, vhashA, 64 );
|
||||
haval256_5_8way_close( &ctx.haval, vhash );
|
||||
haval256_8x32_init( &ctx.haval );
|
||||
haval256_8x32_update( &ctx.haval, vhashA, 64 );
|
||||
haval256_8x32_close( &ctx.haval, vhash );
|
||||
dintrlv_8x32_512( hash0[17], hash1[17], hash2[17], hash3[17],
|
||||
hash4[17], hash5[17], hash6[17], hash7[17], vhash );
|
||||
|
||||
@@ -462,17 +460,17 @@ int x25x_8way_hash( void *output, const void *input, int thrid )
|
||||
intrlv_8x32_512( vhashA, hash0[20], hash1[20], hash2[20], hash3[20],
|
||||
hash4[20], hash5[20], hash6[20], hash7[20] );
|
||||
|
||||
sha256_8way_init( &ctx.sha256 );
|
||||
sha256_8way_update( &ctx.sha256, vhashA, 64 );
|
||||
sha256_8way_close( &ctx.sha256, vhash );
|
||||
sha256_8x32_init( &ctx.sha256 );
|
||||
sha256_8x32_update( &ctx.sha256, vhashA, 64 );
|
||||
sha256_8x32_close( &ctx.sha256, vhash );
|
||||
dintrlv_8x32_512( hash0[21], hash1[21], hash2[21], hash3[21],
|
||||
hash4[21], hash5[21], hash6[21], hash7[21], vhash );
|
||||
|
||||
#endif
|
||||
|
||||
panama_8way_init( &ctx.panama );
|
||||
panama_8way_update( &ctx.panama, vhash, 64 );
|
||||
panama_8way_close( &ctx.panama, vhash );
|
||||
panama_8x32_init( &ctx.panama );
|
||||
panama_8x32_update( &ctx.panama, vhash, 64 );
|
||||
panama_8x32_close( &ctx.panama, vhash );
|
||||
dintrlv_8x32_512( hash0[22], hash1[22], hash2[22], hash3[22],
|
||||
hash4[22], hash5[22], hash6[22], hash7[22], vhash );
|
||||
|
||||
@@ -545,8 +543,8 @@ int x25x_8way_hash( void *output, const void *input, int thrid )
|
||||
intrlv_8x32_512( vhashX[23], hash0[23], hash1[23], hash2[23], hash3[23],
|
||||
hash4[23], hash5[23], hash6[23], hash7[23] );
|
||||
|
||||
blake2s_8way_init( &ctx.blake2s, 32 );
|
||||
blake2s_8way_full_blocks( &ctx.blake2s, output, vhashX, 64*24 );
|
||||
blake2s_8x32_init( &ctx.blake2s, 32 );
|
||||
blake2s_8x32_full_blocks( &ctx.blake2s, output, vhashX, 64*24 );
|
||||
|
||||
return 1;
|
||||
}
|
||||
@@ -578,14 +576,13 @@ int scanhash_x25x_8way( struct work *work, uint32_t max_nonce,
|
||||
edata[4] = v128_swap64_32( casti_v128( pdata, 4 ) );
|
||||
|
||||
mm512_intrlv80_8x64( vdata, edata );
|
||||
*noncev = _mm512_add_epi32( *noncev, _mm512_set_epi32(
|
||||
0, 7, 0, 6, 0, 5, 0, 4, 0, 3, 0, 2, 0, 1, 0, 0 ) );
|
||||
blake512_8way_prehash_le( &blake512_8way_ctx, x25x_8way_midstate, vdata );
|
||||
*noncev = _mm512_add_epi32( *noncev, _mm512_set_epi64(
|
||||
7, 6, 5, 4, 3, 2, 1, 0 ) );
|
||||
blake512_8x64_prehash_le( &blake512_8x64_ctx, x25x_8way_midstate, vdata );
|
||||
|
||||
do
|
||||
{
|
||||
if ( x25x_8way_hash( hash, vdata, thr_id ) );
|
||||
|
||||
if ( x25x_8way_hash( hash, vdata, thr_id ) )
|
||||
for ( int lane = 0; lane < 8; lane++ )
|
||||
if ( unlikely( ( hashd7[ lane ] <= targ32 ) && !bench ) )
|
||||
{
|
||||
@@ -608,8 +605,8 @@ int scanhash_x25x_8way( struct work *work, uint32_t max_nonce,
|
||||
|
||||
union _x25x_4way_ctx_overlay
|
||||
{
|
||||
blake512_4way_context blake;
|
||||
bmw512_4way_context bmw;
|
||||
blake512_4x64_context blake;
|
||||
bmw512_4x64_context bmw;
|
||||
#if defined(__VAES__)
|
||||
groestl512_2way_context groestl;
|
||||
echo_2way_context echo;
|
||||
@@ -617,34 +614,34 @@ union _x25x_4way_ctx_overlay
|
||||
hashState_groestl groestl;
|
||||
hashState_echo echo;
|
||||
#endif
|
||||
skein512_4way_context skein;
|
||||
jh512_4way_context jh;
|
||||
keccak512_4way_context keccak;
|
||||
skein512_4x64_context skein;
|
||||
jh512_4x64_context jh;
|
||||
keccak512_4x64_context keccak;
|
||||
luffa_2way_context luffa;
|
||||
cube_2way_context cube;
|
||||
shavite512_2way_context shavite;
|
||||
simd_2way_context simd;
|
||||
hamsi512_4way_context hamsi;
|
||||
hamsi512_4x64_context hamsi;
|
||||
hashState_fugue fugue;
|
||||
shabal512_4way_context shabal;
|
||||
shabal512_4x32_context shabal;
|
||||
sph_whirlpool_context whirlpool;
|
||||
sha512_4way_context sha512;
|
||||
haval256_5_4way_context haval;
|
||||
sha512_4x64_context sha512;
|
||||
haval256_4x32_context haval;
|
||||
sph_tiger_context tiger;
|
||||
sph_gost512_context gost;
|
||||
#if defined(X25X_4WAY_SHA)
|
||||
sha256_context sha256;
|
||||
#else
|
||||
sha256_4way_context sha256;
|
||||
sha256_4x32_context sha256;
|
||||
#endif
|
||||
panama_4way_context panama;
|
||||
blake2s_4way_state blake2s;
|
||||
panama_4x32_context panama;
|
||||
blake2s_4x32_state blake2s;
|
||||
};
|
||||
|
||||
typedef union _x25x_4way_ctx_overlay x25x_4way_ctx_overlay;
|
||||
|
||||
static __thread __m256i x25x_4way_midstate[16] __attribute__((aligned(64)));
|
||||
static __thread blake512_4way_context blake512_4way_ctx __attribute__((aligned(64)));
|
||||
static __thread blake512_4x64_context blake512_4x64_ctx __attribute__((aligned(64)));
|
||||
|
||||
int x25x_4way_hash( void *output, const void *input, int thrid )
|
||||
{
|
||||
@@ -658,14 +655,14 @@ int x25x_4way_hash( void *output, const void *input, int thrid )
|
||||
uint64_t vhashB[8*4] __attribute__ ((aligned (64)));
|
||||
x25x_4way_ctx_overlay ctx __attribute__ ((aligned (64)));
|
||||
|
||||
blake512_4way_final_le( &blake512_4way_ctx, vhash, casti_m256i( input, 9 ),
|
||||
blake512_4x64_final_le( &blake512_4x64_ctx, vhash, casti_m256i( input, 9 ),
|
||||
x25x_4way_midstate );
|
||||
|
||||
dintrlv_4x64_512( hash0[0], hash1[0], hash2[0], hash3[0], vhash );
|
||||
|
||||
bmw512_4way_init( &ctx.bmw );
|
||||
bmw512_4way_update( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
bmw512_4x64_init( &ctx.bmw );
|
||||
bmw512_4x64_update( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4x64_close( &ctx.bmw, vhash );
|
||||
dintrlv_4x64_512( hash0[1], hash1[1], hash2[1], hash3[1], vhash );
|
||||
|
||||
#if defined(__VAES__)
|
||||
@@ -688,19 +685,19 @@ int x25x_4way_hash( void *output, const void *input, int thrid )
|
||||
#endif
|
||||
|
||||
intrlv_4x64_512( vhash, hash0[2], hash1[2], hash2[2], hash3[2] );
|
||||
skein512_4way_full( &ctx.skein, vhash, vhash, 64 );
|
||||
skein512_4x64_full( &ctx.skein, vhash, vhash, 64 );
|
||||
dintrlv_4x64_512( hash0[3], hash1[3], hash2[3], hash3[3], vhash );
|
||||
|
||||
jh512_4way_init( &ctx.jh );
|
||||
jh512_4way_update( &ctx.jh, vhash, 64 );
|
||||
jh512_4way_close( &ctx.jh, vhash );
|
||||
jh512_4x64_init( &ctx.jh );
|
||||
jh512_4x64_update( &ctx.jh, vhash, 64 );
|
||||
jh512_4x64_close( &ctx.jh, vhash );
|
||||
dintrlv_4x64_512( hash0[4], hash1[4], hash2[4], hash3[4], vhash );
|
||||
|
||||
if ( work_restart[thrid].restart ) return 0;
|
||||
|
||||
keccak512_4way_init( &ctx.keccak );
|
||||
keccak512_4way_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_close( &ctx.keccak, vhash );
|
||||
keccak512_4x64_init( &ctx.keccak );
|
||||
keccak512_4x64_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4x64_close( &ctx.keccak, vhash );
|
||||
dintrlv_4x64_512( hash0[5], hash1[5], hash2[5], hash3[5], vhash );
|
||||
|
||||
rintrlv_4x64_2x128( vhashA, vhashB, vhash, 512 );
|
||||
@@ -751,9 +748,9 @@ int x25x_4way_hash( void *output, const void *input, int thrid )
|
||||
|
||||
if ( work_restart[thrid].restart ) return 0;
|
||||
|
||||
hamsi512_4way_init( &ctx.hamsi );
|
||||
hamsi512_4way_update( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_4way_close( &ctx.hamsi, vhash );
|
||||
hamsi512_4x64_init( &ctx.hamsi );
|
||||
hamsi512_4x64_update( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_4x64_close( &ctx.hamsi, vhash );
|
||||
dintrlv_4x64_512( hash0[11], hash1[11], hash2[11], hash3[11], vhash );
|
||||
|
||||
fugue512_full( &ctx.fugue, hash0[12], hash0[11], 64 );
|
||||
@@ -763,9 +760,9 @@ int x25x_4way_hash( void *output, const void *input, int thrid )
|
||||
|
||||
intrlv_4x32_512( vhash, hash0[12], hash1[12], hash2[12], hash3[12] );
|
||||
|
||||
shabal512_4way_init( &ctx.shabal );
|
||||
shabal512_4way_update( &ctx.shabal, vhash, 64 );
|
||||
shabal512_4way_close( &ctx.shabal, vhash );
|
||||
shabal512_4x32_init( &ctx.shabal );
|
||||
shabal512_4x32_update( &ctx.shabal, vhash, 64 );
|
||||
shabal512_4x32_close( &ctx.shabal, vhash );
|
||||
dintrlv_4x32_512( hash0[13], hash1[13], hash2[13], hash3[13], vhash );
|
||||
|
||||
sph_whirlpool_init(&ctx.whirlpool);
|
||||
@@ -783,9 +780,9 @@ int x25x_4way_hash( void *output, const void *input, int thrid )
|
||||
|
||||
intrlv_4x64_512( vhash, hash0[14], hash1[14], hash2[14], hash3[14] );
|
||||
|
||||
sha512_4way_init( &ctx.sha512 );
|
||||
sha512_4way_update( &ctx.sha512, vhash, 64 );
|
||||
sha512_4way_close( &ctx.sha512, vhash );
|
||||
sha512_4x64_init( &ctx.sha512 );
|
||||
sha512_4x64_update( &ctx.sha512, vhash, 64 );
|
||||
sha512_4x64_close( &ctx.sha512, vhash );
|
||||
dintrlv_4x64_512( hash0[15], hash1[15], hash2[15], hash3[15], vhash );
|
||||
|
||||
ComputeSingleSWIFFTX((unsigned char*)hash0[12], (unsigned char*)hash0[16]);
|
||||
@@ -797,9 +794,9 @@ int x25x_4way_hash( void *output, const void *input, int thrid )
|
||||
|
||||
memset( vhash, 0, 64*4 );
|
||||
|
||||
haval256_5_4way_init( &ctx.haval );
|
||||
haval256_5_4way_update( &ctx.haval, vhashX[0], 64 );
|
||||
haval256_5_4way_close( &ctx.haval, vhash );
|
||||
haval256_4x32_init( &ctx.haval );
|
||||
haval256_4x32_update( &ctx.haval, vhashX[0], 64 );
|
||||
haval256_4x32_close( &ctx.haval, vhash );
|
||||
dintrlv_4x32_512( hash0[17], hash1[17], hash2[17], hash3[17], vhash );
|
||||
|
||||
sph_tiger_init(&ctx.tiger);
|
||||
@@ -853,16 +850,16 @@ int x25x_4way_hash( void *output, const void *input, int thrid )
|
||||
intrlv_4x32_512( vhashX[0], hash0[20], hash1[20], hash2[20], hash3[20] );
|
||||
memset( vhash, 0, 64*4 );
|
||||
|
||||
sha256_4way_init( &ctx.sha256 );
|
||||
sha256_4way_update( &ctx.sha256, vhashX[0], 64 );
|
||||
sha256_4way_close( &ctx.sha256, vhash );
|
||||
sha256_4x32_init( &ctx.sha256 );
|
||||
sha256_4x32_update( &ctx.sha256, vhashX[0], 64 );
|
||||
sha256_4x32_close( &ctx.sha256, vhash );
|
||||
dintrlv_4x32_512( hash0[21], hash1[21], hash2[21], hash3[21], vhash );
|
||||
|
||||
#endif
|
||||
|
||||
panama_4way_init( &ctx.panama );
|
||||
panama_4way_update( &ctx.panama, vhash, 64 );
|
||||
panama_4way_close( &ctx.panama, vhash );
|
||||
panama_4x32_init( &ctx.panama );
|
||||
panama_4x32_update( &ctx.panama, vhash, 64 );
|
||||
panama_4x32_close( &ctx.panama, vhash );
|
||||
dintrlv_4x32_512( hash0[22], hash1[22], hash2[22], hash3[22], vhash );
|
||||
|
||||
laneHash(512, (const BitSequence*)hash0[22], 512, (BitSequence*)hash0[23]);
|
||||
@@ -902,8 +899,8 @@ int x25x_4way_hash( void *output, const void *input, int thrid )
|
||||
intrlv_4x32_512( vhashX[22], hash0[22], hash1[22], hash2[22], hash3[22] );
|
||||
intrlv_4x32_512( vhashX[23], hash0[23], hash1[23], hash2[23], hash3[23] );
|
||||
|
||||
blake2s_4way_init( &ctx.blake2s, 32 );
|
||||
blake2s_4way_full_blocks( &ctx.blake2s, output, vhashX, 64*24 );
|
||||
blake2s_4x32_init( &ctx.blake2s, 32 );
|
||||
blake2s_4x32_full_blocks( &ctx.blake2s, output, vhashX, 64*24 );
|
||||
|
||||
return 1;
|
||||
}
|
||||
@@ -936,9 +933,8 @@ int scanhash_x25x_4way( struct work* work, uint32_t max_nonce,
|
||||
edata[4] = v128_swap64_32( casti_v128( pdata, 4 ) );
|
||||
|
||||
mm256_intrlv80_4x64( vdata, edata );
|
||||
*noncev = _mm256_add_epi32( *noncev, _mm256_set_epi32(
|
||||
0, 3, 0, 2, 0, 1, 0, 0 ) );
|
||||
blake512_4way_prehash_le( &blake512_4way_ctx, x25x_4way_midstate, vdata );
|
||||
*noncev = _mm256_add_epi32( *noncev, _mm256_set_epi64x( 3, 2, 1, 0 ) );
|
||||
blake512_4x64_prehash_le( &blake512_4x64_ctx, x25x_4way_midstate, vdata );
|
||||
|
||||
do
|
||||
{
|
||||
|
||||
@@ -231,7 +231,7 @@ int scanhash_x25x( struct work *work, uint32_t max_nonce,
|
||||
do
|
||||
{
|
||||
edata[19] = n;
|
||||
if ( x25x_hash( hash64, edata, thr_id ) );
|
||||
if ( x25x_hash( hash64, edata, thr_id ) )
|
||||
if ( unlikely( valid_hash( hash64, ptarget ) && !bench ) )
|
||||
{
|
||||
pdata[19] = bswap_32( n );
|
||||
|
||||
@@ -1,692 +0,0 @@
|
||||
/*-
|
||||
* Copyright 2009 Colin Percival
|
||||
* Copyright 2013-2018 Alexander Peslyak
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* This file was originally written by Colin Percival as part of the Tarsnap
|
||||
* online backup system.
|
||||
*
|
||||
* This is a proof-of-work focused fork of yescrypt, including reference and
|
||||
* cut-down implementation of the obsolete yescrypt 0.5 (based off its first
|
||||
* submission to PHC back in 2014) and a new proof-of-work specific variation
|
||||
* known as yespower 1.0. The former is intended as an upgrade for
|
||||
* cryptocurrencies that already use yescrypt 0.5 and the latter may be used
|
||||
* as a further upgrade (hard fork) by those and other cryptocurrencies. The
|
||||
* version of algorithm to use is requested through parameters, allowing for
|
||||
* both algorithms to co-exist in client and miner implementations (such as in
|
||||
* preparation for a hard-fork).
|
||||
*
|
||||
* This is the reference implementation. Its purpose is to provide a simple
|
||||
* human- and machine-readable specification that implementations intended
|
||||
* for actual use should be tested against. It is deliberately mostly not
|
||||
* optimized, and it is not meant to be used in production. Instead, use
|
||||
* yespower-opt.c.
|
||||
*/
|
||||
/*
|
||||
#warning "This reference implementation is deliberately mostly not optimized. Use yespower-opt.c instead unless you're testing (against) the reference implementation on purpose."
|
||||
*/
|
||||
#include <errno.h>
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "algo/sha/hmac-sha256-hash-4way.h"
|
||||
//#include "sysendian.h"
|
||||
|
||||
#include "yespower.h"
|
||||
|
||||
|
||||
#if defined(__AVX2__)
|
||||
|
||||
|
||||
/**
 * blkcpy_8way(dst, src, count):
 * Copy count 256-bit vectors from src to dst, lowest index first.
 * A count of zero copies nothing (the former do/while form ran the body once
 * before testing the counter and then underflowed it).
 */
static void blkcpy_8way( __m256i *dst, const __m256i *src, size_t count )
{
   for ( size_t i = 0; i < count; i++ )
      dst[i] = src[i];
}
|
||||
|
||||
/**
 * blkxor_8way(dst, src, count):
 * XOR count 256-bit vectors from src into dst, lowest index first.
 * A count of zero leaves dst untouched (the former do/while form ran the body
 * once before testing the counter and then underflowed it).
 */
static void blkxor_8way( __m256i *dst, const __m256i *src, size_t count )
{
   /* Use the intrinsic, like the rest of this file, rather than relying on
    * the compiler's vector-operator extension. */
   for ( size_t i = 0; i < count; i++ )
      dst[i] = _mm256_xor_si256( dst[i], src[i] );
}
|
||||
|
||||
/**
|
||||
* salsa20(B):
|
||||
* Apply the Salsa20 core to the provided block.
|
||||
*/
|
||||
static void salsa20_8way( __m256i B[16], uint32_t rounds )
|
||||
{
|
||||
__m256i x[16];
|
||||
size_t i;
|
||||
|
||||
/* SIMD unshuffle */
|
||||
for ( i = 0; i < 16; i++ )
|
||||
x[i * 5 % 16] = B[i];
|
||||
|
||||
for ( i = 0; i < rounds; i += 2 )
|
||||
{
|
||||
#define R( a, b, c ) mm256_rol_32( _mm256_add_epi32( a, b ), c )
|
||||
/* Operate on columns */
|
||||
|
||||
x[ 4] = _mm256_xor_si256( x[ 4], R( x[ 0], x[12], 7 ) );
|
||||
x[ 8] = _mm256_xor_si256( x[ 8], R( x[ 4], x[ 0], 9 ) );
|
||||
x[12] = _mm256_xor_si256( x[12], R( x[ 8], x[ 4], 13 ) );
|
||||
x[ 0] = _mm256_xor_si256( x[ 0], R( x[12], x[ 8], 18 ) );
|
||||
|
||||
x[ 9] = _mm256_xor_si256( x[ 9], R( x[ 5], x[ 1], 7 ) );
|
||||
x[13] = _mm256_xor_si256( x[13], R( x[ 9], x[ 5], 9 ) );
|
||||
x[ 1] = _mm256_xor_si256( x[ 1], R( x[13], x[ 9], 13 ) );
|
||||
x[ 5] = _mm256_xor_si256( x[ 5], R( x[ 1], x[13], 18 ) );
|
||||
|
||||
x[14] = _mm256_xor_si256( x[14], R( x[10], x[ 6], 7 ) );
|
||||
x[ 2] = _mm256_xor_si256( x[ 2], R( x[14], x[10], 9 ) );
|
||||
x[ 6] = _mm256_xor_si256( x[ 6], R( x[ 2], x[14], 13 ) );
|
||||
x[10] = _mm256_xor_si256( x[10], R( x[ 6], x[ 2], 18 ) );
|
||||
|
||||
x[ 3] = _mm256_xor_si256( x[ 3], R( x[15], x[11], 7 ) );
|
||||
x[ 7] = _mm256_xor_si256( x[ 7], R( x[ 3], x[15], 9 ) );
|
||||
x[11] = _mm256_xor_si256( x[11], R( x[ 7], x[ 3], 13 ) );
|
||||
x[15] = _mm256_xor_si256( x[15], R( x[11], x[ 7], 18 ) );
|
||||
|
||||
/* Operate on rows */
|
||||
|
||||
x[ 1] = _mm256_xor_si256( x[ 1], R( x[ 0], x[ 3], 7 ) );
|
||||
x[ 2] = _mm256_xor_si256( x[ 2], R( x[ 1], x[ 0], 9 ) );
|
||||
x[ 3] = _mm256_xor_si256( x[ 3], R( x[ 2], x[ 1], 13 ) );
|
||||
x[ 0] = _mm256_xor_si256( x[ 0], R( x[ 3], x[ 2], 18 ) );
|
||||
|
||||
x[ 6] = _mm256_xor_si256( x[ 6], R( x[ 5], x[ 4], 7 ) );
|
||||
x[ 7] = _mm256_xor_si256( x[ 7], R( x[ 6], x[ 5], 9 ) );
|
||||
x[ 4] = _mm256_xor_si256( x[ 4], R( x[ 7], x[ 6], 13 ) );
|
||||
x[ 5] = _mm256_xor_si256( x[ 5], R( x[ 4], x[ 7], 18 ) );
|
||||
|
||||
x[11] = _mm256_xor_si256( x[11], R( x[10], x[ 9], 7 ) );
|
||||
x[ 8] = _mm256_xor_si256( x[ 8], R( x[11], x[10], 9 ) );
|
||||
x[ 9] = _mm256_xor_si256( x[ 9], R( x[ 8], x[11], 13 ) );
|
||||
x[10] = _mm256_xor_si256( x[10], R( x[ 9], x[ 8], 18 ) );
|
||||
|
||||
x[12] = _mm256_xor_si256( x[12], R( x[15], x[14], 7 ) );
|
||||
x[13] = _mm256_xor_si256( x[13], R( x[12], x[15], 9 ) );
|
||||
x[14] = _mm256_xor_si256( x[14], R( x[13], x[12], 13 ) );
|
||||
x[15] = _mm256_xor_si256( x[15], R( x[14], x[13], 18 ) );
|
||||
|
||||
#undef R
|
||||
}
|
||||
|
||||
/* SIMD shuffle */
|
||||
for (i = 0; i < 16; i++)
|
||||
B[i] = _mm256_add_epi32( B[i], x[i * 5 % 16] );
|
||||
}
|
||||
|
||||
/**
|
||||
* blockmix_salsa(B):
|
||||
* Compute B = BlockMix_{salsa20, 1}(B). The input B must be 128 bytes in
|
||||
* length.
|
||||
*/
|
||||
static void blockmix_salsa_8way( __m256i *B, uint32_t rounds )
|
||||
{
|
||||
__m256i X[16];
|
||||
size_t i;
|
||||
|
||||
/* 1: X <-- B_{2r - 1} */
|
||||
blkcpy_8way( X, &B[16], 16 );
|
||||
|
||||
/* 2: for i = 0 to 2r - 1 do */
|
||||
for ( i = 0; i < 2; i++ )
|
||||
{
|
||||
/* 3: X <-- H(X xor B_i) */
|
||||
blkxor_8way( X, &B[i * 16], 16 );
|
||||
salsa20_8way( X, rounds );
|
||||
|
||||
/* 4: Y_i <-- X */
|
||||
/* 6: B' <-- (Y_0, Y_2 ... Y_{2r-2}, Y_1, Y_3 ... Y_{2r-1}) */
|
||||
blkcpy_8way( &B[i * 16], X, 16 );
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * These are tunable, but they must meet certain constraints and are part of
 * what defines a yespower version.
 */
#define PWXsimple 2
#define PWXgather 4
/* Version 0.5 */
#define PWXrounds_0_5 6
#define Swidth_0_5 8
/* Version 1.0 */
#define PWXrounds_1_0 3
#define Swidth_1_0 11

/* Derived values.  Not tunable on their own. */
#define PWXbytes (PWXgather * PWXsimple * 8)
#define PWXwords (PWXbytes / sizeof(uint32_t))
#define rmin ((PWXbytes + 127) / 128)

/*
 * Runtime derived values.  Not tunable on their own.
 * The argument is parenthesized so that an expression (for example
 * "a | b") is shifted as a whole instead of being split by precedence.
 */
#define Swidth_to_Sbytes1(Swidth) ((1 << (Swidth)) * PWXsimple * 8)
#define Swidth_to_Smask(Swidth) (((1 << (Swidth)) - 1) * PWXsimple * 8)
|
||||
|
||||
typedef struct {
|
||||
__m256i (*S0)[2], (*S1)[2], (*S2)[2];
|
||||
__m256i *S;
|
||||
yespower_version_t version;
|
||||
uint32_t salsa20_rounds;
|
||||
uint32_t PWXrounds, Swidth, Sbytes, Smask;
|
||||
size_t w;
|
||||
} pwxform_8way_ctx_t __attribute__ ((aligned (128)));
|
||||
|
||||
/**
|
||||
* pwxform(B):
|
||||
* Transform the provided block using the provided S-boxes.
|
||||
*/
|
||||
static void pwxform_8way( __m256i *B, pwxform_8way_ctx_t *ctx )
|
||||
{
|
||||
/*
 * Runs ctx->PWXrounds rounds over block B, viewed below as
 * X[PWXgather][PWXsimple][2] vectors.  Each lane group is updated as
 * (x_lo * x_hi + s0) ^ s1 using 64-bit arithmetic.  For versions other than
 * YESPOWER_0_5 selected results are also written back into S0/S1, and at the
 * end the three S-box pointers are rotated and ctx->w is updated.
 *
 * NOTE(review): p0 and p1 are declared but never assigned -- the code that
 * would compute them is commented out below -- yet they are dereferenced
 * through p0[k] / p1[k].  As written this reads through indeterminate
 * pointers.  xl, xh and Smask are likewise computed but only referenced by
 * the commented-out code.  Presumably each of the 8 lanes needs its own
 * S-box index here; confirm the intended layout before enabling this path.
 */
__m256i (*X)[PWXsimple][2] = (__m256i (*)[PWXsimple][2])B;
|
||||
__m256i (*S0)[2] = ctx->S0, (*S1)[2] = ctx->S1, (*S2)[2] = ctx->S2;
|
||||
__m256i Smask = _mm256_set1_epi32( ctx->Smask );
|
||||
size_t w = ctx->w;
|
||||
size_t i, j, k;
|
||||
|
||||
/* 1: for i = 0 to PWXrounds - 1 do */
|
||||
for ( i = 0; i < ctx->PWXrounds; i++ )
|
||||
{
|
||||
/* 2: for j = 0 to PWXgather - 1 do */
|
||||
for ( j = 0; j < PWXgather; j++ )
|
||||
{
|
||||
// Are these pointers or data?
|
||||
__m256i xl = X[j][0][0];
|
||||
__m256i xh = X[j][0][1];
|
||||
__m256i (*p0)[2], (*p1)[2];
|
||||
|
||||
// 3: p0 <-- (lo(B_{j,0}) & Smask) / (PWXsimple * 8)
|
||||
|
||||
// playing with pointers
|
||||
/*
|
||||
p0 = S0 + (xl & Smask) / sizeof(*S0);
|
||||
// 4: p1 <-- (hi(B_{j,0}) & Smask) / (PWXsimple * 8)
|
||||
p1 = S1 + (xh & Smask) / sizeof(*S1);
|
||||
*/
|
||||
/* 5: for k = 0 to PWXsimple - 1 do */
|
||||
for ( k = 0; k < PWXsimple; k++ )
|
||||
{
|
||||
|
||||
// shift from 32 bit data to 64 bit data
|
||||
__m256i x0, x1, s00, s01, s10, s11;
|
||||
// p0[k] / p1[k] are pairs of 256-bit vectors, viewed here as four 128-bit
// quarters each.
__m128i *p0k = (__m128i*)p0[k];
|
||||
__m128i *p1k = (__m128i*)p1[k];
|
||||
|
||||
|
||||
// Build 64-bit S-box values: quarters 0/1 are zero-extended into the low
// halves, quarters 2/3 are zero-extended and shifted into the high halves.
s00 = _mm256_add_epi64( _mm256_cvtepu32_epi64( p0k[0] ),
|
||||
_mm256_slli_epi64( _mm256_cvtepu32_epi64( p0k[2] ), 32 ) );
|
||||
s01 = _mm256_add_epi64( _mm256_cvtepu32_epi64( p0k[1] ),
|
||||
_mm256_slli_epi64( _mm256_cvtepu32_epi64( p0k[3] ), 32 ) );
|
||||
s10 = _mm256_add_epi64( _mm256_cvtepu32_epi64( p1k[0] ),
|
||||
_mm256_slli_epi64( _mm256_cvtepu32_epi64( p1k[2] ), 32 ) );
|
||||
s11 = _mm256_add_epi64( _mm256_cvtepu32_epi64( p1k[1] ),
|
||||
_mm256_slli_epi64( _mm256_cvtepu32_epi64( p1k[3] ), 32 ) );
|
||||
|
||||
// 32x32 -> 64 bit products of quarters 0*2 and 1*3 of X[j][k].
__m128i *xx = (__m128i*)X[j][k];
|
||||
x0 = _mm256_mul_epu32( _mm256_cvtepu32_epi64( xx[0] ),
|
||||
_mm256_cvtepu32_epi64( xx[2] ) );
|
||||
x1 = _mm256_mul_epu32( _mm256_cvtepu32_epi64( xx[1] ),
|
||||
_mm256_cvtepu32_epi64( xx[3] ) );
|
||||
|
||||
x0 = _mm256_add_epi64( x0, s00 );
|
||||
x1 = _mm256_add_epi64( x1, s01 );
|
||||
|
||||
x0 = _mm256_xor_si256( x0, s10 );
|
||||
x1 = _mm256_xor_si256( x1, s11 );
|
||||
|
||||
X[j][k][0] = x0;
|
||||
X[j][k][1] = x1;
|
||||
}
|
||||
|
||||
// S-box write-back, skipped for YESPOWER_0_5: done for every j in the first
// round and only for the first half of the gather lanes afterwards.
if ( ctx->version != YESPOWER_0_5 &&
|
||||
( i == 0 || j < PWXgather / 2 ) )
|
||||
{
|
||||
if ( j & 1 )
|
||||
{
|
||||
// Odd j: store into S1 and advance the write index.
for ( k = 0; k < PWXsimple; k++ )
|
||||
{
|
||||
S1[w][0] = X[j][k][0];
|
||||
S1[w][1] = X[j][k][1];
|
||||
w++;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Even j: store into S0 at the same positions; w is left unchanged here.
for ( k = 0; k < PWXsimple; k++ )
|
||||
{
|
||||
S0[w + k][0] = X[j][k][0];
|
||||
S0[w + k][1] = X[j][k][1];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if ( ctx->version != YESPOWER_0_5 )
|
||||
{
|
||||
/* 14: (S0, S1, S2) <-- (S2, S0, S1) */
|
||||
ctx->S0 = S2;
|
||||
ctx->S1 = S0;
|
||||
ctx->S2 = S1;
|
||||
/* 15: w <-- w mod 2^Swidth */
|
||||
ctx->w = w & ( ( 1 << ctx->Swidth ) * PWXsimple - 1 );
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* blockmix_pwxform(B, ctx, r):
|
||||
* Compute B = BlockMix_pwxform{salsa20, ctx, r}(B). The input B must be
|
||||
* 128r bytes in length.
|
||||
*/
|
||||
static void blockmix_pwxform_8way( uint32_t *B, pwxform_8way_ctx_t *ctx,
|
||||
size_t r )
|
||||
{
|
||||
__m256i X[PWXwords];
|
||||
size_t r1, i;
|
||||
|
||||
/* Convert 128-byte blocks to PWXbytes blocks */
|
||||
/* 1: r_1 <-- 128r / PWXbytes */
|
||||
r1 = 128 * r / PWXbytes;
|
||||
|
||||
/* 2: X <-- B'_{r_1 - 1} */
|
||||
blkcpy_8way( X, &B[ (r1 - 1) * PWXwords ], PWXwords );
|
||||
|
||||
/* 3: for i = 0 to r_1 - 1 do */
|
||||
for ( i = 0; i < r1; i++ )
|
||||
{
|
||||
/* 4: if r_1 > 1 */
|
||||
if ( r1 > 1 )
|
||||
{
|
||||
/* 5: X <-- X xor B'_i */
|
||||
blkxor_8way( X, &B[ i * PWXwords ], PWXwords );
|
||||
}
|
||||
|
||||
/* 7: X <-- pwxform(X) */
|
||||
pwxform_8way( X, ctx );
|
||||
|
||||
/* 8: B'_i <-- X */
|
||||
blkcpy_8way( &B[ i * PWXwords ], X, PWXwords );
|
||||
}
|
||||
|
||||
/* 10: i <-- floor((r_1 - 1) * PWXbytes / 64) */
|
||||
i = ( r1 - 1 ) * PWXbytes / 64;
|
||||
|
||||
/* 11: B_i <-- H(B_i) */
|
||||
salsa20_8way( &B[i * 16], ctx->salsa20_rounds );
|
||||
|
||||
#if 1 /* No-op with our current pwxform settings, but do it to make sure */
|
||||
/* 12: for i = i + 1 to 2r - 1 do */
|
||||
for ( i++; i < 2 * r; i++ )
|
||||
{
|
||||
/* 13: B_i <-- H(B_i xor B_{i-1}) */
|
||||
blkxor_8way( &B[i * 16], &B[ (i - 1) * 16 ], 16 );
|
||||
salsa20_8way( &B[i * 16], ctx->salsa20_rounds );
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
// This looks a lot like data dependent addressing
|
||||
|
||||
/**
|
||||
* integerify(B, r):
|
||||
* Return the result of parsing B_{2r-1} as a little-endian integer.
|
||||
*/
|
||||
static __m256i integerify8( const __m256i *B, size_t r )
|
||||
{
|
||||
/*
|
||||
* Our 32-bit words are in host byte order. Also, they are SIMD-shuffled, but
|
||||
* we only care about the least significant 32 bits anyway.
|
||||
*/
|
||||
const __m256i *X = &B[ (2 * r - 1) * 16 ];
|
||||
return X[0];
|
||||
}
|
||||
|
||||
/**
 * p2floor8(x):
 * Largest power of 2 not greater than argument.  Returns 0 for x == 0.
 */
static uint32_t p2floor8( uint32_t x )
{
   uint32_t y;

   /* x & (x - 1) clears the lowest set bit; repeat until only the highest
    * set bit remains. */
   while ( ( y = x & (x - 1) ) != 0 )
      x = y;
   return x;
}

/**
 * wrap8(x, i):
 * Wrap x to the range 0 to i-1.  i is expected to be non-zero.
 *
 * Uses p2floor8(), the helper defined in this file; the previous call to
 * p2floor() referred to a name this file does not define.
 */
static uint32_t wrap8( uint32_t x, uint32_t i )
{
   const uint32_t n = p2floor8( i );

   return ( x & (n - 1) ) + (i - n);
}
|
||||
|
||||
/**
|
||||
* smix1(B, r, N, V, X, ctx):
|
||||
* Compute first loop of B = SMix_r(B, N). The input B must be 128r bytes in
|
||||
* length; the temporary storage V must be 128rN bytes in length; the temporary
|
||||
* storage X must be 128r bytes in length.
|
||||
*/
|
||||
static void smix1_8way( __m256i *B, size_t r, uint32_t N,
|
||||
__m256i *V, __m256i *X, pwxform_8way_ctx_t *ctx )
|
||||
{
|
||||
/*
 * Fills V with N successive states of X (s = 32 * r vectors each), mixing X
 * after every store, then writes the final X back to B.
 *
 * NOTE(review): j is never assigned -- the statement that would compute it is
 * commented out below -- but it is used to index V once i > 1, so the XOR
 * source is indeterminate as written.  integerify8() returns a vector while
 * wrap8() takes a scalar, so presumably each lane needs its own j; confirm
 * the intended per-lane addressing before relying on this function.
 */
size_t s = 32 * r;
|
||||
uint32_t i, j;
|
||||
size_t k;
|
||||
|
||||
/* 1: X <-- B */
|
||||
// The (i * 5 % 16) permutation converts each 16-vector sub-block between the
// order used in B and the order used in X.
for ( k = 0; k < 2 * r; k++ )
|
||||
for ( i = 0; i < 16; i++ )
|
||||
X[ k * 16 + i ] = B[ k * 16 + ( i * 5 % 16 ) ];
|
||||
|
||||
// For versions other than 0.5, each further 32-vector chunk of X is derived
// from the previous one by a copy followed by a single-block pwxform mix.
if ( ctx->version != YESPOWER_0_5 )
|
||||
{
|
||||
for ( k = 1; k < r; k++ )
|
||||
{
|
||||
blkcpy_8way( &X[k * 32], &X[ (k - 1) * 32 ], 32 );
|
||||
blockmix_pwxform_8way( &X[k * 32], ctx, 1 );
|
||||
}
|
||||
}
|
||||
|
||||
/* 2: for i = 0 to N - 1 do */
|
||||
for ( i = 0; i < N; i++ )
|
||||
{
|
||||
/* 3: V_i <-- X */
|
||||
blkcpy_8way( &V[i * s], X, s );
|
||||
|
||||
if ( i > 1 )
|
||||
{
|
||||
|
||||
// is j int or vector? Integerify has data dependent addressing?
|
||||
|
||||
/* j <-- Wrap(Integerify(X), i) */
|
||||
// j = wrap8( integerify8( X, r ), i );
|
||||
|
||||
/* X <-- X xor V_j */
|
||||
blkxor_8way( X, &V[j * s], s );
|
||||
}
|
||||
|
||||
/* 4: X <-- H(X) */
|
||||
// When V is the S-box area itself (ctx->S) the S-boxes are still being
// filled, so the salsa-only mix is used instead of pwxform.
if ( V != ctx->S )
|
||||
blockmix_pwxform_8way( X, ctx, r );
|
||||
else
|
||||
blockmix_salsa_8way( X, ctx->salsa20_rounds );
|
||||
}
|
||||
|
||||
/* B' <-- X */
|
||||
for ( k = 0; k < 2 * r; k++ )
|
||||
for ( i = 0; i < 16; i++ )
|
||||
B[ k * 16 + ( i * 5 % 16 ) ] = X[ k * 16 + i ];
|
||||
}
|
||||
|
||||
/**
|
||||
* smix2(B, r, N, Nloop, V, X, ctx):
|
||||
* Compute second loop of B = SMix_r(B, N). The input B must be 128r bytes in
|
||||
* length; the temporary storage V must be 128rN bytes in length; the temporary
|
||||
* storage X must be 128r bytes in length. The value N must be a power of 2
|
||||
* greater than 1.
|
||||
*/
|
||||
static void smix2_8way( __m256i *B, size_t r, uint32_t N, uint32_t Nloop,
|
||||
__m256i *V, __m256i *X, pwxform_8way_ctx_t *ctx )
|
||||
{
|
||||
/*
 * Runs Nloop iterations that XOR a block of V into X, optionally store X
 * back over that block, and then mix X; finally writes X back to B.
 * A Nloop of 0 only performs the B -> X -> B round trip.
 *
 * NOTE(review): j is never assigned -- the statement that would compute it is
 * commented out below -- but it indexes V in every iteration, so the block
 * that is read (and possibly overwritten) is indeterminate as written.
 * integerify8() returns a vector, so presumably each lane needs its own j;
 * confirm the intended per-lane addressing.  N is only referenced by the
 * commented-out statement.
 */
size_t s = 32 * r;
|
||||
uint32_t i, j;
|
||||
size_t k;
|
||||
|
||||
/* X <-- B */
|
||||
for ( k = 0; k < 2 * r; k++ )
|
||||
for ( i = 0; i < 16; i++ )
|
||||
X[ k * 16 + i ] = B[ k * 16 + ( i * 5 % 16 ) ];
|
||||
|
||||
/* 6: for i = 0 to N - 1 do */
|
||||
for ( i = 0; i < Nloop; i++ )
|
||||
{
|
||||
/* 7: j <-- Integerify(X) mod N */
|
||||
// j = integerify8(X, r) & (N - 1);
|
||||
|
||||
/* 8.1: X <-- X xor V_j */
|
||||
blkxor_8way( X, &V[j * s], s );
|
||||
/* V_j <-- X */
|
||||
// The write-back is skipped when this call runs exactly two iterations.
if ( Nloop != 2 )
|
||||
blkcpy_8way( &V[j * s], X, s );
|
||||
|
||||
/* 8.2: X <-- H(X) */
|
||||
blockmix_pwxform_8way( X, ctx, r );
|
||||
}
|
||||
|
||||
/* 10: B' <-- X */
|
||||
for ( k = 0; k < 2 * r; k++ )
|
||||
for ( i = 0; i < 16; i++ )
|
||||
B[ k * 16 + ( i * 5 % 16 ) ] = X[ k * 16 + i ];
|
||||
}
|
||||
|
||||
/**
|
||||
* smix(B, r, N, p, t, V, X, ctx):
|
||||
* Compute B = SMix_r(B, N). The input B must be 128rp bytes in length; the
|
||||
* temporary storage V must be 128rN bytes in length; the temporary storage
|
||||
* X must be 128r bytes in length. The value N must be a power of 2 and at
|
||||
* least 16.
|
||||
*/
|
||||
static void smix_8way( __m256i *B, size_t r, uint32_t N,
|
||||
__m256i *V, __m256i *X, pwxform_8way_ctx_t *ctx)
|
||||
{
|
||||
uint32_t Nloop_all = (N + 2) / 3; /* 1/3, round up */
|
||||
uint32_t Nloop_rw = Nloop_all;
|
||||
|
||||
Nloop_all++; Nloop_all &= ~(uint32_t)1; /* round up to even */
|
||||
|
||||
if ( ctx->version == YESPOWER_0_5 )
|
||||
Nloop_rw &= ~(uint32_t)1; /* round down to even */
|
||||
else
|
||||
Nloop_rw++; Nloop_rw &= ~(uint32_t)1; /* round up to even */
|
||||
|
||||
smix1_8way( B, 1, ctx->Sbytes / 128, ctx->S, X, ctx );
|
||||
smix1_8way( B, r, N, V, X, ctx );
|
||||
smix2_8way( B, r, N, Nloop_rw /* must be > 2 */, V, X, ctx );
|
||||
smix2_8way( B, r, N, Nloop_all - Nloop_rw /* 0 or 2 */, V, X, ctx );
|
||||
}
|
||||
|
||||
/**
|
||||
* yespower(local, src, srclen, params, dst):
|
||||
* Compute yespower(src[0 .. srclen - 1], N, r), to be checked for "< target".
|
||||
*
|
||||
* Return 0 on success; or -1 on error.
|
||||
*/
|
||||
int yespower_8way( yespower_local_t *local, const __m256i *src, size_t srclen,
|
||||
const yespower_params_t *params, yespower_8way_binary_t *dst,
|
||||
int thrid )
|
||||
{
|
||||
yespower_version_t version = params->version;
|
||||
uint32_t N = params->N;
|
||||
uint32_t r = params->r;
|
||||
const uint8_t *pers = params->pers;
|
||||
size_t perslen = params->perslen;
|
||||
int retval = -1;
|
||||
size_t B_size, V_size;
|
||||
uint32_t *B, *V, *X, *S;
|
||||
pwxform_8way_ctx_t ctx;
|
||||
__m256i sha256[8];
|
||||
|
||||
/* Sanity-check parameters */
|
||||
if ( (version != YESPOWER_0_5 && version != YESPOWER_1_0 ) ||
|
||||
N < 1024 || N > 512 * 1024 || r < 8 || r > 32 ||
|
||||
(N & (N - 1)) != 0 || r < rmin ||
|
||||
(!pers && perslen) )
|
||||
{
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Allocate memory */
|
||||
B_size = (size_t)128 * r;
|
||||
V_size = B_size * N;
|
||||
if ((V = malloc(V_size)) == NULL)
|
||||
return -1;
|
||||
if ((B = malloc(B_size)) == NULL)
|
||||
goto free_V;
|
||||
if ((X = malloc(B_size)) == NULL)
|
||||
goto free_B;
|
||||
ctx.version = version;
|
||||
if (version == YESPOWER_0_5) {
|
||||
ctx.salsa20_rounds = 8;
|
||||
ctx.PWXrounds = PWXrounds_0_5;
|
||||
ctx.Swidth = Swidth_0_5;
|
||||
ctx.Sbytes = 2 * Swidth_to_Sbytes1(ctx.Swidth);
|
||||
} else {
|
||||
ctx.salsa20_rounds = 2;
|
||||
ctx.PWXrounds = PWXrounds_1_0;
|
||||
ctx.Swidth = Swidth_1_0;
|
||||
ctx.Sbytes = 3 * Swidth_to_Sbytes1(ctx.Swidth);
|
||||
}
|
||||
if ((S = malloc(ctx.Sbytes)) == NULL)
|
||||
goto free_X;
|
||||
ctx.S = S;
|
||||
ctx.S0 = (__m256i (*)[2])S;
|
||||
ctx.S1 = ctx.S0 + (1 << ctx.Swidth) * PWXsimple;
|
||||
ctx.S2 = ctx.S1 + (1 << ctx.Swidth) * PWXsimple;
|
||||
ctx.Smask = Swidth_to_Smask(ctx.Swidth);
|
||||
ctx.w = 0;
|
||||
|
||||
// do prehash
|
||||
sha256_8way_full( sha256, src, srclen );
|
||||
|
||||
|
||||
// need flexible size, use malloc;
|
||||
__m256i vpers[128];
|
||||
|
||||
if ( version != YESPOWER_0_5 && perslen )
|
||||
for ( int i = 0; i < perslen/4 + 1; i++ )
|
||||
vpers[i] = _mm256_set1_epi32( pers[i] );
|
||||
|
||||
/* 1: (B_0 ... B_{p-1}) <-- PBKDF2(P, S, 1, p * MFLen) */
|
||||
pbkdf2_sha256_8way( B, B_size, sha256, sizeof(sha256), vpers, perslen, 1 );
|
||||
|
||||
blkcpy_8way( sha256, B, sizeof(sha256) / sizeof(sha256[0] ) );
|
||||
|
||||
/* 3: B_i <-- MF(B_i, N) */
|
||||
smix_8way( B, r, N, V, X, &ctx );
|
||||
|
||||
if ( version == YESPOWER_0_5 )
|
||||
{
|
||||
/* 5: DK <-- PBKDF2(P, B, 1, dkLen) */
|
||||
pbkdf2_sha256_8way( dst, sizeof(*dst), sha256, sizeof(sha256),
|
||||
B, B_size, 1 );
|
||||
|
||||
if ( pers )
|
||||
{
|
||||
hmac_sha256_8way_full( dst, sizeof(*dst), vpers, perslen, sha256 );
|
||||
sha256_8way_full( dst, sha256, sizeof(sha256) );
|
||||
}
|
||||
}
|
||||
else
|
||||
hmac_sha256_8way_full( dst, B + B_size - 64, 64, sha256, sizeof(sha256) );
|
||||
|
||||
/* Success! */
|
||||
retval = 1;
|
||||
|
||||
/* Free memory */
|
||||
free(S);
|
||||
free_X:
|
||||
free(X);
|
||||
free_B:
|
||||
free(B);
|
||||
free_V:
|
||||
free(V);
|
||||
|
||||
return retval;
|
||||
}
|
||||
|
||||
int yespower_8way_tls( const __m256i *src, size_t srclen,
|
||||
const yespower_params_t *params, yespower_8way_binary_t *dst, int trhid )
|
||||
{
|
||||
/* The reference implementation doesn't use thread-local storage */
|
||||
return yespower_8way( NULL, src, srclen, params, dst, trhid );
|
||||
}
|
||||
|
||||
/**
 * yespower_init_local8(local):
 * Reset the local structure to an empty state.  The reference implementation
 * doesn't use the local structure, so nothing is allocated.  Always returns 0.
 */
int yespower_init_local8( yespower_local_t *local )
{
   local->aligned = NULL;
   local->base = NULL;
   local->aligned_size = 0;
   local->base_size = 0;
   return 0;
}
|
||||
|
||||
/**
 * yespower_free_local8(local):
 * Counterpart of yespower_init_local8().  The reference implementation frees
 * its memory inside yespower_8way(), so there is nothing to release here.
 * Always returns 0.
 */
int yespower_free_local8( yespower_local_t *local )
{
   const int status = 0;

   (void)local; /* unused */
   return status;
}
|
||||
|
||||
int yespower_8way_hash( const char *input, char *output, uint32_t len,
|
||||
int thrid )
|
||||
{
|
||||
return yespower_8way_tls( input, len, &yespower_params,
|
||||
(yespower_binary_t*)output, thrid );
|
||||
}
|
||||
|
||||
int scanhash_yespower_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
/*
 * Scans nonces starting at work->data[19] until max_nonce is reached or a
 * restart is requested for this thread, submitting any hash that passes
 * valid_hash() unless benchmarking.  Stores the number of nonces tried in
 * *hashes_done, writes the next nonce back to work->data[19] and returns 0.
 *
 * NOTE(review): this looks like an unfinished 8-way port:
 *  - vdata is hashed but nothing in this function writes to it;
 *  - endiandata is filled in but only fed to the SHA256 prehash;
 *  - the loop calls yespower_hash(), not yespower_8way_hash();
 *  - the nonce advances by one per iteration and only the start of hash is
 *    tested, although the buffers are sized for 8 lanes.
 * Confirm the intended data flow before enabling this function.
 */
uint32_t _ALIGN(128) hash[8*8];
|
||||
uint32_t _ALIGN(128) vdata[20*8];
|
||||
uint32_t _ALIGN(128) endiandata[20];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce;
|
||||
uint32_t n = first_nonce;
|
||||
const int thr_id = mythr->id;
|
||||
|
||||
// Big-endian encode the first 19 header words; the nonce word is stored as is.
for ( int k = 0; k < 19; k++ )
|
||||
be32enc( &endiandata[k], pdata[k] );
|
||||
endiandata[19] = n;
|
||||
|
||||
// do sha256 prehash
|
||||
// Absorbs the first 64 bytes of the header, which do not include the nonce.
SHA256_Init( &sha256_prehash_ctx );
|
||||
SHA256_Update( &sha256_prehash_ctx, endiandata, 64 );
|
||||
|
||||
do {
|
||||
if ( yespower_hash( vdata, hash, 80, thr_id ) )
|
||||
if unlikely( valid_hash( hash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
be32enc( pdata+19, n );
|
||||
submit_solution( work, hash, mythr );
|
||||
}
|
||||
endiandata[19] = ++n;
|
||||
} while ( n < last_nonce && !work_restart[thr_id].restart );
|
||||
*hashes_done = n - first_nonce;
|
||||
pdata[19] = n;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif // AVX2
|
||||
Reference in New Issue
Block a user