mirror of
https://github.com/JayDDee/cpuminer-opt.git
synced 2025-09-17 23:44:27 +00:00
v3.10.6
This commit is contained in:
@@ -84,6 +84,7 @@ cpuminer_SOURCES = \
|
||||
algo/cubehash/cubehash_sse2.c\
|
||||
algo/cubehash/cube-hash-2way.c \
|
||||
algo/echo/sph_echo.c \
|
||||
algo/echo/echo-hash-4way.c \
|
||||
algo/echo/aes_ni/hash.c\
|
||||
algo/gost/sph_gost.c \
|
||||
algo/groestl/sph_groestl.c \
|
||||
|
@@ -7,9 +7,11 @@ Security warning
|
||||
----------------
|
||||
|
||||
Miner programs are often flagged as malware by antivirus programs. This is
|
||||
a false positive, they are flagged simply because they are cryptocurrency
|
||||
miners. The source code is open for anyone to inspect. If you don't trust
|
||||
the software, don't use it.
|
||||
usually a false positive, they are flagged simply because they are
|
||||
cryptocurrency miners. However, some malware has been spread using the
|
||||
cover that miners are known to be subject to false positives. Always be on
|
||||
alert. The source code of cpuminer-opt is open for anyone to inspect.
|
||||
If you don't trust the software, don't download it.
|
||||
|
||||
The cryptographic hashing code has been taken from trusted sources but has been
|
||||
modified for speed at the expense of accepted security practices. This
|
||||
@@ -33,6 +35,16 @@ not supported. FreeBSD YMMV.
|
||||
Change Log
|
||||
----------
|
||||
|
||||
v3.10.6
|
||||
|
||||
Added support for SSL stratum: stratum+tcps://
|
||||
|
||||
Added job id reporting again, but leaner, suppressed with --quiet.
|
||||
|
||||
AVX512 for x21s, x22i, lyra2z, allium
|
||||
|
||||
Fixed share overflow warnings mining lbry with Ryzen (SHA).
|
||||
|
||||
v3.10.5
|
||||
|
||||
AVX512 for x17, sonoa, xevan, hmq1725, lyra2rev3, lyra2rev2.
|
||||
|
@@ -463,6 +463,38 @@ int blake2s_8way_final( blake2s_8way_state *S, void *out, uint8_t outlen )
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Update and final when inlen is a multiple of 64 bytes
|
||||
int blake2s_8way_full_blocks( blake2s_8way_state *S, void *out,
|
||||
const void *input, uint64_t inlen )
|
||||
{
|
||||
__m256i *in = (__m256i*)input;
|
||||
__m256i *buf = (__m256i*)S->buf;
|
||||
|
||||
while( inlen > BLAKE2S_BLOCKBYTES )
|
||||
{
|
||||
memcpy_256( buf, in, BLAKE2S_BLOCKBYTES >> 2 );
|
||||
S->buflen = BLAKE2S_BLOCKBYTES;
|
||||
inlen -= BLAKE2S_BLOCKBYTES;
|
||||
S->t[0] += BLAKE2S_BLOCKBYTES;
|
||||
S->t[1] += ( S->t[0] < BLAKE2S_BLOCKBYTES );
|
||||
blake2s_8way_compress( S, buf );
|
||||
S->buflen = 0;
|
||||
in += ( BLAKE2S_BLOCKBYTES >> 2 );
|
||||
}
|
||||
|
||||
// last block
|
||||
memcpy_256( buf, in, BLAKE2S_BLOCKBYTES >> 2 );
|
||||
S->buflen = BLAKE2S_BLOCKBYTES;
|
||||
S->t[0] += S->buflen;
|
||||
S->t[1] += ( S->t[0] < S->buflen );
|
||||
if ( S->last_node ) S->f[1] = ~0U;
|
||||
S->f[0] = ~0U;
|
||||
blake2s_8way_compress( S, buf );
|
||||
|
||||
for ( int i = 0; i < 8; ++i )
|
||||
casti_m256i( out, i ) = S->h[ i ];
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif // __AVX2__
|
||||
|
||||
|
@@ -95,8 +95,8 @@ int blake2s_8way_init( blake2s_8way_state *S, const uint8_t outlen );
|
||||
int blake2s_8way_update( blake2s_8way_state *S, const void *in,
|
||||
uint64_t inlen );
|
||||
int blake2s_8way_final( blake2s_8way_state *S, void *out, uint8_t outlen );
|
||||
//int blake2s_8way_full_blocks( blake2s_8way_state *S, void *out,
|
||||
// const void *input, uint64_t inlen );
|
||||
int blake2s_8way_full_blocks( blake2s_8way_state *S, void *out,
|
||||
const void *input, uint64_t inlen );
|
||||
|
||||
#endif
|
||||
|
||||
|
559
algo/echo/echo-hash-4way.c
Normal file
559
algo/echo/echo-hash-4way.c
Normal file
@@ -0,0 +1,559 @@
|
||||
#if defined(__AVX512VAES__) && defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
|
||||
#include "simd-utils.h"
|
||||
#include "echo-hash-4way.h"
|
||||
|
||||
/*
|
||||
#include <memory.h>
|
||||
#include "miner.h"
|
||||
#include "hash_api.h"
|
||||
//#include "vperm.h"
|
||||
#include <immintrin.h>
|
||||
*/
|
||||
/*
|
||||
#ifndef NO_AES_NI
|
||||
#include <wmmintrin.h>
|
||||
#else
|
||||
#include <tmmintrin.h>
|
||||
#endif
|
||||
*/
|
||||
|
||||
// not used
|
||||
/*
|
||||
const unsigned int _k_s0F[] = {0x0F0F0F0F, 0x0F0F0F0F, 0x0F0F0F0F, 0x0F0F0F0F};
|
||||
const unsigned int _k_ipt[] = {0x5A2A7000, 0xC2B2E898, 0x52227808, 0xCABAE090, 0x317C4D00, 0x4C01307D, 0xB0FDCC81, 0xCD80B1FC};
|
||||
const unsigned int _k_opt[] = {0xD6B66000, 0xFF9F4929, 0xDEBE6808, 0xF7974121, 0x50BCEC00, 0x01EDBD51, 0xB05C0CE0, 0xE10D5DB1};
|
||||
const unsigned int _k_inv[] = {0x0D080180, 0x0E05060F, 0x0A0B0C02, 0x04070309, 0x0F0B0780, 0x01040A06, 0x02050809, 0x030D0E0C};
|
||||
const unsigned int _k_sb1[] = {0xCB503E00, 0xB19BE18F, 0x142AF544, 0xA5DF7A6E, 0xFAE22300, 0x3618D415, 0x0D2ED9EF, 0x3BF7CCC1};
|
||||
const unsigned int _k_sb2[] = {0x0B712400, 0xE27A93C6, 0xBC982FCD, 0x5EB7E955, 0x0AE12900, 0x69EB8840, 0xAB82234A, 0xC2A163C8};
|
||||
const unsigned int _k_sb3[] = {0xC0211A00, 0x53E17249, 0xA8B2DA89, 0xFB68933B, 0xF0030A00, 0x5FF35C55, 0xA6ACFAA5, 0xF956AF09};
|
||||
const unsigned int _k_sb4[] = {0x3FD64100, 0xE1E937A0, 0x49087E9F, 0xA876DE97, 0xC393EA00, 0x3D50AED7, 0x876D2914, 0xBA44FE79};
|
||||
const unsigned int _k_sb5[] = {0xF4867F00, 0x5072D62F, 0x5D228BDB, 0x0DA9A4F9, 0x3971C900, 0x0B487AC2, 0x8A43F0FB, 0x81B332B8};
|
||||
const unsigned int _k_sb7[] = {0xFFF75B00, 0xB20845E9, 0xE1BAA416, 0x531E4DAC, 0x3390E000, 0x62A3F282, 0x21C1D3B1, 0x43125170};
|
||||
const unsigned int _k_sbo[] = {0x6FBDC700, 0xD0D26D17, 0xC502A878, 0x15AABF7A, 0x5FBB6A00, 0xCFE474A5, 0x412B35FA, 0x8E1E90D1};
|
||||
const unsigned int _k_h63[] = {0x63636363, 0x63636363, 0x63636363, 0x63636363};
|
||||
const unsigned int _k_hc6[] = {0xc6c6c6c6, 0xc6c6c6c6, 0xc6c6c6c6, 0xc6c6c6c6};
|
||||
const unsigned int _k_h5b[] = {0x5b5b5b5b, 0x5b5b5b5b, 0x5b5b5b5b, 0x5b5b5b5b};
|
||||
const unsigned int _k_h4e[] = {0x4e4e4e4e, 0x4e4e4e4e, 0x4e4e4e4e, 0x4e4e4e4e};
|
||||
const unsigned int _k_h0e[] = {0x0e0e0e0e, 0x0e0e0e0e, 0x0e0e0e0e, 0x0e0e0e0e};
|
||||
const unsigned int _k_h15[] = {0x15151515, 0x15151515, 0x15151515, 0x15151515};
|
||||
const unsigned int _k_aesmix1[] = {0x0f0a0500, 0x030e0904, 0x07020d08, 0x0b06010c};
|
||||
const unsigned int _k_aesmix2[] = {0x000f0a05, 0x04030e09, 0x0807020d, 0x0c0b0601};
|
||||
const unsigned int _k_aesmix3[] = {0x05000f0a, 0x0904030e, 0x0d080702, 0x010c0b06};
|
||||
const unsigned int _k_aesmix4[] = {0x0a05000f, 0x0e090403, 0x020d0807, 0x06010c0b};
|
||||
*/
|
||||
|
||||
/*
|
||||
MYALIGN const unsigned int const1[] = {0x00000001, 0x00000000, 0x00000000, 0x00000000};
|
||||
MYALIGN const unsigned int mul2mask[] = {0x00001b00, 0x00000000, 0x00000000, 0x00000000};
|
||||
MYALIGN const unsigned int lsbmask[] = {0x01010101, 0x01010101, 0x01010101, 0x01010101};
|
||||
MYALIGN const unsigned int invshiftrows[] = {0x070a0d00, 0x0b0e0104, 0x0f020508, 0x0306090c};
|
||||
MYALIGN const unsigned int zero[] = {0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
*/
|
||||
|
||||
// NOTE(review): MYALIGN is not defined in the visible part of this file (the
// hash_api.h include above is commented out) -- confirm simd-utils.h provides
// it. mul2ipt is not referenced by any code visible in this file.
MYALIGN const unsigned int mul2ipt[] = {0x728efc00, 0x6894e61a, 0x3fc3b14d, 0x25d9ab57, 0xfd5ba600, 0x2a8c71d7, 0x1eb845e3, 0xc96f9234};
|
||||
|
||||
// Per-128-bit-lane constants used by ECHO_MIXBYTES.
//
// mul2mask is used as a byte-shuffle table indexed by bytes that are 0 or 1
// (see ECHO_MIXBYTES), so 0x1b has to be byte 1 of every 128-bit lane, i.e.
// 0x00001b00 must be the LOWEST dword of each lane. _mm512_set4_epi32 takes
// its arguments highest dword first, hence the value goes last.
#define mul2mask _mm512_set4_epi32( 0, 0, 0, 0x00001b00 )

// 0x01 in every byte: isolates the bit shifted down from each byte's msb.
#define lsbmask m512_const1_32( 0x01010101 )
|
||||
|
||||
// Apply two AES rounds to state[i][j]: the first keyed with the running
// counter k1, the second with an all-zero key, then advance k1 by one.
// Only the low dword of each 128-bit lane is incremented, matching the
// {1,0,0,0} increment of the 128-bit implementation; incrementing every
// dword would corrupt the upper 96 bits of the per-lane key.
// Expects a local __m512i named k1 in the expanding scope. The expansion is
// several statements with no trailing semicolon; use it as a plain statement.
#define ECHO_SUBBYTES( state, i, j ) \
   state[i][j] = _mm512_aesenc_epi128( state[i][j], k1 ); \
   state[i][j] = _mm512_aesenc_epi128( state[i][j], m512_zero ); \
   k1 = _mm512_add_epi32( k1, _mm512_set4_epi32( 0, 0, 0, 1 ) )
|
||||
|
||||
// Mix column j of state1 into column j of state2.
// For each of the four source words state1[r][(j+r)&3] the macro forms its
// byte-wise doubling s2 (add the word to itself, then xor 0x1b into every
// byte whose top bit was set, via the mul2mask shuffle) and xors the word
// and/or its doubling into the four rows of state2[..][j].
// t1, t2 and s2 are caller supplied __m512i scratch variables.
// Wrapped in do { } while(0) so it behaves as a single statement.
#define ECHO_MIXBYTES( state1, state2, j, t1, t2, s2 ) do \
{ \
   const int j1 = ( j+1 ) & 3; \
   const int j2 = ( j+2 ) & 3; \
   const int j3 = ( j+3 ) & 3; \
   s2 = _mm512_add_epi8( state1[ 0 ][ j ], state1[ 0 ][ j ] ); \
   t1 = _mm512_srli_epi16( state1[ 0 ][ j ], 7 ); \
   t1 = _mm512_and_si512( t1, lsbmask ); \
   t2 = _mm512_shuffle_epi8( mul2mask, t1 ); \
   s2 = _mm512_xor_si512( s2, t2 ); \
   state2[ 0 ][ j ] = s2; \
   state2[ 1 ][ j ] = state1[ 0 ][ j ]; \
   state2[ 2 ][ j ] = state1[ 0 ][ j ]; \
   state2[ 3 ][ j ] = _mm512_xor_si512( s2, state1[ 0 ][ j ] ); \
   s2 = _mm512_add_epi8( state1[ 1 ][ j1 ], state1[ 1 ][ j1 ] ); \
   t1 = _mm512_srli_epi16( state1[ 1 ][ j1 ], 7 ); \
   t1 = _mm512_and_si512( t1, lsbmask ); \
   t2 = _mm512_shuffle_epi8( mul2mask, t1 ); \
   s2 = _mm512_xor_si512( s2, t2 ); \
   state2[ 0 ][ j ] = _mm512_xor_si512( state2[ 0 ][ j ], \
                         _mm512_xor_si512( s2, state1[ 1 ][ j1 ] ) ); \
   state2[ 1 ][ j ] = _mm512_xor_si512( state2[ 1 ][ j ], s2 ); \
   state2[ 2 ][ j ] = _mm512_xor_si512( state2[ 2 ][ j ], state1[ 1 ][ j1 ] ); \
   state2[ 3 ][ j ] = _mm512_xor_si512( state2[ 3 ][ j ], state1[ 1 ][ j1 ] ); \
   s2 = _mm512_add_epi8( state1[ 2 ][ j2 ], state1[ 2 ][ j2 ] ); \
   t1 = _mm512_srli_epi16( state1[ 2 ][ j2 ], 7 ); \
   t1 = _mm512_and_si512( t1, lsbmask ); \
   t2 = _mm512_shuffle_epi8( mul2mask, t1 ); \
   s2 = _mm512_xor_si512( s2, t2 ); \
   state2[ 0 ][ j ] = _mm512_xor_si512( state2[ 0 ][ j ], state1[ 2 ][ j2 ] ); \
   state2[ 1 ][ j ] = _mm512_xor_si512( state2[ 1 ][ j ], \
                         _mm512_xor_si512( s2, state1[ 2 ][ j2 ] ) ); \
   state2[ 2 ][ j ] = _mm512_xor_si512( state2[ 2 ][ j ], s2 ); \
   state2[ 3 ][ j ] = _mm512_xor_si512( state2[ 3 ][ j ], state1[ 2 ][ j2 ] ); \
   s2 = _mm512_add_epi8( state1[ 3 ][ j3 ], state1[ 3 ][ j3 ] ); \
   t1 = _mm512_srli_epi16( state1[ 3 ][ j3 ], 7 ); \
   t1 = _mm512_and_si512( t1, lsbmask ); \
   t2 = _mm512_shuffle_epi8( mul2mask, t1 ); \
   s2 = _mm512_xor_si512( s2, t2 ); \
   state2[ 0 ][ j ] = _mm512_xor_si512( state2[ 0 ][ j ], state1[ 3 ][ j3 ] ); \
   state2[ 1 ][ j ] = _mm512_xor_si512( state2[ 1 ][ j ], state1[ 3 ][ j3 ] ); \
   state2[ 2 ][ j ] = _mm512_xor_si512( state2[ 2 ][ j ], \
                         _mm512_xor_si512( s2, state1[ 3 ][ j3 ] ) ); \
   state2[ 3 ][ j ] = _mm512_xor_si512( state2[ 3 ][ j ], s2 ); \
} while(0)
|
||||
|
||||
// ECHO_SUBBYTES on all four rows of one column, rows in ascending order.
#define ECHO_SUBBYTES_COLUMN( st, col ) \
   ECHO_SUBBYTES( st, 0, col ); \
   ECHO_SUBBYTES( st, 1, col ); \
   ECHO_SUBBYTES( st, 2, col ); \
   ECHO_SUBBYTES( st, 3, col )

// One round: SubBytes over the whole of "from" (column by column, so the k1
// counter advances in the same order as before), then MixBytes of every
// column of "from" into "to".
#define ECHO_ONE_ROUND( from, to ) \
   ECHO_SUBBYTES_COLUMN( from, 0 ); \
   ECHO_SUBBYTES_COLUMN( from, 1 ); \
   ECHO_SUBBYTES_COLUMN( from, 2 ); \
   ECHO_SUBBYTES_COLUMN( from, 3 ); \
   ECHO_MIXBYTES( from, to, 0, t1, t2, s2 ); \
   ECHO_MIXBYTES( from, to, 1, t1, t2, s2 ); \
   ECHO_MIXBYTES( from, to, 2, t1, t2, s2 ); \
   ECHO_MIXBYTES( from, to, 3, t1, t2, s2 )

// Two rounds, ping-ponging between _state and _state2 so the result ends up
// back in _state. Expects locals _state, _state2, t1, t2, s2 and k1.
#define ECHO_ROUND_UNROLL2 \
   ECHO_ONE_ROUND( _state, _state2 ); \
   ECHO_ONE_ROUND( _state2, _state )
|
||||
|
||||
|
||||
|
||||
// Copy one 4-element row of a 4x4 state.
#define SAVESTATE_ROW( dst, src, row ) \
   dst[row][0] = src[row][0]; \
   dst[row][1] = src[row][1]; \
   dst[row][2] = src[row][2]; \
   dst[row][3] = src[row][3]

// Copy a complete 4x4 state from src to dst, element by element.
// Expands to several statements with no trailing semicolon.
#define SAVESTATE( dst, src ) \
   SAVESTATE_ROW( dst, src, 0 ); \
   SAVESTATE_ROW( dst, src, 1 ); \
   SAVESTATE_ROW( dst, src, 2 ); \
   SAVESTATE_ROW( dst, src, 3 )
|
||||
|
||||
|
||||
// Compress uBlockCount message blocks starting at pmsg into ctx->state.
// The first uHashSize/256 columns of the working state hold the chaining
// value; the remaining columns are reloaded from the message for every
// block. ctx->k is advanced by ctx->const1536 per block and seeds the round
// key counter k1.
void echo_4way_compress( echo_4way_context *ctx, const unsigned char *pmsg,
                         unsigned int uBlockCount )
{
   const unsigned int chain_cols = ctx->uHashSize / 256;
   unsigned int round, blk, i, j;
   // t1, t2, s2, k1, _state and _state2 are referenced by name from the
   // ECHO_* macros and must keep these names.
   __m512i t1, t2, s2, k1;
   __m512i _state[4][4], _state2[4][4], _statebackup[4][4];

   // Bring the chaining columns into the working state.
   for ( i = 0; i < 4; i++ )
      for ( j = 0; j < chain_cols; j++ )
         _state[ i ][ j ] = ctx->state[ i ][ j ];

   for ( blk = 0; blk < uBlockCount; blk++ )
   {
      const __m512i *vmsg = (const __m512i*)pmsg;

      ctx->k = _mm512_add_epi64( ctx->k, ctx->const1536 );

      // Fill the message columns (unaligned loads).
      for ( j = chain_cols; j < 4; j++ )
         for ( i = 0; i < 4; i++ )
            _state[ i ][ j ] =
                     _mm512_loadu_si512( vmsg + 4 * ( j - chain_cols ) + i );

      // Keep the pre-round state for the feed-forward below.
      SAVESTATE( _statebackup, _state );

      k1 = ctx->k;

      for ( round = 0; round < ctx->uRounds / 2; round++ )
      {
         ECHO_ROUND_UNROLL2;
      }

      if ( ctx->uHashSize == 256 )
      {
         // Fold all four columns, before and after the rounds, into col 0.
         for ( i = 0; i < 4; i++ )
         {
            const __m512i after = _mm512_xor_si512(
                     _mm512_xor_si512( _state[ i ][ 0 ], _state[ i ][ 1 ] ),
                     _mm512_xor_si512( _state[ i ][ 2 ], _state[ i ][ 3 ] ) );
            const __m512i before = _mm512_xor_si512(
                     _mm512_xor_si512( _statebackup[ i ][ 0 ],
                                       _statebackup[ i ][ 1 ] ),
                     _mm512_xor_si512( _statebackup[ i ][ 2 ],
                                       _statebackup[ i ][ 3 ] ) );
            _state[ i ][ 0 ] = _mm512_xor_si512( after, before );
         }
      }
      else
      {
         // Fold columns 0/2 into column 0 and columns 1/3 into column 1.
         for ( i = 0; i < 4; i++ )
         {
            const __m512i even = _mm512_xor_si512(
                     _mm512_xor_si512( _state[ i ][ 0 ], _state[ i ][ 2 ] ),
                     _mm512_xor_si512( _statebackup[ i ][ 0 ],
                                       _statebackup[ i ][ 2 ] ) );
            const __m512i odd = _mm512_xor_si512(
                     _mm512_xor_si512( _state[ i ][ 1 ], _state[ i ][ 3 ] ),
                     _mm512_xor_si512( _statebackup[ i ][ 1 ],
                                       _statebackup[ i ][ 3 ] ) );
            _state[ i ][ 0 ] = even;
            _state[ i ][ 1 ] = odd;
         }
      }

      // NOTE(review): pmsg advances by uBlockLength bytes, while the loads
      // above read 4 * ( 4 - chain_cols ) 64-byte vectors per block --
      // confirm the intended stride for interleaved input.
      pmsg += ctx->uBlockLength;
   }

   SAVESTATE( ctx->state, _state );
}
|
||||
|
||||
|
||||
|
||||
// Initialise a 4-way ECHO context for a 256 or 512 bit digest.
//   ctx       context to initialise
//   nHashSize digest size in bits, 256 or 512
// Returns SUCCESS, or BAD_HASHBITLEN for any other size (ctx is then only
// partially initialised).
int echo_4way_init( echo_4way_context *ctx, int nHashSize )
{
   int i, j;

   ctx->k = m512_zero;
   ctx->processed_bits = 0;
   ctx->uBufferBytes = 0;

   // The per-lane constants are built with _mm512_set4_epi32, the same
   // intrinsic the close functions use; the value sits in the low dword of
   // every 128-bit lane.
   switch( nHashSize )
   {
      case 256:
         ctx->uHashSize = 256;
         ctx->uBlockLength = 192;
         ctx->uRounds = 8;
         ctx->hashsize = _mm512_set4_epi32( 0, 0, 0, 0x100 );
         ctx->const1536 = _mm512_set4_epi32( 0, 0, 0, 0x600 );
         break;

      case 512:
         ctx->uHashSize = 512;
         ctx->uBlockLength = 128;
         ctx->uRounds = 10;
         ctx->hashsize = _mm512_set4_epi32( 0, 0, 0, 0x200 );
         ctx->const1536 = _mm512_set4_epi32( 0, 0, 0, 0x400 );
         break;

      default:
         return BAD_HASHBITLEN;
   }

   // Chaining columns start out as the hash size ...
   for( i = 0; i < 4; i++ )
      for( j = 0; j < nHashSize / 256; j++ )
         ctx->state[ i ][ j ] = ctx->hashsize;

   // ... and the message columns as zero.
   for( i = 0; i < 4; i++ )
      for( j = nHashSize / 256; j < 4; j++ )
         ctx->state[ i ][ j ] = m512_zero;

   return SUCCESS;
}
|
||||
|
||||
// Absorb databitlen bits of data. Whole blocks are compressed immediately;
// a trailing partial block is kept in state->buffer for the next call.
// Always returns 0.
int echo_4way_update( echo_4way_context *state, const BitSequence *data, DataLength databitlen )
{
   unsigned int nbytes = (unsigned int)(databitlen / 8);
   unsigned int nblocks, tail;

   // Not enough for a full block yet: just append to the buffer.
   if ( ( state->uBufferBytes + nbytes ) < state->uBlockLength )
   {
      memcpy( state->buffer + state->uBufferBytes, (void*)data, nbytes );
      state->uBufferBytes += nbytes;
      return 0;
   }

   // Top up and flush a partially filled buffer first.
   if ( state->uBufferBytes != 0 )
   {
      const unsigned int fill = state->uBlockLength - state->uBufferBytes;

      memcpy( state->buffer + state->uBufferBytes, (void*)data, fill );
      echo_4way_compress( state, state->buffer, 1 );
      state->processed_bits += state->uBlockLength * 8;

      data += fill;
      nbytes -= fill;
   }

   // The buffer holds no unprocessed bytes from here on.
   nblocks = nbytes / state->uBlockLength;
   tail    = nbytes % state->uBlockLength;

   // Compress whole blocks straight from the caller's data.
   if ( nblocks > 0 )
   {
      echo_4way_compress( state, data, nblocks );
      state->processed_bits += nblocks * state->uBlockLength * 8;
      data += nblocks * state->uBlockLength;
   }

   // Keep the leftover for later.
   if ( tail > 0 )
      memcpy( state->buffer, (void*)data, tail );

   state->uBufferBytes = tail;
   return 0;
}
|
||||
|
||||
// Pad the buffered data, compress the final block(s) and write the digest.
//   state    context holding the buffered tail of the message
//   hashval  receives state[0][0] and state[1][0], plus state[2][0] and
//            state[3][0] when uHashSize is 512
// Returns 0.
// The definition previously had no return type (implicit int, not valid
// since C99) although it returns a value; it is now declared int.
// NOTE(review): state->buffer is declared as an array of __m512i, while the
// padding below indexes and offsets it with byte counts -- confirm the
// intended addressing for the interleaved layout.
int echo_4way_close( echo_4way_context *state, BitSequence *hashval )
{
   __m512i remainingbits;

   // Add remaining bytes in the buffer
   state->processed_bits += state->uBufferBytes * 8;

   remainingbits = _mm512_set4_epi32( 0, 0, 0, state->uBufferBytes * 8 );

   // Pad with 0x80
   state->buffer[ state->uBufferBytes++ ] = 0x80;

   // Enough buffer space for padding in this block?
   if ( ( state->uBlockLength - state->uBufferBytes ) >= 18 )
   {
      // Pad with zeros
      memset( state->buffer + state->uBufferBytes, 0,
              state->uBlockLength - ( state->uBufferBytes + 18 ) );

      // Hash size
      *( (unsigned short*)( state->buffer + state->uBlockLength - 18 ) )
                 = state->uHashSize;

      // Processed bits
      *( (DataLength*)( state->buffer + state->uBlockLength - 16 ) )
                 = state->processed_bits;
      *( (DataLength*)( state->buffer + state->uBlockLength - 8 ) ) = 0;

      // Last block contains message bits?
      if ( state->uBufferBytes == 1 )
      {
         // No message bits: k is reset to zero before the adjustment.
         state->k = _mm512_xor_si512( state->k, state->k );
         state->k = _mm512_sub_epi64( state->k, state->const1536 );
      }
      else
      {
         state->k = _mm512_add_epi64( state->k, remainingbits );
         state->k = _mm512_sub_epi64( state->k, state->const1536 );
      }

      // Compress
      echo_4way_compress( state, state->buffer, 1 );
   }
   else
   {
      // Fill with zero and compress
      memset( state->buffer + state->uBufferBytes, 0,
              state->uBlockLength - state->uBufferBytes );
      state->k = _mm512_add_epi64( state->k, remainingbits );
      state->k = _mm512_sub_epi64( state->k, state->const1536 );
      echo_4way_compress( state, state->buffer, 1 );

      // Last block
      memset( state->buffer, 0, state->uBlockLength - 18 );

      // Hash size
      *( (unsigned short*)( state->buffer + state->uBlockLength - 18 ) )
                 = state->uHashSize;

      // Processed bits
      *( (DataLength*)( state->buffer + state->uBlockLength - 16 ) )
                 = state->processed_bits;
      *( (DataLength*)( state->buffer + state->uBlockLength - 8 ) ) = 0;

      // Compress the last block
      state->k = _mm512_xor_si512( state->k, state->k );
      state->k = _mm512_sub_epi64( state->k, state->const1536 );
      echo_4way_compress( state, state->buffer, 1 );
   }

   // Store the hash value
   _mm512_storeu_si512( (__m512i*)hashval + 0, state->state[ 0 ][ 0 ] );
   _mm512_storeu_si512( (__m512i*)hashval + 1, state->state[ 1 ][ 0 ] );

   if ( state->uHashSize == 512 )
   {
      _mm512_storeu_si512( (__m512i*)hashval + 2, state->state[ 2 ][ 0 ] );
      _mm512_storeu_si512( (__m512i*)hashval + 3, state->state[ 3 ][ 0 ] );
   }

   return 0;
}
|
||||
|
||||
// Absorb databitlen bits of data, then pad, finalise and write the digest:
// the update and close steps fused into one call.
//   state      context, possibly already holding buffered data
//   hashval    receives state[0][0] and state[1][0], plus state[2][0] and
//              state[3][0] when uHashSize is 512
//   data       message bytes to absorb
//   databitlen length of data in bits
// Returns 0.
// Fixes relative to the previous text: a stray 'i' after the memset fill
// value, and 128-bit _mm_add_epi64/_mm_sub_epi64 applied to the 512-bit
// counter (now _mm512_*, as in every other branch).
// NOTE(review): state->buffer is declared as an array of __m512i, while the
// code below indexes and offsets it with byte counts -- confirm the
// intended addressing for the interleaved layout.
int echo_4way_update_close( echo_4way_context *state, BitSequence *hashval,
                            const BitSequence *data, DataLength databitlen )
{
   unsigned int uByteLength, uBlockCount, uRemainingBytes;

   uByteLength = (unsigned int)(databitlen / 8);

   if ( ( state->uBufferBytes + uByteLength ) >= state->uBlockLength )
   {
      if ( state->uBufferBytes != 0 )
      {
         // Fill the buffer
         memcpy( state->buffer + state->uBufferBytes,
                 (void*)data, state->uBlockLength - state->uBufferBytes );

         // Process buffer
         echo_4way_compress( state, state->buffer, 1 );
         state->processed_bits += state->uBlockLength * 8;

         data += state->uBlockLength - state->uBufferBytes;
         uByteLength -= state->uBlockLength - state->uBufferBytes;
      }

      // buffer now does not contain any unprocessed bytes

      uBlockCount = uByteLength / state->uBlockLength;
      uRemainingBytes = uByteLength % state->uBlockLength;

      if ( uBlockCount > 0 )
      {
         echo_4way_compress( state, data, uBlockCount );
         state->processed_bits += uBlockCount * state->uBlockLength * 8;
         data += uBlockCount * state->uBlockLength;
      }

      if ( uRemainingBytes > 0 )
         memcpy( state->buffer, (void*)data, uRemainingBytes );
      state->uBufferBytes = uRemainingBytes;
   }
   else
   {
      memcpy( state->buffer + state->uBufferBytes, (void*)data, uByteLength );
      state->uBufferBytes += uByteLength;
   }

   __m512i remainingbits;

   // Add remaining bytes in the buffer
   state->processed_bits += state->uBufferBytes * 8;

   remainingbits = _mm512_set4_epi32( 0, 0, 0, state->uBufferBytes * 8 );

   // Pad with 0x80
   state->buffer[ state->uBufferBytes++ ] = 0x80;

   // Enough buffer space for padding in this block?
   if ( ( state->uBlockLength - state->uBufferBytes ) >= 18 )
   {
      // Pad with zeros
      memset( state->buffer + state->uBufferBytes, 0,
              state->uBlockLength - ( state->uBufferBytes + 18 ) );

      // Hash size
      *( (unsigned short*)( state->buffer + state->uBlockLength - 18 ) ) =
                 state->uHashSize;

      // Processed bits
      *( (DataLength*)( state->buffer + state->uBlockLength - 16 ) ) =
                 state->processed_bits;
      *( (DataLength*)( state->buffer + state->uBlockLength - 8 ) ) = 0;

      // Last block contains message bits?
      if ( state->uBufferBytes == 1 )
      {
         // No message bits: k is reset to zero before the adjustment.
         state->k = _mm512_xor_si512( state->k, state->k );
         state->k = _mm512_sub_epi64( state->k, state->const1536 );
      }
      else
      {
         state->k = _mm512_add_epi64( state->k, remainingbits );
         state->k = _mm512_sub_epi64( state->k, state->const1536 );
      }

      // Compress
      echo_4way_compress( state, state->buffer, 1 );
   }
   else
   {
      // Fill with zero and compress
      memset( state->buffer + state->uBufferBytes, 0,
              state->uBlockLength - state->uBufferBytes );
      state->k = _mm512_add_epi64( state->k, remainingbits );
      state->k = _mm512_sub_epi64( state->k, state->const1536 );
      echo_4way_compress( state, state->buffer, 1 );

      // Last block
      memset( state->buffer, 0, state->uBlockLength - 18 );

      // Hash size
      *( (unsigned short*)( state->buffer + state->uBlockLength - 18 ) ) =
                 state->uHashSize;

      // Processed bits
      *( (DataLength*)( state->buffer + state->uBlockLength - 16 ) ) =
                 state->processed_bits;
      *( (DataLength*)( state->buffer + state->uBlockLength - 8 ) ) = 0;

      // Compress the last block
      state->k = _mm512_xor_si512( state->k, state->k );
      state->k = _mm512_sub_epi64( state->k, state->const1536 );
      echo_4way_compress( state, state->buffer, 1 );
   }

   // Store the hash value
   _mm512_storeu_si512( (__m512i*)hashval + 0, state->state[ 0 ][ 0 ] );
   _mm512_storeu_si512( (__m512i*)hashval + 1, state->state[ 1 ][ 0 ] );

   if ( state->uHashSize == 512 )
   {
      _mm512_storeu_si512( (__m512i*)hashval + 2, state->state[ 2 ][ 0 ] );
      _mm512_storeu_si512( (__m512i*)hashval + 3, state->state[ 3 ][ 0 ] );
   }

   return 0;
}
|
||||
|
||||
#endif
|
36
algo/echo/echo-hash-4way.h
Normal file
36
algo/echo/echo-hash-4way.h
Normal file
@@ -0,0 +1,36 @@
|
||||
#if !defined(ECHO_HASH_4WAY_H__)
|
||||
#define ECHO_HASH_4WAY_H__ 1
|
||||
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
|
||||
#include "simd-utils.h"
|
||||
|
||||
// Working context of the 4-way ECHO hash.
typedef struct
{
   __m512i state[4][4];              // 4x4 hash state
   __m512i buffer[ 4 * 192 / 16 ];   // 4x128 interleaved 192 bytes
   __m512i k;                        // counter, advanced by const1536 per block
   __m512i hashsize;                 // initial value of the chaining columns
   __m512i const1536;                // per-block increment applied to k

   unsigned int uRounds;             // number of rounds
   unsigned int uHashSize;           // digest size in bits (256 or 512)
   unsigned int uBlockLength;        // message block length
   unsigned int uBufferBytes;        // bytes currently held in buffer
   unsigned int processed_bits;      // message bits absorbed so far

} echo_4way_context __attribute__ ((aligned (64)));
|
||||
|
||||
// Initialise a context for a digest of hashbitlen bits.
int echo_4way_init( echo_4way_context *state, int hashbitlen );

// Absorb databitlen bits of data.
// NOTE(review): the definition in echo-hash-4way.c spells the parameter
// types as BitSequence / DataLength -- confirm they are compatible with the
// types declared here.
int echo_4way_update( echo_4way_context *state, const void *data,
                      unsigned int databitlen);

// Finalise and write the digest to hashval. The function defined in
// echo-hash-4way.c is echo_4way_close; the declaration now carries the
// same name.
int echo_4way_close( echo_4way_context *state, void *hashval );

// Absorb databitlen bits of data, then finalise and write the digest.
int echo_4way_update_close( echo_4way_context *state, void *hashval,
                            const void *data, int databitlen );
|
||||
|
||||
#endif
|
||||
#endif
|
@@ -9,6 +9,7 @@
|
||||
|
||||
//#ifndef NO_AES_NI
|
||||
|
||||
// Not to be confused with AVX512VAES
|
||||
#define VAES
|
||||
// #define VAVX
|
||||
// #define VVPERM
|
||||
|
@@ -1,15 +1,206 @@
|
||||
#include "lyra2-gate.h"
|
||||
#include <memory.h>
|
||||
#include <mm_malloc.h>
|
||||
|
||||
#if defined (ALLIUM_4WAY)
|
||||
|
||||
#include "algo/blake/blake-hash-4way.h"
|
||||
#include "algo/keccak/keccak-hash-4way.h"
|
||||
#include "algo/skein/skein-hash-4way.h"
|
||||
#include "algo/cubehash/cubehash_sse2.h"
|
||||
#include "algo/cubehash/cube-hash-2way.h"
|
||||
#include "algo/groestl/aes_ni/hash-groestl256.h"
|
||||
|
||||
#if defined (ALLIUM_8WAY)
|
||||
|
||||
typedef struct {
|
||||
blake256_8way_context blake;
|
||||
keccak256_8way_context keccak;
|
||||
cube_4way_context cube;
|
||||
skein256_8way_context skein;
|
||||
hashState_groestl256 groestl;
|
||||
} allium_8way_ctx_holder;
|
||||
|
||||
static __thread allium_8way_ctx_holder allium_8way_ctx;
|
||||
|
||||
bool init_allium_8way_ctx()
|
||||
{
|
||||
keccak256_8way_init( &allium_8way_ctx.keccak );
|
||||
cube_4way_init( &allium_8way_ctx.cube, 256, 16, 32 );
|
||||
skein256_8way_init( &allium_8way_ctx.skein );
|
||||
init_groestl256( &allium_8way_ctx.groestl, 32 );
|
||||
return true;
|
||||
}
|
||||
|
||||
void allium_8way_hash( void *state, const void *input )
|
||||
{
|
||||
uint32_t vhash[8*8] __attribute__ ((aligned (128)));
|
||||
uint32_t vhashA[8*8] __attribute__ ((aligned (64)));
|
||||
uint32_t vhashB[8*8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash0[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash1[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash2[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash3[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash4[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash5[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash6[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash7[8] __attribute__ ((aligned (64)));
|
||||
allium_8way_ctx_holder ctx __attribute__ ((aligned (64)));
|
||||
|
||||
memcpy( &ctx, &allium_8way_ctx, sizeof(allium_8way_ctx) );
|
||||
blake256_8way_update( &ctx.blake, input + (64<<3), 16 );
|
||||
blake256_8way_close( &ctx.blake, vhash );
|
||||
|
||||
rintrlv_8x32_8x64( vhashA, vhash, 256 );
|
||||
keccak256_8way_update( &ctx.keccak, vhashA, 32 );
|
||||
keccak256_8way_close( &ctx.keccak, vhash );
|
||||
|
||||
dintrlv_8x64( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
|
||||
vhash, 256 );
|
||||
|
||||
|
||||
intrlv_2x256( vhash, hash0, hash1, 256 );
|
||||
LYRA2RE_2WAY( vhash, 32, vhash, 32, 1, 8, 8 );
|
||||
dintrlv_2x256( hash0, hash1, vhash, 256 );
|
||||
intrlv_2x256( vhash, hash2, hash3, 256 );
|
||||
LYRA2RE_2WAY( vhash, 32, vhash, 32, 1, 8, 8 );
|
||||
dintrlv_2x256( hash2, hash3, vhash, 256 );
|
||||
intrlv_2x256( vhash, hash4, hash5, 256 );
|
||||
LYRA2RE_2WAY( vhash, 32, vhash, 32, 1, 8, 8 );
|
||||
dintrlv_2x256( hash4, hash5, vhash, 256 );
|
||||
intrlv_2x256( vhash, hash6, hash7, 256 );
|
||||
LYRA2RE_2WAY( vhash, 32, vhash, 32, 1, 8, 8 );
|
||||
dintrlv_2x256( hash6, hash7, vhash, 256 );
|
||||
|
||||
/*
|
||||
LYRA2RE( hash0, 32, hash0, 32, hash0, 32, 1, 8, 8 );
|
||||
LYRA2RE( hash1, 32, hash1, 32, hash1, 32, 1, 8, 8 );
|
||||
LYRA2RE( hash2, 32, hash2, 32, hash2, 32, 1, 8, 8 );
|
||||
LYRA2RE( hash3, 32, hash3, 32, hash3, 32, 1, 8, 8 );
|
||||
LYRA2RE( hash4, 32, hash4, 32, hash4, 32, 1, 8, 8 );
|
||||
LYRA2RE( hash5, 32, hash5, 32, hash5, 32, 1, 8, 8 );
|
||||
LYRA2RE( hash6, 32, hash6, 32, hash6, 32, 1, 8, 8 );
|
||||
LYRA2RE( hash7, 32, hash7, 32, hash7, 32, 1, 8, 8 );
|
||||
*/
|
||||
|
||||
|
||||
|
||||
intrlv_4x128( vhashA, hash0, hash1, hash2, hash3, 256 );
|
||||
intrlv_4x128( vhashB, hash4, hash5, hash6, hash7, 256 );
|
||||
|
||||
cube_4way_update_close( &ctx.cube, vhashA, vhashA, 32 );
|
||||
cube_4way_init( &ctx.cube, 256, 16, 32 );
|
||||
cube_4way_update_close( &ctx.cube, vhashB, vhashB, 32 );
|
||||
|
||||
dintrlv_4x128( hash0, hash1, hash2, hash3, vhashA, 256 );
|
||||
dintrlv_4x128( hash4, hash5, hash6, hash7, vhashB, 256 );
|
||||
|
||||
intrlv_2x256( vhash, hash0, hash1, 256 );
|
||||
LYRA2RE_2WAY( vhash, 32, vhash, 32, 1, 8, 8 );
|
||||
dintrlv_2x256( hash0, hash1, vhash, 256 );
|
||||
intrlv_2x256( vhash, hash2, hash3, 256 );
|
||||
LYRA2RE_2WAY( vhash, 32, vhash, 32, 1, 8, 8 );
|
||||
dintrlv_2x256( hash2, hash3, vhash, 256 );
|
||||
intrlv_2x256( vhash, hash4, hash5, 256 );
|
||||
LYRA2RE_2WAY( vhash, 32, vhash, 32, 1, 8, 8 );
|
||||
dintrlv_2x256( hash4, hash5, vhash, 256 );
|
||||
intrlv_2x256( vhash, hash6, hash7, 256 );
|
||||
LYRA2RE_2WAY( vhash, 32, vhash, 32, 1, 8, 8 );
|
||||
dintrlv_2x256( hash6, hash7, vhash, 256 );
|
||||
|
||||
|
||||
/*
|
||||
LYRA2RE( hash0, 32, hash0, 32, hash0, 32, 1, 8, 8 );
|
||||
LYRA2RE( hash1, 32, hash1, 32, hash1, 32, 1, 8, 8 );
|
||||
LYRA2RE( hash2, 32, hash2, 32, hash2, 32, 1, 8, 8 );
|
||||
LYRA2RE( hash3, 32, hash3, 32, hash3, 32, 1, 8, 8 );
|
||||
LYRA2RE( hash4, 32, hash4, 32, hash4, 32, 1, 8, 8 );
|
||||
LYRA2RE( hash5, 32, hash5, 32, hash5, 32, 1, 8, 8 );
|
||||
LYRA2RE( hash6, 32, hash6, 32, hash6, 32, 1, 8, 8 );
|
||||
LYRA2RE( hash7, 32, hash7, 32, hash7, 32, 1, 8, 8 );
|
||||
*/
|
||||
|
||||
|
||||
|
||||
intrlv_8x64( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7, 256 );
|
||||
|
||||
skein256_8way_update( &ctx.skein, vhash, 32 );
|
||||
skein256_8way_close( &ctx.skein, vhash );
|
||||
|
||||
dintrlv_8x64( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
|
||||
vhash, 256 );
|
||||
|
||||
update_and_final_groestl256( &ctx.groestl, state, hash0, 256 );
|
||||
memcpy( &ctx.groestl, &allium_8way_ctx.groestl,
|
||||
sizeof(hashState_groestl256) );
|
||||
update_and_final_groestl256( &ctx.groestl, state+32, hash1, 256 );
|
||||
memcpy( &ctx.groestl, &allium_8way_ctx.groestl,
|
||||
sizeof(hashState_groestl256) );
|
||||
update_and_final_groestl256( &ctx.groestl, state+64, hash2, 256 );
|
||||
memcpy( &ctx.groestl, &allium_8way_ctx.groestl,
|
||||
sizeof(hashState_groestl256) );
|
||||
update_and_final_groestl256( &ctx.groestl, state+96, hash3, 256 );
|
||||
memcpy( &ctx.groestl, &allium_8way_ctx.groestl,
|
||||
sizeof(hashState_groestl256) );
|
||||
update_and_final_groestl256( &ctx.groestl, state+128, hash4, 256 );
|
||||
memcpy( &ctx.groestl, &allium_8way_ctx.groestl,
|
||||
sizeof(hashState_groestl256) );
|
||||
update_and_final_groestl256( &ctx.groestl, state+160, hash5, 256 );
|
||||
memcpy( &ctx.groestl, &allium_8way_ctx.groestl,
|
||||
sizeof(hashState_groestl256) );
|
||||
update_and_final_groestl256( &ctx.groestl, state+192, hash6, 256 );
|
||||
memcpy( &ctx.groestl, &allium_8way_ctx.groestl,
|
||||
sizeof(hashState_groestl256) );
|
||||
update_and_final_groestl256( &ctx.groestl, state+224, hash7, 256 );
|
||||
memcpy( &ctx.groestl, &allium_8way_ctx.groestl,
|
||||
sizeof(hashState_groestl256) );
|
||||
}
|
||||
|
||||
int scanhash_allium_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[8*8] __attribute__ ((aligned (128)));
|
||||
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
const uint32_t last_nonce = max_nonce - 8;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
__m256i *noncev = (__m256i*)vdata + 19; // aligned
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
if ( opt_benchmark )
|
||||
( (uint32_t*)ptarget )[7] = 0x0000ff;
|
||||
|
||||
mm256_bswap32_intrlv80_8x32( vdata, pdata );
|
||||
blake256_8way_init( &allium_8way_ctx.blake );
|
||||
blake256_8way_update( &allium_8way_ctx.blake, vdata, 64 );
|
||||
|
||||
do {
|
||||
*noncev = mm256_bswap_32( _mm256_set_epi32( n+7, n+6, n+5, n+4,
|
||||
n+3, n+2, n+1, n ) );
|
||||
|
||||
allium_8way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
for ( int lane = 0; lane < 8; lane++ ) if ( (hash+(lane<<3))[7] <= Htarg )
|
||||
{
|
||||
if ( fulltest( hash+(lane<<3), ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
submit_lane_solution( work, hash+(lane<<3), mythr, lane );
|
||||
}
|
||||
}
|
||||
n += 8;
|
||||
} while ( (n < last_nonce) && !work_restart[thr_id].restart);
|
||||
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
#elif defined (ALLIUM_4WAY)
|
||||
|
||||
|
||||
typedef struct {
|
||||
blake256_4way_context blake;
|
||||
keccak256_4way_context keccak;
|
||||
|
@@ -129,7 +129,11 @@ bool register_lyra2rev2_algo( algo_gate_t* gate )
|
||||
|
||||
bool register_lyra2z_algo( algo_gate_t* gate )
|
||||
{
|
||||
#if defined(LYRA2Z_8WAY)
|
||||
#if defined(LYRA2Z_16WAY)
|
||||
gate->miner_thread_init = (void*)&lyra2z_16way_thread_init;
|
||||
gate->scanhash = (void*)&scanhash_lyra2z_16way;
|
||||
gate->hash = (void*)&lyra2z_16way_hash;
|
||||
#elif defined(LYRA2Z_8WAY)
|
||||
gate->miner_thread_init = (void*)&lyra2z_8way_thread_init;
|
||||
gate->scanhash = (void*)&scanhash_lyra2z_8way;
|
||||
gate->hash = (void*)&lyra2z_8way_hash;
|
||||
@@ -142,7 +146,7 @@ bool register_lyra2z_algo( algo_gate_t* gate )
|
||||
gate->scanhash = (void*)&scanhash_lyra2z;
|
||||
gate->hash = (void*)&lyra2z_hash;
|
||||
#endif
|
||||
gate->optimizations = SSE42_OPT | AVX2_OPT;
|
||||
gate->optimizations = SSE42_OPT | AVX2_OPT | AVX512_OPT;
|
||||
opt_target_factor = 256.0;
|
||||
return true;
|
||||
};
|
||||
@@ -170,7 +174,11 @@ bool register_lyra2h_algo( algo_gate_t* gate )
|
||||
|
||||
bool register_allium_algo( algo_gate_t* gate )
|
||||
{
|
||||
#if defined (ALLIUM_4WAY)
|
||||
#if defined (ALLIUM_8WAY)
|
||||
gate->miner_thread_init = (void*)&init_allium_8way_ctx;
|
||||
gate->scanhash = (void*)&scanhash_allium_8way;
|
||||
gate->hash = (void*)&allium_8way_hash;
|
||||
#elif defined (ALLIUM_4WAY)
|
||||
gate->miner_thread_init = (void*)&init_allium_4way_ctx;
|
||||
gate->scanhash = (void*)&scanhash_allium_4way;
|
||||
gate->hash = (void*)&allium_4way_hash;
|
||||
@@ -179,7 +187,7 @@ bool register_allium_algo( algo_gate_t* gate )
|
||||
gate->scanhash = (void*)&scanhash_allium;
|
||||
gate->hash = (void*)&allium_hash;
|
||||
#endif
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | SSE42_OPT | AVX2_OPT;
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | SSE42_OPT | AVX2_OPT | AVX512_OPT;
|
||||
opt_target_factor = 256.0;
|
||||
return true;
|
||||
};
|
||||
|
@@ -85,17 +85,25 @@ bool init_lyra2rev2_ctx();
|
||||
|
||||
/////////////////////////
|
||||
|
||||
#if defined(__SSE2__)
|
||||
#define LYRA2Z_4WAY
|
||||
#endif
|
||||
#if defined(__AVX2__)
|
||||
#define LYRA2Z_8WAY
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
#define LYRA2Z_16WAY 1
|
||||
#elif defined(__AVX2__)
|
||||
#define LYRA2Z_8WAY 1
|
||||
#elif defined(__SSE2__)
|
||||
#define LYRA2Z_4WAY 1
|
||||
#endif
|
||||
|
||||
|
||||
// Size of the lyra2z working matrix as passed to _mm_malloc by the
// lyra2z thread_init functions. Parenthesized so the product stays intact
// when the macro appears inside a larger expression (e.g. a division).
#define LYRA2Z_MATRIX_SIZE ( BLOCK_LEN_INT64 * 8 * 8 * 8 )
|
||||
|
||||
#if defined(LYRA2Z_8WAY)
|
||||
#if defined(LYRA2Z_16WAY)
|
||||
|
||||
void lyra2z_16way_hash( void *state, const void *input );
|
||||
int scanhash_lyra2z_16way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
bool lyra2z_16way_thread_init();
|
||||
|
||||
#elif defined(LYRA2Z_8WAY)
|
||||
|
||||
void lyra2z_8way_hash( void *state, const void *input );
|
||||
int scanhash_lyra2z_8way( struct work *work, uint32_t max_nonce,
|
||||
@@ -144,13 +152,22 @@ bool lyra2h_thread_init();
|
||||
|
||||
//////////////////////////////////
|
||||
|
||||
#if defined(__AVX2__) && defined(__AES__)
|
||||
#define ALLIUM_4WAY
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
#define ALLIUM_8WAY 1
|
||||
#elif defined(__AVX2__) && defined(__AES__)
|
||||
#define ALLIUM_4WAY 1
|
||||
#endif
|
||||
|
||||
bool register_allium_algo( algo_gate_t* gate );
|
||||
|
||||
#if defined(ALLIUM_4WAY)
|
||||
#if defined(ALLIUM_8WAY)
|
||||
|
||||
void allium_8way_hash( void *state, const void *input );
|
||||
int scanhash_allium_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
bool init_allium_8way_ctx();
|
||||
|
||||
#elif defined(ALLIUM_4WAY)
|
||||
|
||||
void allium_4way_hash( void *state, const void *input );
|
||||
int scanhash_allium_4way( struct work *work, uint32_t max_nonce,
|
||||
|
@@ -26,14 +26,17 @@
|
||||
#include "lyra2.h"
|
||||
#include "sponge.h"
|
||||
|
||||
// LYRA2RE 8 cols 8 rows used by lyea2re, allium, phi2, x22i, x25x.
|
||||
// LYRA2RE 8 cols 8 rows used by lyra2re, allium, phi2, x22i, x25x,
|
||||
// dynamic matrix allocation.
|
||||
//
|
||||
// LYRA2REV2 4 cols 4 rows used by lyra2rev2.
|
||||
// LYRA2REV2 4 cols 4 rows used by lyra2rev2 and x21s, static matrix
|
||||
// allocation.
|
||||
//
|
||||
// LYRA2REV3 4 cols 4 rows with an extra twist in calculating
|
||||
// rowa in the wandering phase. Used by lyra2rev3.
|
||||
// rowa in the wandering phase. Used by lyra2rev3. Static matrix
|
||||
// allocation.
|
||||
//
|
||||
// LYRA2Z various cols & rows and supports 80 input. Used by lyra2z,
|
||||
// LYRA2Z various cols & rows and supports 80 byte input. Used by lyra2z,
|
||||
// lyra2z330, lyra2h,
|
||||
|
||||
|
||||
@@ -60,7 +63,7 @@
|
||||
*/
|
||||
|
||||
// For lyra2rev3.
|
||||
// convert a simple offset to an index into interleaved data.
|
||||
// convert a simple offset to an index into 2x4 u64 interleaved data.
|
||||
// good for state and 4 row matrix.
|
||||
// index = ( int( off / 4 ) * 2 ) + ( off mod 4 )
|
||||
|
||||
@@ -202,12 +205,8 @@ int LYRA2REV3_2WAY( uint64_t* wholeMatrix, void *K, uint64_t kLen,
|
||||
|
||||
// hard coded for 32 byte input as well as matrix size.
|
||||
// Other required versions include 80 byte input and different block
|
||||
// sizez
|
||||
// sizes.
|
||||
|
||||
//int LYRA2REV3_2WAY( uint64_t* wholeMatrix, void *K, uint64_t kLen,
|
||||
// const void *pwd, const uint64_t pwdlen, const void *salt,
|
||||
// const uint64_t saltlen, const uint64_t timeCost, const uint64_t nRows,
|
||||
// const uint64_t nCols )
|
||||
{
|
||||
//====================== Basic variables ============================//
|
||||
uint64_t _ALIGN(256) state[32];
|
||||
@@ -335,159 +334,111 @@ int LYRA2REV3_2WAY( uint64_t* wholeMatrix, void *K, uint64_t kLen,
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif // AVX512
|
||||
|
||||
#if 0
|
||||
|
||||
//////////////////////////////////////////////////
|
||||
int LYRA2Z( uint64_t* wholeMatrix, void *K, uint64_t kLen, const void *pwd,
|
||||
const uint64_t pwdlen, const void *salt, const uint64_t saltlen,
|
||||
const uint64_t timeCost, const uint64_t nRows,
|
||||
const uint64_t nCols )
|
||||
|
||||
int LYRA2Z_2WAY( uint64_t* wholeMatrix, void *K, uint64_t kLen,
|
||||
const void *pwd, const uint64_t pwdlen, const uint64_t timeCost,
|
||||
const uint64_t nRows, const uint64_t nCols )
|
||||
{
|
||||
//========================== Basic variables ============================//
|
||||
uint64_t _ALIGN(256) state[16];
|
||||
int64_t row = 2; //index of row to be processed
|
||||
int64_t prev = 1; //index of prev (last row ever computed/modified)
|
||||
int64_t rowa = 0; //index of row* (a previous row, deterministically picked during Setup and randomly picked while Wandering)
|
||||
int64_t tau; //Time Loop iterator
|
||||
int64_t step = 1; //Visitation step (used during Setup and Wandering phases)
|
||||
int64_t window = 2; //Visitation window (used to define which rows can be revisited during Setup)
|
||||
int64_t gap = 1; //Modifier to the step, assuming the values 1 or -1
|
||||
// int64_t i; //auxiliary iteration counter
|
||||
uint64_t _ALIGN(256) state[32];
|
||||
int64_t row = 2;
|
||||
int64_t prev = 1;
|
||||
int64_t rowa0 = 0;
|
||||
int64_t rowa1 = 0;
|
||||
int64_t tau;
|
||||
int64_t step = 1;
|
||||
int64_t window = 2;
|
||||
int64_t gap = 1;
|
||||
//=======================================================================/
|
||||
|
||||
//======= Initializing the Memory Matrix and pointers to it =============//
|
||||
//Tries to allocate enough space for the whole memory matrix
|
||||
|
||||
const int64_t ROW_LEN_INT64 = BLOCK_LEN_INT64 * nCols;
|
||||
// const int64_t ROW_LEN_BYTES = ROW_LEN_INT64 * 8;
|
||||
|
||||
// memset( wholeMatrix, 0, ROW_LEN_BYTES * nRows );
|
||||
|
||||
//==== Getting the password + salt + basil padded with 10*1 ============//
|
||||
//OBS.:The memory matrix will temporarily hold the password: not for saving memory,
|
||||
//but this ensures that the password copied locally will be overwritten as soon as possible
|
||||
|
||||
//First, we clean enough blocks for the password, salt, basil and padding
|
||||
uint64_t nBlocksInput = ( ( saltlen + pwdlen + 6 *
|
||||
uint64_t nBlocksInput = ( ( pwdlen + pwdlen + 6 *
|
||||
sizeof (uint64_t) ) / BLOCK_LEN_BLAKE2_SAFE_BYTES ) + 1;
|
||||
byte *ptrByte = (byte*) wholeMatrix;
|
||||
memset( ptrByte, 0, nBlocksInput * BLOCK_LEN_BLAKE2_SAFE_BYTES );
|
||||
|
||||
//Prepends the password
|
||||
memcpy(ptrByte, pwd, pwdlen);
|
||||
ptrByte += pwdlen;
|
||||
uint64_t *ptr = wholeMatrix;
|
||||
uint64_t *pw = (uint64_t*)pwd;
|
||||
|
||||
//Concatenates the salt
|
||||
memcpy(ptrByte, salt, saltlen);
|
||||
ptrByte += saltlen;
|
||||
//Concatenates the basil: every integer passed as parameter, in the order they are provided by the interface
|
||||
memcpy(ptrByte, &kLen, sizeof (uint64_t));
|
||||
ptrByte += sizeof (uint64_t);
|
||||
memcpy(ptrByte, &pwdlen, sizeof (uint64_t));
|
||||
ptrByte += sizeof (uint64_t);
|
||||
memcpy(ptrByte, &saltlen, sizeof (uint64_t));
|
||||
ptrByte += sizeof (uint64_t);
|
||||
memcpy(ptrByte, &timeCost, sizeof (uint64_t));
|
||||
ptrByte += sizeof (uint64_t);
|
||||
memcpy(ptrByte, &nRows, sizeof (uint64_t));
|
||||
ptrByte += sizeof (uint64_t);
|
||||
memcpy(ptrByte, &nCols, sizeof (uint64_t));
|
||||
ptrByte += sizeof (uint64_t);
|
||||
memcpy( ptr, pw, 2*pwdlen ); // password
|
||||
ptr += pwdlen>>2;
|
||||
memcpy( ptr, pw, 2*pwdlen ); // password lane 1
|
||||
ptr += pwdlen>>2;
|
||||
|
||||
//Now comes the padding
|
||||
*ptrByte = 0x80; //first byte of padding: right after the password
|
||||
ptrByte = (byte*) wholeMatrix; //resets the pointer to the start of the memory matrix
|
||||
ptrByte += nBlocksInput * BLOCK_LEN_BLAKE2_SAFE_BYTES - 1; //sets the pointer to the correct position: end of incomplete block
|
||||
*ptrByte ^= 0x01; //last byte of padding: at the end of the last incomplete block
|
||||
// now build the rest interleaving on the fly.
|
||||
ptr[0] = ptr[ 4] = kLen;
|
||||
ptr[1] = ptr[ 5] = pwdlen;
|
||||
ptr[2] = ptr[ 6] = pwdlen; // saltlen
|
||||
ptr[3] = ptr[ 7] = timeCost;
|
||||
ptr[8] = ptr[12] = nRows;
|
||||
ptr[9] = ptr[13] = nCols;
|
||||
ptr[10] = ptr[14] = 0x80;
|
||||
ptr[11] = ptr[15] = 0x0100000000000000;
|
||||
|
||||
//=================== Initializing the Sponge State ====================//
|
||||
//Sponge state: 16 uint64_t, BLOCK_LEN_INT64 words of them for the bitrate (b) and the remainder for the capacity (c)
|
||||
// uint64_t *state = _mm_malloc(16 * sizeof(uint64_t), 32);
|
||||
// if (state == NULL) {
|
||||
// return -1;
|
||||
// }
|
||||
// initState( state );
|
||||
uint64_t *ptrWord = wholeMatrix;
|
||||
|
||||
//============================== Setup Phase =============================//
|
||||
//Absorbing salt, password and basil: this is the only place in which the block length is hard-coded to 512 bits
|
||||
uint64_t *ptrWord = wholeMatrix;
|
||||
absorbBlockBlake2Safe_2way( state, ptrWord, nBlocksInput,
|
||||
BLOCK_LEN_BLAKE2_SAFE_INT64 );
|
||||
|
||||
absorbBlockBlake2Safe( state, ptrWord, nBlocksInput,
|
||||
BLOCK_LEN_BLAKE2_SAFE_INT64 );
|
||||
/*
|
||||
for ( i = 0; i < nBlocksInput; i++ )
|
||||
{
|
||||
absorbBlockBlake2Safe( state, ptrWord ); //absorbs each block of pad(pwd || salt || basil)
|
||||
ptrWord += BLOCK_LEN_BLAKE2_SAFE_INT64; //goes to next block of pad(pwd || salt || basil)
|
||||
}
|
||||
*/
|
||||
//Initializes M[0] and M[1]
|
||||
reducedSqueezeRow0(state, &wholeMatrix[0], nCols); //The locally copied password is most likely overwritten here
|
||||
reducedDuplexRow1(state, &wholeMatrix[0], &wholeMatrix[ROW_LEN_INT64], nCols);
|
||||
//Initializes M[0] and M[1]
|
||||
reducedSqueezeRow0_2way( state, &wholeMatrix[0], nCols );
|
||||
|
||||
do {
|
||||
//M[row] = rand; //M[row*] = M[row*] XOR rotW(rand)
|
||||
reducedDuplexRowSetup(state, &wholeMatrix[prev*ROW_LEN_INT64], &wholeMatrix[rowa*ROW_LEN_INT64], &wholeMatrix[row*ROW_LEN_INT64], nCols);
|
||||
reducedDuplexRow1_2way( state, &wholeMatrix[0],
|
||||
&wholeMatrix[ 2 * ROW_LEN_INT64 ], nCols );
|
||||
|
||||
//updates the value of row* (deterministically picked during Setup))
|
||||
rowa = (rowa + step) & (window - 1);
|
||||
//update prev: it now points to the last row ever computed
|
||||
prev = row;
|
||||
//updates row: goes to the next row to be computed
|
||||
row++;
|
||||
do
|
||||
{
|
||||
//M[row] = rand; //M[row*] = M[row*] XOR rotW(rand)
|
||||
|
||||
//Checks if all rows in the window where visited.
|
||||
if (rowa == 0) {
|
||||
step = window + gap; //changes the step: approximately doubles its value
|
||||
window *= 2; //doubles the size of the re-visitation window
|
||||
gap = -gap; //inverts the modifier to the step
|
||||
}
|
||||
reducedDuplexRowSetup_2way( state, &wholeMatrix[ 2* prev * ROW_LEN_INT64],
|
||||
&wholeMatrix[ 2* rowa0 * ROW_LEN_INT64],
|
||||
&wholeMatrix[ 2* row*ROW_LEN_INT64],
|
||||
nCols );
|
||||
|
||||
} while (row < nRows);
|
||||
rowa0 = (rowa0 + step) & (window - 1);
|
||||
prev = row;
|
||||
row++;
|
||||
|
||||
//======================== Wandering Phase =============================//
|
||||
row = 0; //Resets the visitation to the first row of the memory matrix
|
||||
for ( tau = 1; tau <= timeCost; tau++ )
|
||||
{
|
||||
//Step is approximately half the number of all rows of the memory matrix for an odd tau; otherwise, it is -1
|
||||
if ( rowa0 == 0 )
|
||||
{
|
||||
step = window + gap;
|
||||
window *= 2;
|
||||
gap = -gap;
|
||||
}
|
||||
} while ( row < nRows );
|
||||
|
||||
row = 0;
|
||||
for ( tau = 1; tau <= timeCost; tau++ )
|
||||
{
|
||||
step = (tau % 2 == 0) ? -1 : nRows / 2 - 1;
|
||||
do {
|
||||
//Selects a pseudorandom index row*
|
||||
//----------------------------------------------------------------------
|
||||
//rowa = ((unsigned int)state[0]) & (nRows-1); //(USE THIS IF nRows IS A POWER OF 2)
|
||||
rowa = ((uint64_t) (state[0])) % nRows; //(USE THIS FOR THE "GENERIC" CASE)
|
||||
//-----------------------------------------------------------------
|
||||
do
|
||||
{
|
||||
rowa0 = state[ 0 ] % nRows;
|
||||
rowa1 = state[ 4 ] % nRows;
|
||||
|
||||
//Performs a reduced-round duplexing operation over M[row*] XOR M[prev], updating both M[row*] and M[row]
|
||||
reducedDuplexRow(state, &wholeMatrix[prev*ROW_LEN_INT64], &wholeMatrix[rowa*ROW_LEN_INT64], &wholeMatrix[row*ROW_LEN_INT64], nCols);
|
||||
reducedDuplexRow_2way( state, &wholeMatrix[ 2* prev * ROW_LEN_INT64 ],
|
||||
&wholeMatrix[ 2* rowa0 * ROW_LEN_INT64 ],
|
||||
&wholeMatrix[ 2* rowa1 * ROW_LEN_INT64 ],
|
||||
&wholeMatrix[ 2* row *ROW_LEN_INT64 ],
|
||||
nCols );
|
||||
|
||||
//update prev: it now points to the last row ever computed
|
||||
prev = row;
|
||||
|
||||
//updates row: goes to the next row to be computed
|
||||
//---------------------------------------------------------------
|
||||
//row = (row + step) & (nRows-1); //(USE THIS IF nRows IS A POWER OF 2)
|
||||
row = (row + step) % nRows; //(USE THIS FOR THE "GENERIC" CASE)
|
||||
//--------------------------------------------------------------------
|
||||
row = (row + step) % nRows;
|
||||
|
||||
} while (row != 0);
|
||||
}
|
||||
}
|
||||
|
||||
//========================= Wrap-up Phase ===============================//
|
||||
//Absorbs the last block of the memory matrix
|
||||
absorbBlock(state, &wholeMatrix[rowa*ROW_LEN_INT64]);
|
||||
absorbBlock_2way( state, &wholeMatrix[ 2 * rowa0 *ROW_LEN_INT64 ],
|
||||
&wholeMatrix[ 2 * rowa1 *ROW_LEN_INT64 ] );
|
||||
|
||||
//Squeezes the key
|
||||
squeeze( state, K, kLen );
|
||||
//Squeezes the key
|
||||
squeeze_2way( state, K, (unsigned int) kLen );
|
||||
|
||||
return 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
////////////////////////////////////////////////////
|
||||
|
||||
// Lyra2RE doesn't like the new wholeMatrix implementation
|
||||
int LYRA2RE_2WAY( void *K, uint64_t kLen, const void *pwd,
|
||||
@@ -495,7 +446,7 @@ int LYRA2RE_2WAY( void *K, uint64_t kLen, const void *pwd,
|
||||
const uint64_t nRows, const uint64_t nCols )
|
||||
{
|
||||
//====================== Basic variables ============================//
|
||||
uint64_t _ALIGN(256) state[16];
|
||||
uint64_t _ALIGN(256) state[32];
|
||||
int64_t row = 2; //index of row to be processed
|
||||
int64_t prev = 1; //index of prev (last row ever computed/modified)
|
||||
int64_t rowa0 = 0;
|
||||
@@ -517,25 +468,15 @@ int LYRA2RE_2WAY( void *K, uint64_t kLen, const void *pwd,
|
||||
: BLOCK_LEN_BLAKE2_SAFE_BYTES;
|
||||
|
||||
i = (int64_t)ROW_LEN_BYTES * nRows;
|
||||
uint64_t *wholeMatrix = _mm_malloc( i, 64 );
|
||||
uint64_t *wholeMatrix = _mm_malloc( 2*i, 64 );
|
||||
if (wholeMatrix == NULL)
|
||||
return -1;
|
||||
|
||||
#if defined(__AVX2__)
|
||||
memset_zero_256( (__m256i*)wholeMatrix, i>>5 );
|
||||
#elif defined(__SSE2__)
|
||||
memset_zero_128( (__m128i*)wholeMatrix, i>>4 );
|
||||
#else
|
||||
memset( wholeMatrix, 0, i );
|
||||
#endif
|
||||
memset_zero_512( (__m512i*)wholeMatrix, i>>5 );
|
||||
|
||||
uint64_t *ptrWord = wholeMatrix;
|
||||
uint64_t *pw = (uint64_t*)pwd;
|
||||
|
||||
//=== Getting the password + salt + basil padded with 10*1 ==========//
|
||||
//OBS.:The memory matrix will temporarily hold the password: not for saving memory,
|
||||
//but this ensures that the password copied locally will be overwritten as soon as possible
|
||||
|
||||
//First, we clean enough blocks for the password, salt, basil and padding
|
||||
int64_t nBlocksInput = ( ( pwdlen + pwdlen + 6 * sizeof(uint64_t) )
|
||||
/ BLOCK_LEN_BLAKE2_SAFE_BYTES ) + 1;
|
||||
@@ -558,66 +499,8 @@ int LYRA2RE_2WAY( void *K, uint64_t kLen, const void *pwd,
|
||||
ptr[10] = ptr[14] = 0x80;
|
||||
ptr[11] = ptr[15] = 0x0100000000000000;
|
||||
|
||||
|
||||
/*
|
||||
byte *ptrByte = (byte*) wholeMatrix;
|
||||
|
||||
//Prepends the password
|
||||
memcpy(ptrByte, pwd, pwdlen);
|
||||
ptrByte += pwdlen;
|
||||
|
||||
//Concatenates the salt
|
||||
memcpy(ptrByte, salt, saltlen);
|
||||
ptrByte += saltlen;
|
||||
|
||||
// memset( ptrByte, 0, nBlocksInput * BLOCK_LEN_BLAKE2_SAFE_BYTES
|
||||
// - (saltlen + pwdlen) );
|
||||
|
||||
//Concatenates the basil: every integer passed as parameter, in the order they are provided by the interface
|
||||
memcpy(ptrByte, &kLen, sizeof(int64_t));
|
||||
ptrByte += sizeof(uint64_t);
|
||||
v64 = pwdlen;
|
||||
memcpy(ptrByte, &v64, sizeof(int64_t));
|
||||
ptrByte += sizeof(uint64_t);
|
||||
v64 = saltlen;
|
||||
memcpy(ptrByte, &v64, sizeof(int64_t));
|
||||
ptrByte += sizeof(uint64_t);
|
||||
v64 = timeCost;
|
||||
memcpy(ptrByte, &v64, sizeof(int64_t));
|
||||
ptrByte += sizeof(uint64_t);
|
||||
v64 = nRows;
|
||||
memcpy(ptrByte, &v64, sizeof(int64_t));
|
||||
ptrByte += sizeof(uint64_t);
|
||||
v64 = nCols;
|
||||
memcpy(ptrByte, &v64, sizeof(int64_t));
|
||||
ptrByte += sizeof(uint64_t);
|
||||
|
||||
//Now comes the padding
|
||||
*ptrByte = 0x80; //first byte of padding: right after the password
|
||||
ptrByte = (byte*) wholeMatrix; //resets the pointer to the start of the memory matrix
|
||||
ptrByte += nBlocksInput * BLOCK_LEN_BLAKE2_SAFE_BYTES - 1; //sets the pointer to the correct position: end of incomplete block
|
||||
*ptrByte ^= 0x01; //last byte of padding: at the end of the last incomplete block
|
||||
|
||||
//================= Initializing the Sponge State ====================//
|
||||
//Sponge state: 16 uint64_t, BLOCK_LEN_INT64 words of them for the bitrate (b) and the remainder for the capacity (c)
|
||||
|
||||
// initState( state );
|
||||
|
||||
//========================= Setup Phase =============================//
|
||||
//Absorbing salt, password and basil: this is the only place in which the block length is hard-coded to 512 bits
|
||||
|
||||
ptrWord = wholeMatrix;
|
||||
|
||||
*/
|
||||
|
||||
absorbBlockBlake2Safe_2way( state, ptrWord, nBlocksInput, BLOCK_LEN );
|
||||
/*
|
||||
for (i = 0; i < nBlocksInput; i++)
|
||||
{
|
||||
absorbBlockBlake2Safe( state, ptrWord ); //absorbs each block of pad(pwd || salt || basil)
|
||||
ptrWord += BLOCK_LEN; //goes to next block of pad(pwd || salt || basil)
|
||||
}
|
||||
*/
|
||||
|
||||
//Initializes M[0] and M[1]
|
||||
reducedSqueezeRow0_2way( state, &wholeMatrix[0], nCols ); //The locally copied password is most likely overwritten here
|
||||
|
||||
|
@@ -62,6 +62,8 @@ int LYRA2(void *K, int64_t kLen, const void *pwd, int32_t pwdlen, const void *sa
|
||||
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
|
||||
int LYRA2RE_2WAY( void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen,
|
||||
uint64_t timeCost, uint64_t nRows, uint64_t nCols );
|
||||
|
||||
int LYRA2REV2_2WAY( uint64_t*, void *K, uint64_t kLen, const void *pwd,
|
||||
uint64_t pwdlen, uint64_t timeCost, uint64_t nRows, uint64_t nCols );
|
||||
@@ -69,6 +71,9 @@ int LYRA2REV2_2WAY( uint64_t*, void *K, uint64_t kLen, const void *pwd,
|
||||
int LYRA2REV3_2WAY( uint64_t*, void *K, uint64_t kLen, const void *pwd,
|
||||
uint64_t pwdlen, uint64_t timeCost, uint64_t nRows, uint64_t nCols );
|
||||
|
||||
int LYRA2Z_2WAY( uint64_t*, void *K, uint64_t kLen, const void *pwd,
|
||||
uint64_t pwdlen, uint64_t timeCost, uint64_t nRows, uint64_t nCols );
|
||||
|
||||
#endif
|
||||
|
||||
#endif /* LYRA2_H_ */
|
||||
|
@@ -1,13 +1,240 @@
|
||||
#include "lyra2-gate.h"
|
||||
|
||||
#ifdef LYRA2Z_4WAY
|
||||
|
||||
#include <memory.h>
|
||||
#include <mm_malloc.h>
|
||||
#include "lyra2.h"
|
||||
#include "algo/blake/sph_blake.h"
|
||||
#include "algo/blake/blake-hash-4way.h"
|
||||
|
||||
#if defined(LYRA2Z_16WAY)
|
||||
|
||||
__thread uint64_t* lyra2z_16way_matrix;
|
||||
|
||||
bool lyra2z_16way_thread_init()
|
||||
{
|
||||
return ( lyra2z_16way_matrix = _mm_malloc( 2*LYRA2Z_MATRIX_SIZE, 64 ) );
|
||||
}
|
||||
|
||||
static __thread blake256_16way_context l2z_16way_blake_mid;
|
||||
|
||||
void lyra2z_16way_midstate( const void* input )
|
||||
{
|
||||
blake256_16way_init( &l2z_16way_blake_mid );
|
||||
blake256_16way_update( &l2z_16way_blake_mid, input, 64 );
|
||||
}
|
||||
|
||||
void lyra2z_16way_hash( void *state, const void *input )
|
||||
{
|
||||
uint32_t vhash[8*16] __attribute__ ((aligned (128)));
|
||||
uint32_t hash0[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash1[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash2[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash3[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash4[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash5[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash6[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash7[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash8[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash9[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash10[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash11[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash12[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash13[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash14[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash15[8] __attribute__ ((aligned (64)));
|
||||
blake256_16way_context ctx_blake __attribute__ ((aligned (64)));
|
||||
|
||||
memcpy( &ctx_blake, &l2z_16way_blake_mid, sizeof l2z_16way_blake_mid );
|
||||
blake256_16way_update( &ctx_blake, input + (64*16), 16 );
|
||||
blake256_16way_close( &ctx_blake, vhash );
|
||||
|
||||
dintrlv_16x32( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
|
||||
hash8, hash9, hash10, hash11 ,hash12, hash13, hash14, hash15,
|
||||
vhash, 256 );
|
||||
|
||||
intrlv_2x256( vhash, hash0, hash1, 256 );
|
||||
LYRA2Z_2WAY( lyra2z_16way_matrix, vhash, 32, vhash, 32, 8, 8, 8 );
|
||||
dintrlv_2x256( hash0, hash1, vhash, 256 );
|
||||
intrlv_2x256( vhash, hash2, hash3, 256 );
|
||||
LYRA2Z_2WAY( lyra2z_16way_matrix, vhash, 32, vhash, 32, 8, 8, 8 );
|
||||
dintrlv_2x256( hash2, hash3, vhash, 256 );
|
||||
intrlv_2x256( vhash, hash4, hash5, 256 );
|
||||
LYRA2Z_2WAY( lyra2z_16way_matrix, vhash, 32, vhash, 32, 8, 8, 8 );
|
||||
dintrlv_2x256( hash4, hash5, vhash, 256 );
|
||||
intrlv_2x256( vhash, hash6, hash7, 256 );
|
||||
LYRA2Z_2WAY( lyra2z_16way_matrix, vhash, 32, vhash, 32, 8, 8, 8 );
|
||||
dintrlv_2x256( hash6, hash7, vhash, 256 );
|
||||
intrlv_2x256( vhash, hash8, hash9, 256 );
|
||||
LYRA2Z_2WAY( lyra2z_16way_matrix, vhash, 32, vhash, 32, 8, 8, 8 );
|
||||
dintrlv_2x256( hash8, hash9, vhash, 256 );
|
||||
intrlv_2x256( vhash, hash10, hash11, 256 );
|
||||
LYRA2Z_2WAY( lyra2z_16way_matrix, vhash, 32, vhash, 32, 8, 8, 8 );
|
||||
dintrlv_2x256( hash10, hash11, vhash, 256 );
|
||||
intrlv_2x256( vhash, hash12, hash13, 256 );
|
||||
LYRA2Z_2WAY( lyra2z_16way_matrix, vhash, 32, vhash, 32, 8, 8, 8 );
|
||||
dintrlv_2x256( hash12, hash13, vhash, 256 );
|
||||
intrlv_2x256( vhash, hash14, hash15, 256 );
|
||||
LYRA2Z_2WAY( lyra2z_16way_matrix, vhash, 32, vhash, 32, 8, 8, 8 );
|
||||
dintrlv_2x256( hash14, hash15, vhash, 256 );
|
||||
|
||||
memcpy( state, hash0, 32 );
|
||||
memcpy( state+ 32, hash1, 32 );
|
||||
memcpy( state+ 64, hash2, 32 );
|
||||
memcpy( state+ 96, hash3, 32 );
|
||||
memcpy( state+128, hash4, 32 );
|
||||
memcpy( state+160, hash5, 32 );
|
||||
memcpy( state+192, hash6, 32 );
|
||||
memcpy( state+224, hash7, 32 );
|
||||
memcpy( state+256, hash8, 32 );
|
||||
memcpy( state+288, hash9, 32 );
|
||||
memcpy( state+320, hash10, 32 );
|
||||
memcpy( state+352, hash11, 32 );
|
||||
memcpy( state+384, hash12, 32 );
|
||||
memcpy( state+416, hash13, 32 );
|
||||
memcpy( state+448, hash14, 32 );
|
||||
memcpy( state+480, hash15, 32 );
|
||||
}
|
||||
|
||||
int scanhash_lyra2z_16way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[8*16] __attribute__ ((aligned (128)));
|
||||
uint32_t vdata[20*16] __attribute__ ((aligned (64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
__m512i *noncev = (__m512i*)vdata + 19; // aligned
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
if ( opt_benchmark )
|
||||
ptarget[7] = 0x0000ff;
|
||||
|
||||
mm512_bswap32_intrlv80_16x32( vdata, pdata );
|
||||
lyra2z_16way_midstate( vdata );
|
||||
|
||||
do {
|
||||
*noncev = mm512_bswap_32( _mm512_set_epi32( n+15, n+14, n+13, n+12,
|
||||
n+11, n+10, n+ 9, n+ 8,
|
||||
n+ 7, n+ 6, n+ 5, n+ 4,
|
||||
n+ 3, n+ 2, n+ 1, n ) );
|
||||
lyra2z_16way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
for ( int i = 0; i < 16; i++ )
|
||||
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget )
|
||||
&& !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n+i;
|
||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
||||
}
|
||||
n += 16;
|
||||
} while ( (n < max_nonce-16) && !work_restart[thr_id].restart);
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#elif defined(LYRA2Z_8WAY)
|
||||
|
||||
__thread uint64_t* lyra2z_8way_matrix;
|
||||
|
||||
bool lyra2z_8way_thread_init()
|
||||
{
|
||||
return ( lyra2z_8way_matrix = _mm_malloc( LYRA2Z_MATRIX_SIZE, 64 ) );
|
||||
}
|
||||
|
||||
static __thread blake256_8way_context l2z_8way_blake_mid;
|
||||
|
||||
void lyra2z_8way_midstate( const void* input )
|
||||
{
|
||||
blake256_8way_init( &l2z_8way_blake_mid );
|
||||
blake256_8way( &l2z_8way_blake_mid, input, 64 );
|
||||
}
|
||||
|
||||
void lyra2z_8way_hash( void *state, const void *input )
|
||||
{
|
||||
uint32_t hash0[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash1[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash2[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash3[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash4[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash5[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash6[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash7[8] __attribute__ ((aligned (64)));
|
||||
uint32_t vhash[8*8] __attribute__ ((aligned (64)));
|
||||
blake256_8way_context ctx_blake __attribute__ ((aligned (64)));
|
||||
|
||||
memcpy( &ctx_blake, &l2z_8way_blake_mid, sizeof l2z_8way_blake_mid );
|
||||
blake256_8way( &ctx_blake, input + (64*8), 16 );
|
||||
blake256_8way_close( &ctx_blake, vhash );
|
||||
|
||||
dintrlv_8x32( hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7, vhash, 256 );
|
||||
|
||||
LYRA2Z( lyra2z_8way_matrix, hash0, 32, hash0, 32, hash0, 32, 8, 8, 8 );
|
||||
LYRA2Z( lyra2z_8way_matrix, hash1, 32, hash1, 32, hash1, 32, 8, 8, 8 );
|
||||
LYRA2Z( lyra2z_8way_matrix, hash2, 32, hash2, 32, hash2, 32, 8, 8, 8 );
|
||||
LYRA2Z( lyra2z_8way_matrix, hash3, 32, hash3, 32, hash3, 32, 8, 8, 8 );
|
||||
LYRA2Z( lyra2z_8way_matrix, hash4, 32, hash4, 32, hash4, 32, 8, 8, 8 );
|
||||
LYRA2Z( lyra2z_8way_matrix, hash5, 32, hash5, 32, hash5, 32, 8, 8, 8 );
|
||||
LYRA2Z( lyra2z_8way_matrix, hash6, 32, hash6, 32, hash6, 32, 8, 8, 8 );
|
||||
LYRA2Z( lyra2z_8way_matrix, hash7, 32, hash7, 32, hash7, 32, 8, 8, 8 );
|
||||
|
||||
|
||||
memcpy( state, hash0, 32 );
|
||||
memcpy( state+ 32, hash1, 32 );
|
||||
memcpy( state+ 64, hash2, 32 );
|
||||
memcpy( state+ 96, hash3, 32 );
|
||||
memcpy( state+128, hash4, 32 );
|
||||
memcpy( state+160, hash5, 32 );
|
||||
memcpy( state+192, hash6, 32 );
|
||||
memcpy( state+224, hash7, 32 );
|
||||
}
|
||||
|
||||
int scanhash_lyra2z_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[8*8] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
__m256i *noncev = (__m256i*)vdata + 19; // aligned
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
if ( opt_benchmark )
|
||||
ptarget[7] = 0x0000ff;
|
||||
|
||||
mm256_bswap32_intrlv80_8x32( vdata, pdata );
|
||||
lyra2z_8way_midstate( vdata );
|
||||
|
||||
do {
|
||||
*noncev = mm256_bswap_32(
|
||||
_mm256_set_epi32( n+7, n+6, n+5, n+4, n+3, n+2, n+1, n ) );
|
||||
lyra2z_8way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
for ( int i = 0; i < 8; i++ )
|
||||
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget )
|
||||
&& !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n+i;
|
||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
||||
}
|
||||
n += 8;
|
||||
} while ( (n < max_nonce-8) && !work_restart[thr_id].restart);
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
#elif defined(LYRA2Z_4WAY)
|
||||
|
||||
|
||||
__thread uint64_t* lyra2z_4way_matrix;
|
||||
|
||||
bool lyra2z_4way_thread_init()
|
||||
@@ -85,100 +312,3 @@ int scanhash_lyra2z_4way( struct work *work, uint32_t max_nonce,
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(LYRA2Z_8WAY)
|
||||
|
||||
__thread uint64_t* lyra2z_8way_matrix;
|
||||
|
||||
bool lyra2z_8way_thread_init()
|
||||
{
|
||||
return ( lyra2z_8way_matrix = _mm_malloc( LYRA2Z_MATRIX_SIZE, 64 ) );
|
||||
}
|
||||
|
||||
static __thread blake256_8way_context l2z_8way_blake_mid;
|
||||
|
||||
void lyra2z_8way_midstate( const void* input )
|
||||
{
|
||||
blake256_8way_init( &l2z_8way_blake_mid );
|
||||
blake256_8way( &l2z_8way_blake_mid, input, 64 );
|
||||
}
|
||||
|
||||
void lyra2z_8way_hash( void *state, const void *input )
|
||||
{
|
||||
uint32_t hash0[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash1[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash2[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash3[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash4[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash5[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash6[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash7[8] __attribute__ ((aligned (64)));
|
||||
uint32_t vhash[8*8] __attribute__ ((aligned (64)));
|
||||
blake256_8way_context ctx_blake __attribute__ ((aligned (64)));
|
||||
|
||||
memcpy( &ctx_blake, &l2z_8way_blake_mid, sizeof l2z_8way_blake_mid );
|
||||
blake256_8way( &ctx_blake, input + (64*8), 16 );
|
||||
blake256_8way_close( &ctx_blake, vhash );
|
||||
|
||||
dintrlv_8x32( hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7, vhash, 256 );
|
||||
|
||||
LYRA2Z( lyra2z_8way_matrix, hash0, 32, hash0, 32, hash0, 32, 8, 8, 8 );
|
||||
LYRA2Z( lyra2z_8way_matrix, hash1, 32, hash1, 32, hash1, 32, 8, 8, 8 );
|
||||
LYRA2Z( lyra2z_8way_matrix, hash2, 32, hash2, 32, hash2, 32, 8, 8, 8 );
|
||||
LYRA2Z( lyra2z_8way_matrix, hash3, 32, hash3, 32, hash3, 32, 8, 8, 8 );
|
||||
LYRA2Z( lyra2z_8way_matrix, hash4, 32, hash4, 32, hash4, 32, 8, 8, 8 );
|
||||
LYRA2Z( lyra2z_8way_matrix, hash5, 32, hash5, 32, hash5, 32, 8, 8, 8 );
|
||||
LYRA2Z( lyra2z_8way_matrix, hash6, 32, hash6, 32, hash6, 32, 8, 8, 8 );
|
||||
LYRA2Z( lyra2z_8way_matrix, hash7, 32, hash7, 32, hash7, 32, 8, 8, 8 );
|
||||
|
||||
memcpy( state, hash0, 32 );
|
||||
memcpy( state+ 32, hash1, 32 );
|
||||
memcpy( state+ 64, hash2, 32 );
|
||||
memcpy( state+ 96, hash3, 32 );
|
||||
memcpy( state+128, hash4, 32 );
|
||||
memcpy( state+160, hash5, 32 );
|
||||
memcpy( state+192, hash6, 32 );
|
||||
memcpy( state+224, hash7, 32 );
|
||||
}
|
||||
|
||||
int scanhash_lyra2z_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[8*8] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
__m256i *noncev = (__m256i*)vdata + 19; // aligned
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
if ( opt_benchmark )
|
||||
ptarget[7] = 0x0000ff;
|
||||
|
||||
mm256_bswap32_intrlv80_8x32( vdata, pdata );
|
||||
lyra2z_8way_midstate( vdata );
|
||||
|
||||
do {
|
||||
*noncev = mm256_bswap_32(
|
||||
_mm256_set_epi32( n+7, n+6, n+5, n+4, n+3, n+2, n+1, n ) );
|
||||
lyra2z_8way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
for ( int i = 0; i < 8; i++ )
|
||||
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget )
|
||||
&& !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n+i;
|
||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
||||
}
|
||||
n += 8;
|
||||
} while ( (n < max_nonce-8) && !work_restart[thr_id].restart);
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
#endif
|
||||
|
@@ -10,7 +10,140 @@
|
||||
#define LBRY_MIDSTATE 64
|
||||
#define LBRY_TAIL (LBRY_INPUT_SIZE) - (LBRY_MIDSTATE)
|
||||
|
||||
#if defined(LBRY_8WAY)
|
||||
#if defined(LBRY_16WAY)
|
||||
|
||||
static __thread sha256_16way_context sha256_16w_mid;
|
||||
|
||||
void lbry_16way_hash( void* output, const void* input )
|
||||
{
|
||||
uint32_t _ALIGN(128) vhashA[16<<4];
|
||||
uint32_t _ALIGN(64) vhashB[16<<4];
|
||||
uint32_t _ALIGN(64) vhashC[16<<4];
|
||||
uint32_t _ALIGN(64) h0[32];
|
||||
uint32_t _ALIGN(64) h1[32];
|
||||
uint32_t _ALIGN(64) h2[32];
|
||||
uint32_t _ALIGN(64) h3[32];
|
||||
uint32_t _ALIGN(64) h4[32];
|
||||
uint32_t _ALIGN(64) h5[32];
|
||||
uint32_t _ALIGN(64) h6[32];
|
||||
uint32_t _ALIGN(64) h7[32];
|
||||
uint32_t _ALIGN(64) h8[32];
|
||||
uint32_t _ALIGN(64) h9[32];
|
||||
uint32_t _ALIGN(64) h10[32];
|
||||
uint32_t _ALIGN(64) h11[32];
|
||||
uint32_t _ALIGN(64) h12[32];
|
||||
uint32_t _ALIGN(64) h13[32];
|
||||
uint32_t _ALIGN(64) h14[32];
|
||||
uint32_t _ALIGN(64) h15[32];
|
||||
sha256_16way_context ctx_sha256 __attribute__ ((aligned (64)));
|
||||
sha512_8way_context ctx_sha512;
|
||||
ripemd160_16way_context ctx_ripemd;
|
||||
|
||||
memcpy( &ctx_sha256, &sha256_16w_mid, sizeof(ctx_sha256) );
|
||||
sha256_16way_update( &ctx_sha256, input + (LBRY_MIDSTATE<<4), LBRY_TAIL );
|
||||
sha256_16way_close( &ctx_sha256, vhashA );
|
||||
|
||||
sha256_16way_init( &ctx_sha256 );
|
||||
sha256_16way_update( &ctx_sha256, vhashA, 32 );
|
||||
sha256_16way_close( &ctx_sha256, vhashA );
|
||||
|
||||
// reinterleave to do sha512 4-way 64 bit twice.
|
||||
dintrlv_16x32( h0, h1, h2, h3, h4, h5, h6, h7,
|
||||
h8, h9, h10, h11, h12, h13, h14, h15, vhashA, 256 );
|
||||
intrlv_8x64( vhashA, h0, h1, h2, h3, h4, h5, h6, h7, 256 );
|
||||
intrlv_8x64( vhashB, h8, h9, h10, h11, h12, h13, h14, h15, 256 );
|
||||
|
||||
sha512_8way_init( &ctx_sha512 );
|
||||
sha512_8way_update( &ctx_sha512, vhashA, 32 );
|
||||
sha512_8way_close( &ctx_sha512, vhashA );
|
||||
|
||||
sha512_8way_init( &ctx_sha512 );
|
||||
sha512_8way_update( &ctx_sha512, vhashB, 32 );
|
||||
sha512_8way_close( &ctx_sha512, vhashB );
|
||||
|
||||
// back to 8-way 32 bit
|
||||
dintrlv_8x64( h0, h1, h2, h3,h4, h5, h6, h7, vhashA, 512 );
|
||||
dintrlv_8x64( h8, h9, h10, h11, h12, h13, h14, h15, vhashB, 512 );
|
||||
intrlv_16x32( vhashA, h0, h1, h2, h3, h4, h5, h6, h7,
|
||||
h8, h9, h10, h11, h12, h13, h14, h15, 512 );
|
||||
|
||||
ripemd160_16way_init( &ctx_ripemd );
|
||||
ripemd160_16way_update( &ctx_ripemd, vhashA, 32 );
|
||||
ripemd160_16way_close( &ctx_ripemd, vhashB );
|
||||
|
||||
ripemd160_16way_init( &ctx_ripemd );
|
||||
ripemd160_16way_update( &ctx_ripemd, vhashA+(8<<4), 32 );
|
||||
ripemd160_16way_close( &ctx_ripemd, vhashC );
|
||||
|
||||
sha256_16way_init( &ctx_sha256 );
|
||||
sha256_16way_update( &ctx_sha256, vhashB, 20 );
|
||||
sha256_16way_update( &ctx_sha256, vhashC, 20 );
|
||||
sha256_16way_close( &ctx_sha256, vhashA );
|
||||
|
||||
sha256_16way_init( &ctx_sha256 );
|
||||
sha256_16way_update( &ctx_sha256, vhashA, 32 );
|
||||
sha256_16way_close( &ctx_sha256, output );
|
||||
}
|
||||
|
||||
int scanhash_lbry_16way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[8*16] __attribute__ ((aligned (128)));
|
||||
uint32_t vdata[32*16] __attribute__ ((aligned (64)));
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||
uint32_t *hash7 = &(hash[7<<4]);
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[27];
|
||||
const uint32_t first_nonce = pdata[27];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint32_t edata[32] __attribute__ ((aligned (64)));
|
||||
__m512i *noncev = (__m512i*)vdata + 27; // aligned
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
// we need bigendian data...
|
||||
casti_m128i( edata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
|
||||
casti_m128i( edata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
|
||||
casti_m128i( edata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
|
||||
casti_m128i( edata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
|
||||
casti_m128i( edata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
|
||||
casti_m128i( edata, 5 ) = mm128_bswap_32( casti_m128i( pdata, 5 ) );
|
||||
casti_m128i( edata, 6 ) = mm128_bswap_32( casti_m128i( pdata, 6 ) );
|
||||
casti_m128i( edata, 7 ) = mm128_bswap_32( casti_m128i( pdata, 7 ) );
|
||||
intrlv_16x32( vdata, edata, edata, edata, edata, edata, edata, edata,
|
||||
edata, edata, edata, edata, edata, edata, edata, edata, edata, 1024 );
|
||||
|
||||
sha256_16way_init( &sha256_16w_mid );
|
||||
sha256_16way( &sha256_16w_mid, vdata, LBRY_MIDSTATE );
|
||||
|
||||
do
|
||||
{
|
||||
*noncev = mm512_bswap_32( _mm512_set_epi32( n+15, n+14, n+13, n+12,
|
||||
n+11, n+10, n+ 9, n+ 8,
|
||||
n+ 7, n+ 6, n+ 5, n+ 4,
|
||||
n+ 3, n+ 2, n+ 1, n ) );
|
||||
lbry_16way_hash( hash, vdata );
|
||||
|
||||
for ( int i = 0; i < 16; i++ )
|
||||
if ( unlikely( hash7[ i ] <= Htarg ) )
|
||||
{
|
||||
// deinterleave hash for lane
|
||||
extr_lane_16x32( lane_hash, hash, i, 256 );
|
||||
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[27] = n + i;
|
||||
submit_lane_solution( work, lane_hash, mythr, i );
|
||||
}
|
||||
}
|
||||
n += 16;
|
||||
} while ( (n < max_nonce-16) && !work_restart[thr_id].restart );
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
#elif defined(LBRY_8WAY)
|
||||
|
||||
static __thread sha256_8way_context sha256_8w_mid;
|
||||
|
||||
@@ -91,11 +224,6 @@ int scanhash_lbry_8way( struct work *work, uint32_t max_nonce,
|
||||
__m256i *noncev = (__m256i*)vdata + 27; // aligned
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
uint64_t htmax[] = { 0, 0xF, 0xFF,
|
||||
0xFFF, 0xFFFF, 0x10000000 };
|
||||
uint32_t masks[] = { 0xFFFFFFFF, 0xFFFFFFF0, 0xFFFFFF00,
|
||||
0xFFFFF000, 0xFFFF0000, 0 };
|
||||
|
||||
// we need bigendian data...
|
||||
casti_m128i( edata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
|
||||
casti_m128i( edata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
|
||||
@@ -106,33 +234,30 @@ int scanhash_lbry_8way( struct work *work, uint32_t max_nonce,
|
||||
casti_m128i( edata, 6 ) = mm128_bswap_32( casti_m128i( pdata, 6 ) );
|
||||
casti_m128i( edata, 7 ) = mm128_bswap_32( casti_m128i( pdata, 7 ) );
|
||||
intrlv_8x32( vdata, edata, edata, edata, edata,
|
||||
edata, edata, edata, edata, 1024 );
|
||||
edata, edata, edata, edata, 1024 );
|
||||
|
||||
sha256_8way_init( &sha256_8w_mid );
|
||||
sha256_8way( &sha256_8w_mid, vdata, LBRY_MIDSTATE );
|
||||
|
||||
for ( int m = 0; m < sizeof(masks); m++ ) if ( Htarg <= htmax[m] )
|
||||
do
|
||||
{
|
||||
uint32_t mask = masks[m];
|
||||
do
|
||||
{
|
||||
*noncev = mm256_bswap_32( _mm256_set_epi32(
|
||||
n+7,n+6,n+5,n+4,n+3,n+2,n+1,n ) );
|
||||
lbry_8way_hash( hash, vdata );
|
||||
*noncev = mm256_bswap_32( _mm256_set_epi32(
|
||||
n+7,n+6,n+5,n+4,n+3,n+2,n+1,n ) );
|
||||
lbry_8way_hash( hash, vdata );
|
||||
|
||||
for ( int i = 0; i < 8; i++ ) if ( !( hash7[ i ] & mask ) )
|
||||
for ( int i = 0; i < 8; i++ )
|
||||
if ( unlikely( hash7[ i ] <= Htarg ) )
|
||||
{
|
||||
// deinterleave hash for lane
|
||||
extr_lane_8x32( lane_hash, hash, i, 256 );
|
||||
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
// deinterleave hash for lane
|
||||
extr_lane_8x32( lane_hash, hash, i, 256 );
|
||||
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[27] = n + i;
|
||||
submit_lane_solution( work, lane_hash, mythr, i );
|
||||
}
|
||||
pdata[27] = n + i;
|
||||
submit_lane_solution( work, lane_hash, mythr, i );
|
||||
}
|
||||
n += 8;
|
||||
} while ( (n < max_nonce-10) && !work_restart[thr_id].restart );
|
||||
break;
|
||||
}
|
||||
}
|
||||
n += 8;
|
||||
} while ( (n < max_nonce-10) && !work_restart[thr_id].restart );
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return 0;
|
||||
}
|
||||
|
@@ -98,16 +98,23 @@ int lbry_get_work_data_size() { return LBRY_WORK_DATA_SIZE; }
|
||||
|
||||
bool register_lbry_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->optimizations = AVX2_OPT | SHA_OPT;
|
||||
#if defined (LBRY_8WAY)
|
||||
gate->optimizations = AVX2_OPT | AVX512_OPT | SHA_OPT;
|
||||
#if defined (LBRY_16WAY)
|
||||
gate->scanhash = (void*)&scanhash_lbry_16way;
|
||||
gate->hash = (void*)&lbry_16way_hash;
|
||||
gate->optimizations = AVX2_OPT | AVX512_OPT;
|
||||
#elif defined (LBRY_8WAY)
|
||||
gate->scanhash = (void*)&scanhash_lbry_8way;
|
||||
gate->hash = (void*)&lbry_8way_hash;
|
||||
gate->optimizations = AVX2_OPT | AVX512_OPT;
|
||||
#elif defined (LBRY_4WAY)
|
||||
gate->scanhash = (void*)&scanhash_lbry_4way;
|
||||
gate->hash = (void*)&lbry_4way_hash;
|
||||
gate->optimizations = AVX2_OPT | AVX512_OPT;
|
||||
#else
|
||||
gate->scanhash = (void*)&scanhash_lbry;
|
||||
gate->hash = (void*)&lbry_hash;
|
||||
gate->optimizations = AVX2_OPT | AVX512_OPT | SHA_OPT;
|
||||
#endif
|
||||
gate->calc_network_diff = (void*)&lbry_calc_network_diff;
|
||||
gate->build_stratum_request = (void*)&lbry_le_build_stratum_request;
|
||||
|
@@ -4,11 +4,20 @@
|
||||
#include "algo-gate-api.h"
|
||||
#include <stdint.h>
|
||||
|
||||
|
||||
// 16 way needs sha256 16 way
|
||||
//#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
// #define LBRY_16WAY
|
||||
#if defined(__AVX2__)
|
||||
#define LBRY_8WAY
|
||||
#endif
|
||||
/*
|
||||
#if !defined(__SHA__)
|
||||
#if defined(__AVX2__)
|
||||
#define LBRY_8WAY
|
||||
#endif
|
||||
#endif
|
||||
*/
|
||||
|
||||
#define LBRY_NTIME_INDEX 25
|
||||
#define LBRY_NBITS_INDEX 26
|
||||
@@ -18,7 +27,12 @@
|
||||
|
||||
bool register_lbry_algo( algo_gate_t* gate );
|
||||
|
||||
#if defined(LBRY_8WAY)
|
||||
#if defined(LBRY_16WAY)
|
||||
|
||||
void lbry_16way_hash( void *state, const void *input );
|
||||
int scanhash_lbry_16way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
#elif defined(LBRY_8WAY)
|
||||
|
||||
void lbry_8way_hash( void *state, const void *input );
|
||||
int scanhash_lbry_8way( struct work *work, uint32_t max_nonce,
|
||||
|
@@ -80,9 +80,6 @@ int scanhash_lbry( struct work *work, uint32_t max_nonce,
|
||||
// we need bigendian data...
|
||||
swab32_array( endiandata, pdata, 32 );
|
||||
|
||||
#ifdef DEBUG_ALGO
|
||||
printf("[%d] Htarg=%X\n", thr_id, Htarg);
|
||||
#endif
|
||||
for (int m=0; m < sizeof(masks); m++) {
|
||||
if (Htarg <= htmax[m]) {
|
||||
uint32_t mask = masks[m];
|
||||
@@ -90,23 +87,11 @@ int scanhash_lbry( struct work *work, uint32_t max_nonce,
|
||||
pdata[27] = ++n;
|
||||
be32enc(&endiandata[27], n);
|
||||
lbry_hash(hash64, &endiandata);
|
||||
#ifndef DEBUG_ALGO
|
||||
if ((!(hash64[7] & mask)) && fulltest(hash64, ptarget)) {
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return true;
|
||||
pdata[27] = n;
|
||||
submit_solution( work, hash64, mythr );
|
||||
}
|
||||
#else
|
||||
if (!(n % 0x1000) && !thr_id) printf(".");
|
||||
if (!(hash64[7] & mask)) {
|
||||
printf("[%d]",thr_id);
|
||||
if (fulltest(hash64, ptarget)) {
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
} while (n < max_nonce && !work_restart[thr_id].restart);
|
||||
// see blake.c if else to understand the loop on htmax => mask
|
||||
} while ( (n < max_nonce -8) && !work_restart[thr_id].restart);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@@ -623,3 +623,303 @@ void ripemd160_8way_close( ripemd160_8way_context *sc, void *dst )
|
||||
|
||||
#endif // __AVX2__
|
||||
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
|
||||
// RIPEMD-160 16 way
|
||||
|
||||
|
||||
// RIPEMD-160 boolean functions on 512 bit vectors.

// b ^ c ^ d
#define F16W_1(b, c, d) \
   _mm512_xor_si512( _mm512_xor_si512( (b), (c) ), (d) )

// ((c ^ d) & b) ^ d
#define F16W_2(b, c, d) \
   _mm512_xor_si512( _mm512_and_si512( _mm512_xor_si512( (c), (d) ), (b) ), \
                     (d) )

// (b | ~c) ^ d
#define F16W_3(b, c, d) \
   _mm512_xor_si512( _mm512_or_si512( (b), mm512_not( (c) ) ), (d) )

// ((b ^ c) & d) ^ c
#define F16W_4(b, c, d) \
   _mm512_xor_si512( _mm512_and_si512( _mm512_xor_si512( (b), (c) ), (d) ), \
                     (c) )

// b ^ (c | ~d)
#define F16W_5(b, c, d) \
   _mm512_xor_si512( (b), _mm512_or_si512( (c), mm512_not( (d) ) ) )

// One step: a = rol32( a + f(b,c,d) + r + k, s ) + e;  c = rol32( c, 10 ).
#define RR_16W(a, b, c, d, e, f, s, r, k) \
do { \
   __m512i rr16w_sum_ = _mm512_add_epi32( a, f( b, c, d ) ); \
   rr16w_sum_ = _mm512_add_epi32( rr16w_sum_, r ); \
   rr16w_sum_ = _mm512_add_epi32( rr16w_sum_, m512_const1_64( k ) ); \
   a = _mm512_add_epi32( mm512_rol_32( rr16w_sum_, s ), e ); \
   c = mm512_rol_32( c, 10 ); \
} while (0)

// Step on the "1" register set (A1..E1) with constant K1<k>.
#define ROUND1_16W(a, b, c, d, e, f, s, r, k) \
   RR_16W( a ## 1, b ## 1, c ## 1, d ## 1, e ## 1, f, s, r, K1 ## k )

// Step on the "2" register set (A2..E2) with constant K2<k>.
#define ROUND2_16W(a, b, c, d, e, f, s, r, k) \
   RR_16W( a ## 2, b ## 2, c ## 2, d ## 2, e ## 2, f, s, r, K2 ## k )
|
||||
|
||||
// Compress the 64 byte block held in sc->buf into the chaining value
// sc->val, for 16 lanes at once. Runs 80 steps on the A1..E1 register set
// and 80 steps on the A2..E2 set, then folds both into the state.
static void ripemd160_16way_round( ripemd160_16way_context *sc )
{
   const __m512i *W = sc->buf;
   __m512i *state = sc->val;
   __m512i A1, B1, C1, D1, E1;
   __m512i A2, B2, C2, D2, E2;
   __m512i n0, n1, n2, n3, n4;

   A1 = A2 = state[0];
   B1 = B2 = state[1];
   C1 = C2 = state[2];
   D1 = D2 = state[3];
   E1 = E2 = state[4];

   // Set 1, steps 0-15
   ROUND1_16W( A, B, C, D, E, F16W_1, 11, W[ 0], 1 );
   ROUND1_16W( E, A, B, C, D, F16W_1, 14, W[ 1], 1 );
   ROUND1_16W( D, E, A, B, C, F16W_1, 15, W[ 2], 1 );
   ROUND1_16W( C, D, E, A, B, F16W_1, 12, W[ 3], 1 );
   ROUND1_16W( B, C, D, E, A, F16W_1,  5, W[ 4], 1 );
   ROUND1_16W( A, B, C, D, E, F16W_1,  8, W[ 5], 1 );
   ROUND1_16W( E, A, B, C, D, F16W_1,  7, W[ 6], 1 );
   ROUND1_16W( D, E, A, B, C, F16W_1,  9, W[ 7], 1 );
   ROUND1_16W( C, D, E, A, B, F16W_1, 11, W[ 8], 1 );
   ROUND1_16W( B, C, D, E, A, F16W_1, 13, W[ 9], 1 );
   ROUND1_16W( A, B, C, D, E, F16W_1, 14, W[10], 1 );
   ROUND1_16W( E, A, B, C, D, F16W_1, 15, W[11], 1 );
   ROUND1_16W( D, E, A, B, C, F16W_1,  6, W[12], 1 );
   ROUND1_16W( C, D, E, A, B, F16W_1,  7, W[13], 1 );
   ROUND1_16W( B, C, D, E, A, F16W_1,  9, W[14], 1 );
   ROUND1_16W( A, B, C, D, E, F16W_1,  8, W[15], 1 );

   // Set 1, steps 16-31
   ROUND1_16W( E, A, B, C, D, F16W_2,  7, W[ 7], 2 );
   ROUND1_16W( D, E, A, B, C, F16W_2,  6, W[ 4], 2 );
   ROUND1_16W( C, D, E, A, B, F16W_2,  8, W[13], 2 );
   ROUND1_16W( B, C, D, E, A, F16W_2, 13, W[ 1], 2 );
   ROUND1_16W( A, B, C, D, E, F16W_2, 11, W[10], 2 );
   ROUND1_16W( E, A, B, C, D, F16W_2,  9, W[ 6], 2 );
   ROUND1_16W( D, E, A, B, C, F16W_2,  7, W[15], 2 );
   ROUND1_16W( C, D, E, A, B, F16W_2, 15, W[ 3], 2 );
   ROUND1_16W( B, C, D, E, A, F16W_2,  7, W[12], 2 );
   ROUND1_16W( A, B, C, D, E, F16W_2, 12, W[ 0], 2 );
   ROUND1_16W( E, A, B, C, D, F16W_2, 15, W[ 9], 2 );
   ROUND1_16W( D, E, A, B, C, F16W_2,  9, W[ 5], 2 );
   ROUND1_16W( C, D, E, A, B, F16W_2, 11, W[ 2], 2 );
   ROUND1_16W( B, C, D, E, A, F16W_2,  7, W[14], 2 );
   ROUND1_16W( A, B, C, D, E, F16W_2, 13, W[11], 2 );
   ROUND1_16W( E, A, B, C, D, F16W_2, 12, W[ 8], 2 );

   // Set 1, steps 32-47
   ROUND1_16W( D, E, A, B, C, F16W_3, 11, W[ 3], 3 );
   ROUND1_16W( C, D, E, A, B, F16W_3, 13, W[10], 3 );
   ROUND1_16W( B, C, D, E, A, F16W_3,  6, W[14], 3 );
   ROUND1_16W( A, B, C, D, E, F16W_3,  7, W[ 4], 3 );
   ROUND1_16W( E, A, B, C, D, F16W_3, 14, W[ 9], 3 );
   ROUND1_16W( D, E, A, B, C, F16W_3,  9, W[15], 3 );
   ROUND1_16W( C, D, E, A, B, F16W_3, 13, W[ 8], 3 );
   ROUND1_16W( B, C, D, E, A, F16W_3, 15, W[ 1], 3 );
   ROUND1_16W( A, B, C, D, E, F16W_3, 14, W[ 2], 3 );
   ROUND1_16W( E, A, B, C, D, F16W_3,  8, W[ 7], 3 );
   ROUND1_16W( D, E, A, B, C, F16W_3, 13, W[ 0], 3 );
   ROUND1_16W( C, D, E, A, B, F16W_3,  6, W[ 6], 3 );
   ROUND1_16W( B, C, D, E, A, F16W_3,  5, W[13], 3 );
   ROUND1_16W( A, B, C, D, E, F16W_3, 12, W[11], 3 );
   ROUND1_16W( E, A, B, C, D, F16W_3,  7, W[ 5], 3 );
   ROUND1_16W( D, E, A, B, C, F16W_3,  5, W[12], 3 );

   // Set 1, steps 48-63
   ROUND1_16W( C, D, E, A, B, F16W_4, 11, W[ 1], 4 );
   ROUND1_16W( B, C, D, E, A, F16W_4, 12, W[ 9], 4 );
   ROUND1_16W( A, B, C, D, E, F16W_4, 14, W[11], 4 );
   ROUND1_16W( E, A, B, C, D, F16W_4, 15, W[10], 4 );
   ROUND1_16W( D, E, A, B, C, F16W_4, 14, W[ 0], 4 );
   ROUND1_16W( C, D, E, A, B, F16W_4, 15, W[ 8], 4 );
   ROUND1_16W( B, C, D, E, A, F16W_4,  9, W[12], 4 );
   ROUND1_16W( A, B, C, D, E, F16W_4,  8, W[ 4], 4 );
   ROUND1_16W( E, A, B, C, D, F16W_4,  9, W[13], 4 );
   ROUND1_16W( D, E, A, B, C, F16W_4, 14, W[ 3], 4 );
   ROUND1_16W( C, D, E, A, B, F16W_4,  5, W[ 7], 4 );
   ROUND1_16W( B, C, D, E, A, F16W_4,  6, W[15], 4 );
   ROUND1_16W( A, B, C, D, E, F16W_4,  8, W[14], 4 );
   ROUND1_16W( E, A, B, C, D, F16W_4,  6, W[ 5], 4 );
   ROUND1_16W( D, E, A, B, C, F16W_4,  5, W[ 6], 4 );
   ROUND1_16W( C, D, E, A, B, F16W_4, 12, W[ 2], 4 );

   // Set 1, steps 64-79
   ROUND1_16W( B, C, D, E, A, F16W_5,  9, W[ 4], 5 );
   ROUND1_16W( A, B, C, D, E, F16W_5, 15, W[ 0], 5 );
   ROUND1_16W( E, A, B, C, D, F16W_5,  5, W[ 5], 5 );
   ROUND1_16W( D, E, A, B, C, F16W_5, 11, W[ 9], 5 );
   ROUND1_16W( C, D, E, A, B, F16W_5,  6, W[ 7], 5 );
   ROUND1_16W( B, C, D, E, A, F16W_5,  8, W[12], 5 );
   ROUND1_16W( A, B, C, D, E, F16W_5, 13, W[ 2], 5 );
   ROUND1_16W( E, A, B, C, D, F16W_5, 12, W[10], 5 );
   ROUND1_16W( D, E, A, B, C, F16W_5,  5, W[14], 5 );
   ROUND1_16W( C, D, E, A, B, F16W_5, 12, W[ 1], 5 );
   ROUND1_16W( B, C, D, E, A, F16W_5, 13, W[ 3], 5 );
   ROUND1_16W( A, B, C, D, E, F16W_5, 14, W[ 8], 5 );
   ROUND1_16W( E, A, B, C, D, F16W_5, 11, W[11], 5 );
   ROUND1_16W( D, E, A, B, C, F16W_5,  8, W[ 6], 5 );
   ROUND1_16W( C, D, E, A, B, F16W_5,  5, W[15], 5 );
   ROUND1_16W( B, C, D, E, A, F16W_5,  6, W[13], 5 );

   // Set 2, steps 0-15
   ROUND2_16W( A, B, C, D, E, F16W_5,  8, W[ 5], 1 );
   ROUND2_16W( E, A, B, C, D, F16W_5,  9, W[14], 1 );
   ROUND2_16W( D, E, A, B, C, F16W_5,  9, W[ 7], 1 );
   ROUND2_16W( C, D, E, A, B, F16W_5, 11, W[ 0], 1 );
   ROUND2_16W( B, C, D, E, A, F16W_5, 13, W[ 9], 1 );
   ROUND2_16W( A, B, C, D, E, F16W_5, 15, W[ 2], 1 );
   ROUND2_16W( E, A, B, C, D, F16W_5, 15, W[11], 1 );
   ROUND2_16W( D, E, A, B, C, F16W_5,  5, W[ 4], 1 );
   ROUND2_16W( C, D, E, A, B, F16W_5,  7, W[13], 1 );
   ROUND2_16W( B, C, D, E, A, F16W_5,  7, W[ 6], 1 );
   ROUND2_16W( A, B, C, D, E, F16W_5,  8, W[15], 1 );
   ROUND2_16W( E, A, B, C, D, F16W_5, 11, W[ 8], 1 );
   ROUND2_16W( D, E, A, B, C, F16W_5, 14, W[ 1], 1 );
   ROUND2_16W( C, D, E, A, B, F16W_5, 14, W[10], 1 );
   ROUND2_16W( B, C, D, E, A, F16W_5, 12, W[ 3], 1 );
   ROUND2_16W( A, B, C, D, E, F16W_5,  6, W[12], 1 );

   // Set 2, steps 16-31
   ROUND2_16W( E, A, B, C, D, F16W_4,  9, W[ 6], 2 );
   ROUND2_16W( D, E, A, B, C, F16W_4, 13, W[11], 2 );
   ROUND2_16W( C, D, E, A, B, F16W_4, 15, W[ 3], 2 );
   ROUND2_16W( B, C, D, E, A, F16W_4,  7, W[ 7], 2 );
   ROUND2_16W( A, B, C, D, E, F16W_4, 12, W[ 0], 2 );
   ROUND2_16W( E, A, B, C, D, F16W_4,  8, W[13], 2 );
   ROUND2_16W( D, E, A, B, C, F16W_4,  9, W[ 5], 2 );
   ROUND2_16W( C, D, E, A, B, F16W_4, 11, W[10], 2 );
   ROUND2_16W( B, C, D, E, A, F16W_4,  7, W[14], 2 );
   ROUND2_16W( A, B, C, D, E, F16W_4,  7, W[15], 2 );
   ROUND2_16W( E, A, B, C, D, F16W_4, 12, W[ 8], 2 );
   ROUND2_16W( D, E, A, B, C, F16W_4,  7, W[12], 2 );
   ROUND2_16W( C, D, E, A, B, F16W_4,  6, W[ 4], 2 );
   ROUND2_16W( B, C, D, E, A, F16W_4, 15, W[ 9], 2 );
   ROUND2_16W( A, B, C, D, E, F16W_4, 13, W[ 1], 2 );
   ROUND2_16W( E, A, B, C, D, F16W_4, 11, W[ 2], 2 );

   // Set 2, steps 32-47
   ROUND2_16W( D, E, A, B, C, F16W_3,  9, W[15], 3 );
   ROUND2_16W( C, D, E, A, B, F16W_3,  7, W[ 5], 3 );
   ROUND2_16W( B, C, D, E, A, F16W_3, 15, W[ 1], 3 );
   ROUND2_16W( A, B, C, D, E, F16W_3, 11, W[ 3], 3 );
   ROUND2_16W( E, A, B, C, D, F16W_3,  8, W[ 7], 3 );
   ROUND2_16W( D, E, A, B, C, F16W_3,  6, W[14], 3 );
   ROUND2_16W( C, D, E, A, B, F16W_3,  6, W[ 6], 3 );
   ROUND2_16W( B, C, D, E, A, F16W_3, 14, W[ 9], 3 );
   ROUND2_16W( A, B, C, D, E, F16W_3, 12, W[11], 3 );
   ROUND2_16W( E, A, B, C, D, F16W_3, 13, W[ 8], 3 );
   ROUND2_16W( D, E, A, B, C, F16W_3,  5, W[12], 3 );
   ROUND2_16W( C, D, E, A, B, F16W_3, 14, W[ 2], 3 );
   ROUND2_16W( B, C, D, E, A, F16W_3, 13, W[10], 3 );
   ROUND2_16W( A, B, C, D, E, F16W_3, 13, W[ 0], 3 );
   ROUND2_16W( E, A, B, C, D, F16W_3,  7, W[ 4], 3 );
   ROUND2_16W( D, E, A, B, C, F16W_3,  5, W[13], 3 );

   // Set 2, steps 48-63
   ROUND2_16W( C, D, E, A, B, F16W_2, 15, W[ 8], 4 );
   ROUND2_16W( B, C, D, E, A, F16W_2,  5, W[ 6], 4 );
   ROUND2_16W( A, B, C, D, E, F16W_2,  8, W[ 4], 4 );
   ROUND2_16W( E, A, B, C, D, F16W_2, 11, W[ 1], 4 );
   ROUND2_16W( D, E, A, B, C, F16W_2, 14, W[ 3], 4 );
   ROUND2_16W( C, D, E, A, B, F16W_2, 14, W[11], 4 );
   ROUND2_16W( B, C, D, E, A, F16W_2,  6, W[15], 4 );
   ROUND2_16W( A, B, C, D, E, F16W_2, 14, W[ 0], 4 );
   ROUND2_16W( E, A, B, C, D, F16W_2,  6, W[ 5], 4 );
   ROUND2_16W( D, E, A, B, C, F16W_2,  9, W[12], 4 );
   ROUND2_16W( C, D, E, A, B, F16W_2, 12, W[ 2], 4 );
   ROUND2_16W( B, C, D, E, A, F16W_2,  9, W[13], 4 );
   ROUND2_16W( A, B, C, D, E, F16W_2, 12, W[ 9], 4 );
   ROUND2_16W( E, A, B, C, D, F16W_2,  5, W[ 7], 4 );
   ROUND2_16W( D, E, A, B, C, F16W_2, 15, W[10], 4 );
   ROUND2_16W( C, D, E, A, B, F16W_2,  8, W[14], 4 );

   // Set 2, steps 64-79
   ROUND2_16W( B, C, D, E, A, F16W_1,  8, W[12], 5 );
   ROUND2_16W( A, B, C, D, E, F16W_1,  5, W[15], 5 );
   ROUND2_16W( E, A, B, C, D, F16W_1, 12, W[10], 5 );
   ROUND2_16W( D, E, A, B, C, F16W_1,  9, W[ 4], 5 );
   ROUND2_16W( C, D, E, A, B, F16W_1, 12, W[ 1], 5 );
   ROUND2_16W( B, C, D, E, A, F16W_1,  5, W[ 5], 5 );
   ROUND2_16W( A, B, C, D, E, F16W_1, 14, W[ 8], 5 );
   ROUND2_16W( E, A, B, C, D, F16W_1,  6, W[ 7], 5 );
   ROUND2_16W( D, E, A, B, C, F16W_1,  8, W[ 6], 5 );
   ROUND2_16W( C, D, E, A, B, F16W_1, 13, W[ 2], 5 );
   ROUND2_16W( B, C, D, E, A, F16W_1,  6, W[13], 5 );
   ROUND2_16W( A, B, C, D, E, F16W_1,  5, W[14], 5 );
   ROUND2_16W( E, A, B, C, D, F16W_1, 15, W[ 0], 5 );
   ROUND2_16W( D, E, A, B, C, F16W_1, 13, W[ 3], 5 );
   ROUND2_16W( C, D, E, A, B, F16W_1, 11, W[ 9], 5 );
   ROUND2_16W( B, C, D, E, A, F16W_1, 11, W[11], 5 );

   // Feed-forward: every new word is computed from the old state before
   // any word is overwritten.
   n0 = _mm512_add_epi32( _mm512_add_epi32( state[1], C1 ), D2 );
   n1 = _mm512_add_epi32( _mm512_add_epi32( state[2], D1 ), E2 );
   n2 = _mm512_add_epi32( _mm512_add_epi32( state[3], E1 ), A2 );
   n3 = _mm512_add_epi32( _mm512_add_epi32( state[4], A1 ), B2 );
   n4 = _mm512_add_epi32( _mm512_add_epi32( state[0], B1 ), C2 );
   state[0] = n0;
   state[1] = n1;
   state[2] = n2;
   state[3] = n3;
   state[4] = n4;
}
|
||||
|
||||
// Reset a 16-way RIPEMD-160 context: load the five initial chaining
// words into every lane and clear the byte counter.
void ripemd160_16way_init( ripemd160_16way_context *sc )
{
   __m512i *iv = sc->val;

   // Each 64 bit constant is the 32 bit initial word repeated twice.
   iv[0] = m512_const1_64( 0x6745230167452301 );
   iv[1] = m512_const1_64( 0xEFCDAB89EFCDAB89 );
   iv[2] = m512_const1_64( 0x98BADCFE98BADCFE );
   iv[3] = m512_const1_64( 0x1032547610325476 );
   iv[4] = m512_const1_64( 0xC3D2E1F0C3D2E1F0 );

   sc->count_low = 0;
   sc->count_high = 0;
}
|
||||
|
||||
// Absorb len bytes per lane of 16x32 interleaved data into the context.
// A block is compressed whenever 64 bytes per lane have accumulated.
// Lengths are shifted right by 2 to index vectors, so len is presumably
// expected to be a multiple of 4 bytes - confirm against callers.
void ripemd160_16way( ripemd160_16way_context *sc, const void *data,
                      size_t len )
{
   const int block_size = 64;
   __m512i *src = (__m512i*)data;
   size_t fill = (unsigned)sc->count_low & (block_size - 1U);

   for ( ; len > 0; )
   {
      // Take as much as fits in the current block.
      size_t chunk = block_size - fill;
      if ( chunk > len )
         chunk = len;

      // One __m512i holds 4 bytes for each of the 16 lanes.
      memcpy_512( sc->buf + (fill>>2), src, chunk>>2 );
      src += chunk>>2;
      fill += chunk;
      len -= chunk;

      if ( fill == block_size )
      {
         ripemd160_16way_round( sc );
         fill = 0;
      }

      // 64 bit byte counter kept as two 32 bit halves; carry on wrap.
      const uint32_t before = sc->count_low;
      const uint32_t after = before + chunk;
      sc->count_low = after;
      if ( after < before )
         sc->count_high++;
   }
}
|
||||
|
||||
// Finish the hash: append the 0x80 pad word, zero fill, append the 64 bit
// message length in bits, compress, and write the five state vectors to
// dst (5 x 64 bytes, 16x32 interleaved).
void ripemd160_16way_close( ripemd160_16way_context *sc, void *dst )
{
   const int block_size = 64;
   const int pad = block_size - 8;   // offset of the length field
   unsigned pos = (unsigned)sc->count_low & ( block_size - 1U );

   sc->buf[ pos>>2 ] = m512_const1_32( 0x80 );
   pos += 4;

   if ( pos <= pad )
      memset_zero_512( sc->buf + (pos>>2), (pad - pos) >> 2 );
   else
   {
      // No room left for the length: flush this block and start an
      // all-zero one.
      memset_zero_512( sc->buf + (pos>>2), (block_size - pos) >> 2 );
      ripemd160_16way_round( sc );
      memset_zero_512( sc->buf, pad>>2 );
   }

   // Convert the byte count to a bit count split over two 32 bit words.
   const uint32_t bytes_lo = sc->count_low;
   const uint32_t bits_hi = (sc->count_high << 3) | (bytes_lo >> 29);
   const uint32_t bits_lo = bytes_lo << 3;

   sc->buf[ pad>>2 ] = _mm512_set1_epi32( bits_lo );
   sc->buf[ (pad>>2) + 1 ] = _mm512_set1_epi32( bits_hi );
   ripemd160_16way_round( sc );

   for ( unsigned w = 0; w < 5; w++ )
      casti_m512i( dst, w ) = sc->val[w];
}
|
||||
|
||||
#endif // AVX512
|
||||
|
@@ -32,7 +32,21 @@ void ripemd160_8way_init( ripemd160_8way_context *sc );
|
||||
void ripemd160_8way( ripemd160_8way_context *sc, const void *data, size_t len );
|
||||
void ripemd160_8way_close( ripemd160_8way_context *sc, void *dst );
|
||||
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
|
||||
typedef struct
|
||||
{
|
||||
__m512i buf[64>>2];
|
||||
__m512i val[5];
|
||||
uint32_t count_high, count_low;
|
||||
} __attribute__ ((aligned (128))) ripemd160_16way_context;
|
||||
|
||||
void ripemd160_16way_init( ripemd160_16way_context *sc );
|
||||
void ripemd160_16way( ripemd160_16way_context *sc, const void *data,
|
||||
size_t len );
|
||||
void ripemd160_16way_close( ripemd160_16way_context *sc, void *dst );
|
||||
|
||||
#endif // AVX512
|
||||
#endif // __AVX2__
|
||||
#endif // __SSE4_2__
|
||||
#endif // RIPEMD_HASH_4WAY_H__
|
||||
|
@@ -74,7 +74,8 @@ typedef struct {
|
||||
} sha256_8way_context __attribute__ ((aligned (128)));
|
||||
|
||||
void sha256_8way_init( sha256_8way_context *sc );
|
||||
void sha256_8way( sha256_8way_context *sc, const void *data, size_t len );
|
||||
void sha256_8way_update( sha256_8way_context *sc, const void *data, size_t len );
|
||||
#define sha256_8way sha256_8way_update
|
||||
void sha256_8way_close( sha256_8way_context *sc, void *dst );
|
||||
|
||||
//#define SPH_SIZE_sha512 512
|
||||
|
@@ -100,9 +100,20 @@ c512( sph_shavite_big_context *sc, const void *msg )
|
||||
p3 = h[3];
|
||||
|
||||
// round
|
||||
|
||||
// working proof of concept
|
||||
/*
|
||||
__m512i K = m512_const1_128( m[0] );
|
||||
__m512i X = _mm512_xor_si512( m512_const1_128( p1 ), K );
|
||||
X = _mm512_aesenc_epi128( X, m512_zero );
|
||||
k00 = _mm512_castsi512_si128( K );
|
||||
x = _mm512_castsi512_si128( X );
|
||||
*/
|
||||
|
||||
k00 = m[0];
|
||||
x = _mm_xor_si128( p1, k00 );
|
||||
x = _mm_aesenc_si128( x, zero );
|
||||
|
||||
k01 = m[1];
|
||||
x = _mm_xor_si128( x, k01 );
|
||||
x = _mm_aesenc_si128( x, zero );
|
||||
|
@@ -52,10 +52,10 @@ bool register_x16r_algo( algo_gate_t* gate )
|
||||
|
||||
bool register_x16rv2_algo( algo_gate_t* gate )
|
||||
{
|
||||
#if defined (X16R_8WAY)
|
||||
#if defined (X16RV2_8WAY)
|
||||
gate->scanhash = (void*)&scanhash_x16rv2_8way;
|
||||
gate->hash = (void*)&x16rv2_8way_hash;
|
||||
#elif defined (X16R_4WAY)
|
||||
#elif defined (X16RV2_4WAY)
|
||||
gate->scanhash = (void*)&scanhash_x16rv2_4way;
|
||||
gate->hash = (void*)&x16rv2_4way_hash;
|
||||
#else
|
||||
@@ -205,10 +205,10 @@ void veil_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
|
||||
|
||||
bool register_x16rt_algo( algo_gate_t* gate )
|
||||
{
|
||||
#if defined (X16R_8WAY)
|
||||
#if defined (X16RT_8WAY)
|
||||
gate->scanhash = (void*)&scanhash_x16rt_8way;
|
||||
gate->hash = (void*)&x16rt_8way_hash;
|
||||
#elif defined (X16R_4WAY)
|
||||
#elif defined (X16RT_4WAY)
|
||||
gate->scanhash = (void*)&scanhash_x16rt_4way;
|
||||
gate->hash = (void*)&x16rt_4way_hash;
|
||||
#else
|
||||
@@ -222,10 +222,10 @@ bool register_x16rt_algo( algo_gate_t* gate )
|
||||
|
||||
bool register_x16rt_veil_algo( algo_gate_t* gate )
|
||||
{
|
||||
#if defined (X16R_8WAY)
|
||||
#if defined (X16RT_8WAY)
|
||||
gate->scanhash = (void*)&scanhash_x16rt_8way;
|
||||
gate->hash = (void*)&x16rt_8way_hash;
|
||||
#elif defined (X16R_4WAY)
|
||||
#elif defined (X16RT_4WAY)
|
||||
gate->scanhash = (void*)&scanhash_x16rt_4way;
|
||||
gate->hash = (void*)&x16rt_4way_hash;
|
||||
#else
|
||||
@@ -258,16 +258,23 @@ bool register_hex_algo( algo_gate_t* gate )
|
||||
|
||||
bool register_x21s_algo( algo_gate_t* gate )
|
||||
{
|
||||
#if defined (X16R_4WAY)
|
||||
#if defined (X21S_8WAY)
|
||||
gate->scanhash = (void*)&scanhash_x21s_8way;
|
||||
gate->hash = (void*)&x21s_8way_hash;
|
||||
gate->miner_thread_init = (void*)&x21s_8way_thread_init;
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT;
|
||||
#elif defined (X21S_4WAY)
|
||||
gate->scanhash = (void*)&scanhash_x21s_4way;
|
||||
gate->hash = (void*)&x21s_4way_hash;
|
||||
gate->miner_thread_init = (void*)&x21s_4way_thread_init;
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | SHA_OPT | AVX512_OPT;
|
||||
#else
|
||||
gate->scanhash = (void*)&scanhash_x21s;
|
||||
gate->hash = (void*)&x21s_hash;
|
||||
gate->miner_thread_init = (void*)&x21s_thread_init;
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | SHA_OPT | AVX512_OPT;
|
||||
#endif
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | SHA_OPT;
|
||||
// gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | SHA_OPT | AVX512_OPT;
|
||||
x16_r_s_getAlgoString = (void*)&x16s_getAlgoString;
|
||||
opt_target_factor = 256.0;
|
||||
return true;
|
||||
|
@@ -12,6 +12,24 @@
|
||||
#define X16R_4WAY 1
|
||||
#endif
|
||||
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
#define X16RV2_8WAY 1
|
||||
#elif defined(__AVX2__) && defined(__AES__)
|
||||
#define X16RV2_4WAY 1
|
||||
#endif
|
||||
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
#define X16RT_8WAY 1
|
||||
#elif defined(__AVX2__) && defined(__AES__)
|
||||
#define X16RT_4WAY 1
|
||||
#endif
|
||||
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
#define X21S_8WAY 1
|
||||
#elif defined(__AVX2__) && defined(__AES__)
|
||||
#define X21S_4WAY 1
|
||||
#endif
|
||||
|
||||
enum x16r_Algo {
|
||||
BLAKE = 0,
|
||||
BMW,
|
||||
@@ -46,18 +64,13 @@ bool register_x16rt_algo( algo_gate_t* gate );
|
||||
bool register_hex__algo( algo_gate_t* gate );
|
||||
bool register_x21s__algo( algo_gate_t* gate );
|
||||
|
||||
// x16r, x16s
|
||||
#if defined(X16R_8WAY)
|
||||
|
||||
void x16r_8way_hash( void *state, const void *input );
|
||||
int scanhash_x16r_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
void x16rv2_8way_hash( void *state, const void *input );
|
||||
int scanhash_x16rv2_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
void x16rt_8way_hash( void *state, const void *input );
|
||||
int scanhash_x16rt_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
#elif defined(X16R_4WAY)
|
||||
|
||||
@@ -65,31 +78,65 @@ void x16r_4way_hash( void *state, const void *input );
|
||||
int scanhash_x16r_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
void x16rv2_4way_hash( void *state, const void *input );
|
||||
int scanhash_x16rv2_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
void x16rt_4way_hash( void *state, const void *input );
|
||||
int scanhash_x16rt_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
#else
|
||||
|
||||
void x16r_hash( void *state, const void *input );
|
||||
int scanhash_x16r( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
#endif
|
||||
|
||||
// x16Rv2
|
||||
#if defined(X16RV2_8WAY)
|
||||
|
||||
void x16rv2_8way_hash( void *state, const void *input );
|
||||
int scanhash_x16rv2_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
#elif defined(X16RV2_4WAY)
|
||||
|
||||
void x16rv2_4way_hash( void *state, const void *input );
|
||||
int scanhash_x16rv2_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
#else
|
||||
|
||||
void x16rv2_hash( void *state, const void *input );
|
||||
int scanhash_x16rv2( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
#endif
|
||||
|
||||
// x16rt, veil
|
||||
#if defined(X16RT_8WAY)
|
||||
|
||||
void x16rt_8way_hash( void *state, const void *input );
|
||||
int scanhash_x16rt_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
#elif defined(X16RT_4WAY)
|
||||
|
||||
void x16rt_4way_hash( void *state, const void *input );
|
||||
int scanhash_x16rt_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
#else
|
||||
|
||||
void x16rt_hash( void *state, const void *input );
|
||||
int scanhash_x16rt( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(X16R_4WAY)
|
||||
// x21s
|
||||
#if defined(X21S_8WAY)
|
||||
|
||||
void x21s_8way_hash( void *state, const void *input );
|
||||
int scanhash_x21s_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
bool x21s_8way_thread_init();
|
||||
|
||||
#elif defined(X21S_4WAY)
|
||||
|
||||
void x21s_4way_hash( void *state, const void *input );
|
||||
int scanhash_x21s_4way( struct work *work, uint32_t max_nonce,
|
||||
|
@@ -24,7 +24,7 @@
|
||||
static __thread uint32_t s_ntime = UINT32_MAX;
|
||||
static __thread char hashOrder[X16R_HASH_FUNC_COUNT + 1] = { 0 };
|
||||
|
||||
#if defined (X16R_8WAY)
|
||||
#if defined (X16RT_8WAY)
|
||||
|
||||
union _x16rt_8way_context_overlay
|
||||
{
|
||||
@@ -407,7 +407,7 @@ int scanhash_x16rt_8way( struct work *work, uint32_t max_nonce,
|
||||
return 0;
|
||||
}
|
||||
|
||||
#elif defined (X16R_4WAY)
|
||||
#elif defined (X16RT_4WAY)
|
||||
|
||||
union _x16rt_4way_context_overlay
|
||||
{
|
||||
|
@@ -31,7 +31,7 @@
|
||||
static __thread uint32_t s_ntime = UINT32_MAX;
|
||||
static __thread char hashOrder[X16R_HASH_FUNC_COUNT + 1] = { 0 };
|
||||
|
||||
#if defined (X16R_8WAY)
|
||||
#if defined (X16RV2_8WAY)
|
||||
|
||||
union _x16rv2_8way_context_overlay
|
||||
{
|
||||
@@ -497,10 +497,7 @@ int scanhash_x16rv2_8way( struct work *work, uint32_t max_nonce,
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
#elif defined (X16R_4WAY)
|
||||
|
||||
|
||||
#elif defined (X16RV2_4WAY)
|
||||
|
||||
union _x16rv2_4way_context_overlay
|
||||
{
|
||||
|
@@ -1,13 +1,10 @@
|
||||
/**
|
||||
* x16r algo implementation
|
||||
* x21s algo implementation
|
||||
*
|
||||
* Implementation by tpruvot@github Jan 2018
|
||||
* Optimized by JayDDee@github Jan 2018
|
||||
*/
|
||||
#include "x16r-gate.h"
|
||||
|
||||
#if defined (X16R_4WAY)
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
@@ -21,6 +18,7 @@
|
||||
#include "algo/shavite/sph_shavite.h"
|
||||
#include "algo/luffa/luffa-hash-2way.h"
|
||||
#include "algo/cubehash/cubehash_sse2.h"
|
||||
#include "algo/cubehash/cube-hash-2way.h"
|
||||
#include "algo/simd/simd-hash-2way.h"
|
||||
#include "algo/echo/aes_ni/hash_api.h"
|
||||
#include "algo/hamsi/hamsi-hash-4way.h"
|
||||
@@ -38,6 +36,483 @@
|
||||
|
||||
static __thread uint32_t s_ntime = UINT32_MAX;
|
||||
static __thread char hashOrder[X16R_HASH_FUNC_COUNT + 1] = { 0 };
|
||||
|
||||
#if defined (X21S_8WAY)
|
||||
|
||||
static __thread uint64_t* x21s_8way_matrix;
|
||||
|
||||
union _x21s_8way_context_overlay
|
||||
{
|
||||
blake512_8way_context blake;
|
||||
bmw512_8way_context bmw;
|
||||
hashState_groestl groestl;
|
||||
skein512_8way_context skein;
|
||||
jh512_8way_context jh;
|
||||
keccak512_8way_context keccak;
|
||||
luffa_4way_context luffa;
|
||||
cube_4way_context cube;
|
||||
sph_shavite512_context shavite;
|
||||
simd_4way_context simd;
|
||||
hashState_echo echo;
|
||||
hamsi512_8way_context hamsi;
|
||||
sph_fugue512_context fugue;
|
||||
shabal512_8way_context shabal;
|
||||
sph_whirlpool_context whirlpool;
|
||||
sha512_8way_context sha512;
|
||||
haval256_5_8way_context haval;
|
||||
sph_tiger_context tiger;
|
||||
sph_gost512_context gost;
|
||||
sha256_8way_context sha256;
|
||||
} __attribute__ ((aligned (64)));
|
||||
|
||||
typedef union _x21s_8way_context_overlay x21s_8way_context_overlay;
|
||||
|
||||
void x21s_8way_hash( void* output, const void* input )
|
||||
{
|
||||
uint32_t vhash[24*8] __attribute__ ((aligned (128)));
|
||||
uint32_t hash0[24] __attribute__ ((aligned (64)));
|
||||
uint32_t hash1[24] __attribute__ ((aligned (64)));
|
||||
uint32_t hash2[24] __attribute__ ((aligned (64)));
|
||||
uint32_t hash3[24] __attribute__ ((aligned (64)));
|
||||
uint32_t hash4[24] __attribute__ ((aligned (64)));
|
||||
uint32_t hash5[24] __attribute__ ((aligned (64)));
|
||||
uint32_t hash6[24] __attribute__ ((aligned (64)));
|
||||
uint32_t hash7[24] __attribute__ ((aligned (64)));
|
||||
x21s_8way_context_overlay ctx;
|
||||
void *in0 = (void*) hash0;
|
||||
void *in1 = (void*) hash1;
|
||||
void *in2 = (void*) hash2;
|
||||
void *in3 = (void*) hash3;
|
||||
void *in4 = (void*) hash4;
|
||||
void *in5 = (void*) hash5;
|
||||
void *in6 = (void*) hash6;
|
||||
void *in7 = (void*) hash7;
|
||||
int size = 80;
|
||||
|
||||
dintrlv_8x64( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
|
||||
input, 640 );
|
||||
|
||||
for ( int i = 0; i < 16; i++ )
|
||||
{
|
||||
const char elem = hashOrder[i];
|
||||
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
|
||||
|
||||
switch ( algo )
|
||||
{
|
||||
case BLAKE:
|
||||
blake512_8way_init( &ctx.blake );
|
||||
if ( i == 0 )
|
||||
blake512_8way_update( &ctx.blake, input, size );
|
||||
else
|
||||
{
|
||||
intrlv_8x64( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
|
||||
size<<3 );
|
||||
blake512_8way_update( &ctx.blake, vhash, size );
|
||||
}
|
||||
blake512_8way_close( &ctx.blake, vhash );
|
||||
dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7, vhash );
|
||||
break;
|
||||
case BMW:
|
||||
bmw512_8way_init( &ctx.bmw );
|
||||
if ( i == 0 )
|
||||
bmw512_8way_update( &ctx.bmw, input, size );
|
||||
else
|
||||
{
|
||||
intrlv_8x64( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
|
||||
size<<3 );
|
||||
bmw512_8way_update( &ctx.bmw, vhash, size );
|
||||
}
|
||||
bmw512_8way_close( &ctx.bmw, vhash );
|
||||
dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7, vhash );
|
||||
break;
|
||||
case GROESTL:
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash0,
|
||||
(const char*)in0, size<<3 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash1,
|
||||
(const char*)in1, size<<3 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash2,
|
||||
(const char*)in2, size<<3 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash3,
|
||||
(const char*)in3, size<<3 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash4,
|
||||
(const char*)in4, size<<3 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash5,
|
||||
(const char*)in5, size<<3 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash6,
|
||||
(const char*)in6, size<<3 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash7,
|
||||
(const char*)in7, size<<3 );
|
||||
break;
|
||||
case SKEIN:
|
||||
skein512_8way_init( &ctx.skein );
|
||||
if ( i == 0 )
|
||||
skein512_8way_update( &ctx.skein, input, size );
|
||||
else
|
||||
{
|
||||
intrlv_8x64( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
|
||||
size<<3 );
|
||||
skein512_8way_update( &ctx.skein, vhash, size );
|
||||
}
|
||||
skein512_8way_close( &ctx.skein, vhash );
|
||||
dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7, vhash );
|
||||
break;
|
||||
case JH:
|
||||
jh512_8way_init( &ctx.jh );
|
||||
if ( i == 0 )
|
||||
jh512_8way_update( &ctx.jh, input, size );
|
||||
else
|
||||
{
|
||||
intrlv_8x64( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
|
||||
size<<3 );
|
||||
jh512_8way_update( &ctx.jh, vhash, size );
|
||||
}
|
||||
jh512_8way_close( &ctx.jh, vhash );
|
||||
dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7, vhash );
|
||||
break;
|
||||
case KECCAK:
|
||||
keccak512_8way_init( &ctx.keccak );
|
||||
if ( i == 0 )
|
||||
keccak512_8way_update( &ctx.keccak, input, size );
|
||||
else
|
||||
{
|
||||
intrlv_8x64( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
|
||||
size<<3 );
|
||||
keccak512_8way_update( &ctx.keccak, vhash, size );
|
||||
}
|
||||
keccak512_8way_close( &ctx.keccak, vhash );
|
||||
dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7, vhash );
|
||||
break;
|
||||
case LUFFA:
|
||||
intrlv_4x128( vhash, in0, in1, in2, in3, size<<3 );
|
||||
luffa_4way_init( &ctx.luffa, 512 );
|
||||
luffa_4way_update_close( &ctx.luffa, vhash, vhash, size );
|
||||
dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhash );
|
||||
intrlv_4x128( vhash, in4, in5, in6, in7, size<<3 );
|
||||
luffa_4way_init( &ctx.luffa, 512 );
|
||||
luffa_4way_update_close( &ctx.luffa, vhash, vhash, size);
|
||||
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash );
|
||||
break;
|
||||
case CUBEHASH:
|
||||
intrlv_4x128( vhash, in0, in1, in2, in3, size<<3 );
|
||||
cube_4way_init( &ctx.cube, 512, 16, 32 );
|
||||
cube_4way_update_close( &ctx.cube, vhash, vhash, 64 );
|
||||
dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhash );
|
||||
intrlv_4x128( vhash, in4, in5, in6, in7, size<<3 );
|
||||
cube_4way_init( &ctx.cube, 512, 16, 32 );
|
||||
cube_4way_update_close( &ctx.cube, vhash, vhash, 64 );
|
||||
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash );
|
||||
break;
|
||||
case SHAVITE:
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, in0, size );
|
||||
sph_shavite512_close( &ctx.shavite, hash0 );
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, in1, size );
|
||||
sph_shavite512_close( &ctx.shavite, hash1 );
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, in2, size );
|
||||
sph_shavite512_close( &ctx.shavite, hash2 );
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, in3, size );
|
||||
sph_shavite512_close( &ctx.shavite, hash3 );
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, in4, size );
|
||||
sph_shavite512_close( &ctx.shavite, hash4 );
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, in5, size );
|
||||
sph_shavite512_close( &ctx.shavite, hash5 );
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, in6, size );
|
||||
sph_shavite512_close( &ctx.shavite, hash6 );
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, in7, size );
|
||||
sph_shavite512_close( &ctx.shavite, hash7 );
|
||||
break;
|
||||
case SIMD:
|
||||
intrlv_4x128( vhash, in0, in1, in2, in3, size<<3 );
|
||||
simd_4way_init( &ctx.simd, 512 );
|
||||
simd_4way_update_close( &ctx.simd, vhash, vhash, size<<3 );
|
||||
dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhash );
|
||||
intrlv_4x128( vhash, in4, in5, in6, in7, size<<3 );
|
||||
simd_4way_init( &ctx.simd, 512 );
|
||||
simd_4way_update_close( &ctx.simd, vhash, vhash, size<<3 );
|
||||
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash );
|
||||
break;
|
||||
case ECHO:
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence *)hash0,
|
||||
(const BitSequence*)in0, size<<3 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence *)hash1,
|
||||
(const BitSequence*)in1, size<<3 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence *)hash2,
|
||||
(const BitSequence*)in2, size<<3 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence *)hash3,
|
||||
(const BitSequence*)in3, size<<3 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence *)hash4,
|
||||
(const BitSequence*)in4, size<<3 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence *)hash5,
|
||||
(const BitSequence*)in5, size<<3 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence *)hash6,
|
||||
(const BitSequence*)in6, size<<3 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence *)hash7,
|
||||
(const BitSequence*)in7, size<<3 );
|
||||
break;
|
||||
case HAMSI:
|
||||
intrlv_8x64( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
|
||||
size<<3 );
|
||||
|
||||
hamsi512_8way_init( &ctx.hamsi );
|
||||
hamsi512_8way_update( &ctx.hamsi, vhash, size );
|
||||
hamsi512_8way_close( &ctx.hamsi, vhash );
|
||||
dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7, vhash );
|
||||
break;
|
||||
case FUGUE:
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, in0, size );
|
||||
sph_fugue512_close( &ctx.fugue, hash0 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, in1, size );
|
||||
sph_fugue512_close( &ctx.fugue, hash1 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, in2, size );
|
||||
sph_fugue512_close( &ctx.fugue, hash2 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, in3, size );
|
||||
sph_fugue512_close( &ctx.fugue, hash3 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, in4, size );
|
||||
sph_fugue512_close( &ctx.fugue, hash4 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, in5, size );
|
||||
sph_fugue512_close( &ctx.fugue, hash5 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, in6, size );
|
||||
sph_fugue512_close( &ctx.fugue, hash6 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, in7, size );
|
||||
sph_fugue512_close( &ctx.fugue, hash7 );
|
||||
break;
|
||||
case SHABAL:
|
||||
intrlv_8x32( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
|
||||
size<<3 );
|
||||
shabal512_8way_init( &ctx.shabal );
|
||||
shabal512_8way_update( &ctx.shabal, vhash, size );
|
||||
shabal512_8way_close( &ctx.shabal, vhash );
|
||||
dintrlv_8x32_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7, vhash );
|
||||
break;
|
||||
case WHIRLPOOL:
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, in0, size );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash0 );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, in1, size );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash1 );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, in2, size );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash2 );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, in3, size );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash3 );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, in4, size );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash4 );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, in5, size );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash5 );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, in6, size );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash6 );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, in7, size );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash7 );
|
||||
break;
|
||||
case SHA_512:
|
||||
intrlv_8x64( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
|
||||
size<<3 );
|
||||
sha512_8way_init( &ctx.sha512 );
|
||||
sha512_8way_update( &ctx.sha512, vhash, size );
|
||||
sha512_8way_close( &ctx.sha512, vhash );
|
||||
dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7, vhash );
|
||||
break;
|
||||
}
|
||||
size = 64;
|
||||
}
|
||||
|
||||
intrlv_8x32_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7 );
|
||||
|
||||
haval256_5_8way_init( &ctx.haval );
|
||||
haval256_5_8way_update( &ctx.haval, vhash, 64 );
|
||||
haval256_5_8way_close( &ctx.haval, vhash );
|
||||
|
||||
dintrlv_8x32_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7, vhash );
|
||||
|
||||
sph_tiger_init( &ctx.tiger );
|
||||
sph_tiger ( &ctx.tiger, (const void*) hash0, 64 );
|
||||
sph_tiger_close( &ctx.tiger, (void*) hash0 );
|
||||
sph_tiger_init( &ctx.tiger );
|
||||
sph_tiger ( &ctx.tiger, (const void*) hash1, 64 );
|
||||
sph_tiger_close( &ctx.tiger, (void*) hash1 );
|
||||
sph_tiger_init( &ctx.tiger );
|
||||
sph_tiger ( &ctx.tiger, (const void*) hash2, 64 );
|
||||
sph_tiger_close( &ctx.tiger, (void*) hash2 );
|
||||
sph_tiger_init( &ctx.tiger );
|
||||
sph_tiger ( &ctx.tiger, (const void*) hash3, 64 );
|
||||
sph_tiger_close( &ctx.tiger, (void*) hash3 );
|
||||
sph_tiger_init( &ctx.tiger );
|
||||
sph_tiger ( &ctx.tiger, (const void*) hash4, 64 );
|
||||
sph_tiger_close( &ctx.tiger, (void*) hash4 );
|
||||
sph_tiger_init( &ctx.tiger );
|
||||
sph_tiger ( &ctx.tiger, (const void*) hash5, 64 );
|
||||
sph_tiger_close( &ctx.tiger, (void*) hash5 );
|
||||
sph_tiger_init( &ctx.tiger );
|
||||
sph_tiger ( &ctx.tiger, (const void*) hash6, 64 );
|
||||
sph_tiger_close( &ctx.tiger, (void*) hash6 );
|
||||
sph_tiger_init( &ctx.tiger );
|
||||
sph_tiger ( &ctx.tiger, (const void*) hash7, 64 );
|
||||
sph_tiger_close( &ctx.tiger, (void*) hash7 );
|
||||
|
||||
intrlv_2x256( vhash, hash0, hash1, 256 );
|
||||
LYRA2REV2_2WAY( x21s_8way_matrix, vhash, 32, vhash, 32, 1, 4, 4 );
|
||||
dintrlv_2x256( hash0, hash1, vhash, 256 );
|
||||
intrlv_2x256( vhash, hash2, hash3, 256 );
|
||||
LYRA2REV2_2WAY( x21s_8way_matrix, vhash, 32, vhash, 32, 1, 4, 4 );
|
||||
dintrlv_2x256( hash2, hash3, vhash, 256 );
|
||||
intrlv_2x256( vhash, hash4, hash5, 256 );
|
||||
LYRA2REV2_2WAY( x21s_8way_matrix, vhash, 32, vhash, 32, 1, 4, 4 );
|
||||
dintrlv_2x256( hash4, hash5, vhash, 256 );
|
||||
intrlv_2x256( vhash, hash6, hash7, 256 );
|
||||
LYRA2REV2_2WAY( x21s_8way_matrix, vhash, 32, vhash, 32, 1, 4, 4 );
|
||||
dintrlv_2x256( hash6, hash7, vhash, 256 );
|
||||
|
||||
sph_gost512_init( &ctx.gost );
|
||||
sph_gost512 ( &ctx.gost, (const void*) hash0, 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*) hash0 );
|
||||
sph_gost512_init( &ctx.gost );
|
||||
sph_gost512 ( &ctx.gost, (const void*) hash1, 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*) hash1 );
|
||||
sph_gost512_init( &ctx.gost );
|
||||
sph_gost512 ( &ctx.gost, (const void*) hash2, 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*) hash2 );
|
||||
sph_gost512_init( &ctx.gost );
|
||||
sph_gost512 ( &ctx.gost, (const void*) hash3, 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*) hash3 );
|
||||
sph_gost512_init( &ctx.gost );
|
||||
sph_gost512 ( &ctx.gost, (const void*) hash4, 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*) hash4 );
|
||||
sph_gost512_init( &ctx.gost );
|
||||
sph_gost512 ( &ctx.gost, (const void*) hash5, 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*) hash5 );
|
||||
sph_gost512_init( &ctx.gost );
|
||||
sph_gost512 ( &ctx.gost, (const void*) hash6, 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*) hash6 );
|
||||
sph_gost512_init( &ctx.gost );
|
||||
sph_gost512 ( &ctx.gost, (const void*) hash7, 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*) hash7 );
|
||||
|
||||
intrlv_8x32_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7 );
|
||||
sha256_8way_init( &ctx.sha256 );
|
||||
sha256_8way_update( &ctx.sha256, vhash, 64 );
|
||||
sha256_8way_close( &ctx.sha256, output );
|
||||
}
|
||||
|
||||
int scanhash_x21s_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr)
|
||||
{
|
||||
uint32_t hash[8*16] __attribute__ ((aligned (128)));
|
||||
uint32_t vdata[24*8] __attribute__ ((aligned (64)));
|
||||
uint32_t *hash7 = &hash[7<<3];
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
||||
uint32_t bedata1[2] __attribute__((aligned(64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
const uint32_t last_nonce = max_nonce - 16;
|
||||
int thr_id = mythr->id;
|
||||
__m512i *noncev = (__m512i*)vdata + 9; // aligned
|
||||
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
||||
|
||||
if ( opt_benchmark )
|
||||
ptarget[7] = 0x0cff;
|
||||
|
||||
mm512_bswap32_intrlv80_8x64( vdata, pdata );
|
||||
|
||||
bedata1[0] = bswap_32( pdata[1] );
|
||||
bedata1[1] = bswap_32( pdata[2] );
|
||||
uint32_t ntime = bswap_32( pdata[17] );
|
||||
if ( s_ntime != ntime )
|
||||
{
|
||||
x16_r_s_getAlgoString( (const uint8_t*)bedata1, hashOrder );
|
||||
s_ntime = ntime;
|
||||
if ( opt_debug && !thr_id )
|
||||
applog( LOG_DEBUG, "hash order %s (%08x)", hashOrder, ntime );
|
||||
}
|
||||
|
||||
do
|
||||
{
|
||||
*noncev = mm512_intrlv_blend_32( mm512_bswap_32(
|
||||
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
|
||||
n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
|
||||
|
||||
x21s_8way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
for ( int lane = 0; lane < 8; lane++ )
|
||||
if ( unlikely( hash7[lane] <= Htarg ) )
|
||||
{
|
||||
extr_lane_8x32( lane_hash, hash, lane, 256 );
|
||||
if ( likely( fulltest( lane_hash, ptarget ) && !opt_benchmark ) )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
}
|
||||
}
|
||||
n += 8;
|
||||
} while ( ( n < last_nonce ) && !(*restart) );
|
||||
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool x21s_8way_thread_init()
|
||||
{
|
||||
const int64_t ROW_LEN_INT64 = BLOCK_LEN_INT64 * 4; // nCols
|
||||
const int64_t ROW_LEN_BYTES = ROW_LEN_INT64 * 8;
|
||||
|
||||
const int size = (int64_t)ROW_LEN_BYTES * 4; // nRows;
|
||||
x21s_8way_matrix = _mm_malloc( 2 * size, 64 );
|
||||
return x21s_8way_matrix;
|
||||
}
|
||||
|
||||
#elif defined (X21S_4WAY)
|
||||
|
||||
static __thread uint64_t* x21s_4way_matrix;
|
||||
|
||||
union _x21s_4way_context_overlay
|
||||
|
@@ -1,7 +1,4 @@
|
||||
#include "x22i-gate.h"
|
||||
|
||||
#if defined(X22I_4WAY)
|
||||
|
||||
#include "algo/blake/blake-hash-4way.h"
|
||||
#include "algo/bmw/bmw-hash-4way.h"
|
||||
#include "algo/echo/aes_ni/hash_api.h"
|
||||
@@ -12,6 +9,7 @@
|
||||
#include "algo/luffa/luffa-hash-2way.h"
|
||||
#include "algo/cubehash/cube-hash-2way.h"
|
||||
#include "algo/shavite/shavite-hash-2way.h"
|
||||
#include "algo/shavite/sph_shavite.h"
|
||||
#include "algo/simd/simd-hash-2way.h"
|
||||
#include "algo/shavite/sph_shavite.h"
|
||||
#include "algo/hamsi/hamsi-hash-4way.h"
|
||||
@@ -25,6 +23,426 @@
|
||||
#include "algo/gost/sph_gost.h"
|
||||
#include "algo/swifftx/swifftx.h"
|
||||
|
||||
|
||||
#if defined(X22I_8WAY)
|
||||
|
||||
union _x22i_8way_ctx_overlay
|
||||
{
|
||||
blake512_8way_context blake;
|
||||
bmw512_8way_context bmw;
|
||||
hashState_groestl groestl;
|
||||
hashState_echo echo;
|
||||
skein512_8way_context skein;
|
||||
jh512_8way_context jh;
|
||||
keccak512_8way_context keccak;
|
||||
luffa_4way_context luffa;
|
||||
cube_4way_context cube;
|
||||
sph_shavite512_context shavite;
|
||||
simd_4way_context simd;
|
||||
hamsi512_8way_context hamsi;
|
||||
sph_fugue512_context fugue;
|
||||
shabal512_8way_context shabal;
|
||||
sph_whirlpool_context whirlpool;
|
||||
sha512_8way_context sha512;
|
||||
haval256_5_8way_context haval;
|
||||
sph_tiger_context tiger;
|
||||
sph_gost512_context gost;
|
||||
sha256_8way_context sha256;
|
||||
};
|
||||
typedef union _x22i_8way_ctx_overlay x22i_8way_ctx_overlay;
|
||||
|
||||
void x22i_8way_hash( void *output, const void *input )
|
||||
{
|
||||
uint64_t vhash[8*8] __attribute__ ((aligned (128)));
|
||||
uint64_t vhashA[8*8] __attribute__ ((aligned (64)));
|
||||
uint64_t vhashB[8*8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash0[8*4] __attribute__ ((aligned (64)));
|
||||
uint64_t hash1[8*4] __attribute__ ((aligned (64)));
|
||||
uint64_t hash2[8*4] __attribute__ ((aligned (64)));
|
||||
uint64_t hash3[8*4] __attribute__ ((aligned (64)));
|
||||
uint64_t hash4[8*4] __attribute__ ((aligned (64)));
|
||||
uint64_t hash5[8*4] __attribute__ ((aligned (64)));
|
||||
uint64_t hash6[8*4] __attribute__ ((aligned (64)));
|
||||
uint64_t hash7[8*4] __attribute__ ((aligned (64)));
|
||||
|
||||
// unsigned char hash[64 * 4] __attribute__((aligned(64))) = {0};
|
||||
unsigned char hashA0[64] __attribute__((aligned(64))) = {0};
|
||||
unsigned char hashA1[64] __attribute__((aligned(32))) = {0};
|
||||
unsigned char hashA2[64] __attribute__((aligned(32))) = {0};
|
||||
unsigned char hashA3[64] __attribute__((aligned(32))) = {0};
|
||||
unsigned char hashA4[64] __attribute__((aligned(64))) = {0};
|
||||
unsigned char hashA5[64] __attribute__((aligned(32))) = {0};
|
||||
unsigned char hashA6[64] __attribute__((aligned(32))) = {0};
|
||||
unsigned char hashA7[64] __attribute__((aligned(32))) = {0};
|
||||
x22i_8way_ctx_overlay ctx;
|
||||
|
||||
blake512_8way_init( &ctx.blake );
|
||||
blake512_8way_update( &ctx.blake, input, 80 );
|
||||
blake512_8way_close( &ctx.blake, vhash );
|
||||
|
||||
bmw512_8way_init( &ctx.bmw );
|
||||
bmw512_8way_update( &ctx.bmw, vhash, 64 );
|
||||
bmw512_8way_close( &ctx.bmw, vhash );
|
||||
|
||||
dintrlv_8x64_512( hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7, vhash );
|
||||
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash0,
|
||||
(const char*)hash0, 512 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash1,
|
||||
(const char*)hash1, 512 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash2,
|
||||
(const char*)hash2, 512 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash3,
|
||||
(const char*)hash3, 512 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash4,
|
||||
(const char*)hash4, 512 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash5,
|
||||
(const char*)hash5, 512 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash6,
|
||||
(const char*)hash6, 512 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash7,
|
||||
(const char*)hash7, 512 );
|
||||
|
||||
intrlv_8x64_512( vhash, hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7 );
|
||||
|
||||
skein512_8way_init( &ctx.skein );
|
||||
skein512_8way_update( &ctx.skein, vhash, 64 );
|
||||
skein512_8way_close( &ctx.skein, vhash );
|
||||
|
||||
jh512_8way_init( &ctx.jh );
|
||||
jh512_8way_update( &ctx.jh, vhash, 64 );
|
||||
jh512_8way_close( &ctx.jh, vhash );
|
||||
|
||||
keccak512_8way_init( &ctx.keccak );
|
||||
keccak512_8way_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_8way_close( &ctx.keccak, vhash );
|
||||
|
||||
rintrlv_8x64_4x128( vhashA, vhashB, vhash, 512 );
|
||||
|
||||
luffa_4way_init( &ctx.luffa, 512 );
|
||||
luffa_4way_update_close( &ctx.luffa, vhashA, vhashA, 64 );
|
||||
luffa_4way_init( &ctx.luffa, 512 );
|
||||
luffa_4way_update_close( &ctx.luffa, vhashB, vhashB, 64 );
|
||||
|
||||
cube_4way_init( &ctx.cube, 512, 16, 32 );
|
||||
cube_4way_update_close( &ctx.cube, vhashA, vhashA, 64 );
|
||||
cube_4way_init( &ctx.cube, 512, 16, 32 );
|
||||
cube_4way_update_close( &ctx.cube, vhashB, vhashB, 64 );
|
||||
|
||||
dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhashA );
|
||||
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhashB );
|
||||
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, hash0, 64 );
|
||||
sph_shavite512_close( &ctx.shavite, hash0 );
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, hash1, 64 );
|
||||
sph_shavite512_close( &ctx.shavite, hash1 );
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, hash2, 64 );
|
||||
sph_shavite512_close( &ctx.shavite, hash2 );
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, hash3, 64 );
|
||||
sph_shavite512_close( &ctx.shavite, hash3 );
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, hash4, 64 );
|
||||
sph_shavite512_close( &ctx.shavite, hash4 );
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, hash5, 64 );
|
||||
sph_shavite512_close( &ctx.shavite, hash5 );
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, hash6, 64 );
|
||||
sph_shavite512_close( &ctx.shavite, hash6 );
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, hash7, 64 );
|
||||
sph_shavite512_close( &ctx.shavite, hash7 );
|
||||
|
||||
intrlv_4x128_512( vhashA, hash0, hash1, hash2, hash3 );
|
||||
intrlv_4x128_512( vhashB, hash4, hash5, hash6, hash7 );
|
||||
|
||||
simd_4way_init( &ctx.simd, 512 );
|
||||
simd_4way_update_close( &ctx.simd, vhashA, vhashA, 512 );
|
||||
simd_4way_init( &ctx.simd, 512 );
|
||||
simd_4way_update_close( &ctx.simd, vhashB, vhashB, 512 );
|
||||
|
||||
dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhashA );
|
||||
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhashB );
|
||||
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence*)hash0,
|
||||
(const BitSequence*)hash0, 512 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence*)hash1,
|
||||
(const BitSequence*)hash1, 512 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence*)hash2,
|
||||
(const BitSequence*)hash2, 512 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence*)hash3,
|
||||
(const BitSequence*)hash3, 512 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence*)hash4,
|
||||
(const BitSequence*)hash4, 512 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence*)hash5,
|
||||
(const BitSequence*)hash5, 512 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence*)hash6,
|
||||
(const BitSequence*)hash6, 512 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence*)hash7,
|
||||
(const BitSequence*)hash7, 512 );
|
||||
|
||||
intrlv_8x64_512( vhash, hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7 );
|
||||
|
||||
hamsi512_8way_init( &ctx.hamsi );
|
||||
hamsi512_8way_update( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_8way_close( &ctx.hamsi, vhash );
|
||||
|
||||
dintrlv_8x64_512( hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7, vhash );
|
||||
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, hash0, 64 );
|
||||
sph_fugue512_close( &ctx.fugue, hash0 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, hash1, 64 );
|
||||
sph_fugue512_close( &ctx.fugue, hash1 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, hash2, 64 );
|
||||
sph_fugue512_close( &ctx.fugue, hash2 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, hash3, 64 );
|
||||
sph_fugue512_close( &ctx.fugue, hash3 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, hash4, 64 );
|
||||
sph_fugue512_close( &ctx.fugue, hash4 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, hash5, 64 );
|
||||
sph_fugue512_close( &ctx.fugue, hash5 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, hash6, 64 );
|
||||
sph_fugue512_close( &ctx.fugue, hash6 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, hash7, 64 );
|
||||
sph_fugue512_close( &ctx.fugue, hash7 );
|
||||
|
||||
intrlv_8x32_512( vhash, hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7 );
|
||||
|
||||
shabal512_8way_init( &ctx.shabal );
|
||||
shabal512_8way_update( &ctx.shabal, vhash, 64 );
|
||||
shabal512_8way_close( &ctx.shabal, vhash );
|
||||
|
||||
dintrlv_8x32_512( &hash0[8], &hash1[8], &hash2[8], &hash3[8],
|
||||
&hash4[8], &hash5[8], &hash6[8], &hash7[8], vhash );
|
||||
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, &hash0[8], 64 );
|
||||
sph_whirlpool_close( &ctx.whirlpool, &hash0[16] );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, &hash1[8], 64 );
|
||||
sph_whirlpool_close( &ctx.whirlpool, &hash1[16] );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, &hash2[8], 64 );
|
||||
sph_whirlpool_close( &ctx.whirlpool, &hash2[16] );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, &hash3[8], 64 );
|
||||
sph_whirlpool_close( &ctx.whirlpool, &hash3[16] );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, &hash4[8], 64 );
|
||||
sph_whirlpool_close( &ctx.whirlpool, &hash4[16] );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, &hash5[8], 64 );
|
||||
sph_whirlpool_close( &ctx.whirlpool, &hash5[16] );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, &hash6[8], 64 );
|
||||
sph_whirlpool_close( &ctx.whirlpool, &hash6[16] );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, &hash7[8], 64 );
|
||||
sph_whirlpool_close( &ctx.whirlpool, &hash7[16] );
|
||||
|
||||
intrlv_8x64_512( vhash, &hash0[16], &hash1[16], &hash2[16], &hash3[16],
|
||||
&hash4[16], &hash5[16], &hash6[16], &hash7[16] );
|
||||
|
||||
sha512_8way_init( &ctx.sha512 );
|
||||
sha512_8way_update( &ctx.sha512, vhash, 64 );
|
||||
sha512_8way_close( &ctx.sha512, vhash );
|
||||
|
||||
dintrlv_8x64_512( &hash0[24], &hash1[24], &hash2[24], &hash3[24],
|
||||
&hash4[24], &hash5[24], &hash6[24], &hash7[24], vhash );
|
||||
|
||||
ComputeSingleSWIFFTX((unsigned char*)hash0, (unsigned char*)hashA0);
|
||||
ComputeSingleSWIFFTX((unsigned char*)hash1, (unsigned char*)hashA1);
|
||||
ComputeSingleSWIFFTX((unsigned char*)hash2, (unsigned char*)hashA2);
|
||||
ComputeSingleSWIFFTX((unsigned char*)hash3, (unsigned char*)hashA3);
|
||||
ComputeSingleSWIFFTX((unsigned char*)hash4, (unsigned char*)hashA4);
|
||||
ComputeSingleSWIFFTX((unsigned char*)hash5, (unsigned char*)hashA5);
|
||||
ComputeSingleSWIFFTX((unsigned char*)hash6, (unsigned char*)hashA6);
|
||||
ComputeSingleSWIFFTX((unsigned char*)hash7, (unsigned char*)hashA7);
|
||||
|
||||
intrlv_8x32_512( vhashA, hashA0, hashA1, hashA2, hashA3,
|
||||
hashA4, hashA5, hashA6, hashA7 );
|
||||
|
||||
memset( vhash, 0, 64*8 );
|
||||
|
||||
haval256_5_8way_init( &ctx.haval );
|
||||
haval256_5_8way_update( &ctx.haval, vhashA, 64 );
|
||||
haval256_5_8way_close( &ctx.haval, vhash );
|
||||
|
||||
dintrlv_8x32_512( hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7, vhash );
|
||||
|
||||
memset( hashA0, 0, 64 );
|
||||
memset( hashA1, 0, 64 );
|
||||
memset( hashA2, 0, 64 );
|
||||
memset( hashA3, 0, 64 );
|
||||
memset( hashA4, 0, 64 );
|
||||
memset( hashA5, 0, 64 );
|
||||
memset( hashA6, 0, 64 );
|
||||
memset( hashA7, 0, 64 );
|
||||
|
||||
sph_tiger_init(&ctx.tiger);
|
||||
sph_tiger (&ctx.tiger, (const void*) hash0, 64);
|
||||
sph_tiger_close(&ctx.tiger, (void*) hashA0);
|
||||
sph_tiger_init(&ctx.tiger);
|
||||
sph_tiger (&ctx.tiger, (const void*) hash1, 64);
|
||||
sph_tiger_close(&ctx.tiger, (void*) hashA1);
|
||||
sph_tiger_init(&ctx.tiger);
|
||||
sph_tiger (&ctx.tiger, (const void*) hash2, 64);
|
||||
sph_tiger_close(&ctx.tiger, (void*) hashA2);
|
||||
sph_tiger_init(&ctx.tiger);
|
||||
sph_tiger (&ctx.tiger, (const void*) hash3, 64);
|
||||
sph_tiger_close(&ctx.tiger, (void*) hashA3);
|
||||
sph_tiger_init(&ctx.tiger);
|
||||
sph_tiger (&ctx.tiger, (const void*) hash4, 64);
|
||||
sph_tiger_close(&ctx.tiger, (void*) hashA4);
|
||||
sph_tiger_init(&ctx.tiger);
|
||||
sph_tiger (&ctx.tiger, (const void*) hash5, 64);
|
||||
sph_tiger_close(&ctx.tiger, (void*) hashA5);
|
||||
sph_tiger_init(&ctx.tiger);
|
||||
sph_tiger (&ctx.tiger, (const void*) hash6, 64);
|
||||
sph_tiger_close(&ctx.tiger, (void*) hashA6);
|
||||
sph_tiger_init(&ctx.tiger);
|
||||
sph_tiger (&ctx.tiger, (const void*) hash7, 64);
|
||||
sph_tiger_close(&ctx.tiger, (void*) hashA7);
|
||||
|
||||
memset( hash0, 0, 64 );
|
||||
memset( hash1, 0, 64 );
|
||||
memset( hash2, 0, 64 );
|
||||
memset( hash3, 0, 64 );
|
||||
memset( hash4, 0, 64 );
|
||||
memset( hash5, 0, 64 );
|
||||
memset( hash6, 0, 64 );
|
||||
memset( hash7, 0, 64 );
|
||||
|
||||
intrlv_2x256( vhash, hashA0, hashA1, 256 );
|
||||
LYRA2RE_2WAY( vhash, 32, vhash, 32, 1, 4, 4 );
|
||||
dintrlv_2x256( hash0, hash1, vhash, 256 );
|
||||
intrlv_2x256( vhash, hashA2, hashA3, 256 );
|
||||
LYRA2RE_2WAY( vhash, 32, vhash, 32, 1, 4, 4 );
|
||||
dintrlv_2x256( hash2, hash3, vhash, 256 );
|
||||
intrlv_2x256( vhash, hashA4, hashA5, 256 );
|
||||
LYRA2RE_2WAY( vhash, 32, vhash, 32, 1, 4, 4 );
|
||||
dintrlv_2x256( hash4, hash5, vhash, 256 );
|
||||
intrlv_2x256( vhash, hashA6, hashA7, 256 );
|
||||
LYRA2RE_2WAY( vhash, 32, vhash, 32, 1, 4, 4 );
|
||||
dintrlv_2x256( hash6, hash7, vhash, 256 );
|
||||
|
||||
sph_gost512_init( &ctx.gost );
|
||||
sph_gost512 ( &ctx.gost, (const void*) hash0, 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*) hash0 );
|
||||
sph_gost512_init( &ctx.gost );
|
||||
sph_gost512 ( &ctx.gost, (const void*) hash1, 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*) hash1 );
|
||||
sph_gost512_init( &ctx.gost );
|
||||
sph_gost512 ( &ctx.gost, (const void*) hash2, 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*) hash2 );
|
||||
sph_gost512_init( &ctx.gost );
|
||||
sph_gost512 ( &ctx.gost, (const void*) hash3, 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*) hash3 );
|
||||
sph_gost512_init( &ctx.gost );
|
||||
sph_gost512 ( &ctx.gost, (const void*) hash4, 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*) hash4 );
|
||||
sph_gost512_init( &ctx.gost );
|
||||
sph_gost512 ( &ctx.gost, (const void*) hash5, 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*) hash5 );
|
||||
sph_gost512_init( &ctx.gost );
|
||||
sph_gost512 ( &ctx.gost, (const void*) hash6, 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*) hash6 );
|
||||
sph_gost512_init( &ctx.gost );
|
||||
sph_gost512 ( &ctx.gost, (const void*) hash7, 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*) hash7 );
|
||||
|
||||
intrlv_8x32_512( vhash, hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7 );
|
||||
|
||||
sha256_8way_init( &ctx.sha256 );
|
||||
sha256_8way_update( &ctx.sha256, vhash, 64 );
|
||||
sha256_8way_close( &ctx.sha256, output );
|
||||
}
|
||||
|
||||
int scanhash_x22i_8way( struct work* work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[8*16] __attribute__ ((aligned (128)));
|
||||
uint32_t vdata[24*8] __attribute__ ((aligned (64)));
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
||||
uint32_t *hash7 = &(hash[7<<3]);
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
__m512i *noncev = (__m512i*)vdata + 9; // aligned
|
||||
uint32_t n = first_nonce;
|
||||
const uint32_t last_nonce = max_nonce - 8;
|
||||
const int thr_id = mythr->id;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
|
||||
if (opt_benchmark)
|
||||
((uint32_t*)ptarget)[7] = 0x08ff;
|
||||
|
||||
InitializeSWIFFTX();
|
||||
|
||||
mm512_bswap32_intrlv80_8x64( vdata, pdata );
|
||||
do
|
||||
{
|
||||
*noncev = mm512_intrlv_blend_32( mm512_bswap_32(
|
||||
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
|
||||
n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
|
||||
x22i_8way_hash( hash, vdata );
|
||||
|
||||
for ( int lane = 0; lane < 8; lane++ )
|
||||
if unlikely( ( hash7[ lane ] <= Htarg ) )
|
||||
{
|
||||
extr_lane_8x32( lane_hash, hash, lane, 256 );
|
||||
if ( likely( fulltest( lane_hash, ptarget ) && !opt_benchmark ) )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
}
|
||||
}
|
||||
n += 8;
|
||||
} while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) );
|
||||
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
#elif defined(X22I_4WAY)
|
||||
|
||||
|
||||
union _x22i_4way_ctx_overlay
|
||||
{
|
||||
blake512_4way_context blake;
|
||||
|
@@ -2,27 +2,39 @@
|
||||
|
||||
// Install the x22i scanhash and hash functions into the algo gate, choosing
// the widest implementation enabled at compile time (8-way, 4-way, or the
// scalar fallback), and advertise the matching optimization flags.
// Always returns true.
bool register_x22i_algo( algo_gate_t* gate )
{
#if defined (X22I_8WAY)
  gate->scanhash      = (void*)&scanhash_x22i_8way;
  gate->hash          = (void*)&x22i_8way_hash;
  gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT;
#elif defined (X22I_4WAY)
  gate->scanhash      = (void*)&scanhash_x22i_4way;
  gate->hash          = (void*)&x22i_4way_hash;
  gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | SHA_OPT | AVX512_OPT;
#else
  gate->scanhash      = (void*)&scanhash_x22i;
  gate->hash          = (void*)&x22i_hash;
  gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | SHA_OPT | AVX512_OPT;
#endif
  // The optimization mask is set per branch above; it must not be
  // overwritten here or the AVX512 flag would be lost.
  return true;
}
|
||||
|
||||
// Install the x25x scanhash and hash functions into the algo gate, choosing
// the widest implementation enabled at compile time (8-way, 4-way, or the
// scalar fallback). The same optimization mask is advertised for every
// variant. Always returns true.
bool register_x25x_algo( algo_gate_t* gate )
{
#if defined (X25X_8WAY)
  gate->scanhash  = (void*)&scanhash_x25x_8way;
  gate->hash      = (void*)&x25x_8way_hash;
#elif defined (X25X_4WAY)
  gate->scanhash  = (void*)&scanhash_x25x_4way;
  gate->hash      = (void*)&x25x_4way_hash;
#else
  gate->scanhash  = (void*)&scanhash_x25x;
  gate->hash      = (void*)&x25x_hash;
#endif
  gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | SHA_OPT;

  return true;
}
|
||||
|
||||
|
@@ -6,30 +6,64 @@
|
||||
#include <stdint.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#if defined(__AVX2__) && defined(__AES__)
|
||||
#define X22I_4WAY
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
#define X22I_8WAY 1
|
||||
#elif defined(__AVX2__) && defined(__AES__)
|
||||
#define X22I_4WAY 1
|
||||
#endif
|
||||
|
||||
bool register_x22i__algo( algo_gate_t* gate );
|
||||
bool register_x22i_algo( algo_gate_t* gate );
|
||||
|
||||
#if defined(X22I_4WAY)
|
||||
#if defined(X22I_8WAY)
|
||||
|
||||
void x22i_8way_hash( void *state, const void *input );
|
||||
int scanhash_x22i_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
#elif defined(X22I_4WAY)
|
||||
|
||||
void x22i_4way_hash( void *state, const void *input );
|
||||
int scanhash_x22i_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
void x25x_4way_hash( void *state, const void *input );
|
||||
int scanhash_x25x_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
#endif
|
||||
#else
|
||||
|
||||
void x22i_hash( void *state, const void *input );
|
||||
int scanhash_x22i( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
// Big problems with x25x 8 way. It blows up just by increasing the
|
||||
// buffer sizes and nothing else. It may have to do with accessing 2 dim arrays.
|
||||
|
||||
//#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
// #define X25X_8WAY 1
|
||||
#if defined(__AVX2__) && defined(__AES__)
|
||||
#define X25X_4WAY 1
|
||||
#endif
|
||||
|
||||
// Registration entry point for the x25x algorithm (defined as
// register_x25x_algo in the gate source).
bool register_x25x_algo( algo_gate_t* gate );
|
||||
|
||||
#if defined(X25X_8WAY)
|
||||
|
||||
void x25x_8way_hash( void *state, const void *input );
|
||||
int scanhash_x25x_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
#elif defined(X25X_4WAY)
|
||||
|
||||
void x25x_4way_hash( void *state, const void *input );
|
||||
int scanhash_x25x_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
#else
|
||||
|
||||
void x25x_hash( void *state, const void *input );
|
||||
int scanhash_x25x( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
#endif
|
||||
|
||||
#endif // X22I_GATE_H__
|
||||
|
@@ -1,6 +1,6 @@
|
||||
#include "x22i-gate.h"
|
||||
|
||||
#if defined(X22I_4WAY)
|
||||
#if defined(X25X_4WAY)
|
||||
|
||||
#include "algo/blake/blake-hash-4way.h"
|
||||
#include "algo/bmw/bmw-hash-4way.h"
|
||||
@@ -88,276 +88,282 @@ void x25x_4way_hash( void *output, const void *input )
|
||||
unsigned char hash2[25][64] __attribute__((aligned(64))) = {0};
|
||||
unsigned char hash3[25][64] __attribute__((aligned(64))) = {0};
|
||||
uint64_t vhash[8*4] __attribute__ ((aligned (64)));
|
||||
unsigned char vhashA[24][64*4] __attribute__ ((aligned (64)));
|
||||
// Doubling the size of vhashX breaks everything. It may have something
|
||||
// to do with accessing arrays: vhashX vs vhashX[0] vs &vhash[0].
|
||||
// Changing notation did seem to allow the larger buffer but still resulted
|
||||
// in problems further along.
|
||||
// unsigned char vhashX[24][64*8] __attribute__ ((aligned (64)));
|
||||
unsigned char vhashX[24][64*4] __attribute__ ((aligned (64)));
|
||||
x25x_4way_ctx_overlay ctx __attribute__ ((aligned (64)));
|
||||
|
||||
blake512_4way_init( &ctx.blake );
|
||||
blake512_4way( &ctx.blake, input, 80 );
|
||||
blake512_4way_close( &ctx.blake, vhash );
|
||||
dintrlv_4x64_512( &hash0[0], &hash1[0], &hash2[0], &hash3[0], vhash );
|
||||
dintrlv_4x64_512( hash0[0], hash1[0], hash2[0], hash3[0], vhash );
|
||||
|
||||
bmw512_4way_init( &ctx.bmw );
|
||||
bmw512_4way( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
dintrlv_4x64_512( &hash0[1], &hash1[1], &hash2[1], &hash3[1], vhash );
|
||||
dintrlv_4x64_512( hash0[1], hash1[1], hash2[1], hash3[1], vhash );
|
||||
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)&hash0[2],
|
||||
(const char*)&hash0[1], 512 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash0[2],
|
||||
(const char*)hash0[1], 512 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)&hash1[2],
|
||||
(const char*)&hash1[1], 512 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash1[2],
|
||||
(const char*)hash1[1], 512 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)&hash2[2],
|
||||
(const char*)&hash2[1], 512 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash2[2],
|
||||
(const char*)hash2[1], 512 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)&hash3[2],
|
||||
(const char*)&hash3[1], 512 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash3[2],
|
||||
(const char*)hash3[1], 512 );
|
||||
|
||||
intrlv_4x64_512( vhash, &hash0[2], &hash1[2], &hash2[2], &hash3[2] );
|
||||
intrlv_4x64_512( vhash, hash0[2], hash1[2], hash2[2], hash3[2] );
|
||||
|
||||
skein512_4way_init( &ctx.skein );
|
||||
skein512_4way( &ctx.skein, vhash, 64 );
|
||||
skein512_4way_close( &ctx.skein, vhash );
|
||||
dintrlv_4x64_512( &hash0[3], &hash1[3], &hash2[3], &hash3[3], vhash );
|
||||
dintrlv_4x64_512( hash0[3], hash1[3], hash2[3], hash3[3], vhash );
|
||||
|
||||
jh512_4way_init( &ctx.jh );
|
||||
jh512_4way( &ctx.jh, vhash, 64 );
|
||||
jh512_4way_close( &ctx.jh, vhash );
|
||||
dintrlv_4x64_512( &hash0[4], &hash1[4], &hash2[4], &hash3[4], vhash );
|
||||
dintrlv_4x64_512( hash0[4], hash1[4], hash2[4], hash3[4], vhash );
|
||||
|
||||
keccak512_4way_init( &ctx.keccak );
|
||||
keccak512_4way( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_close( &ctx.keccak, vhash );
|
||||
dintrlv_4x64_512( &hash0[5], &hash1[5], &hash2[5], &hash3[5], vhash );
|
||||
dintrlv_4x64_512( hash0[5], hash1[5], hash2[5], hash3[5], vhash );
|
||||
|
||||
init_luffa( &ctx.luffa, 512 );
|
||||
update_and_final_luffa( &ctx.luffa, (BitSequence*)&hash0[6],
|
||||
(const BitSequence*)&hash0[5], 64 );
|
||||
update_and_final_luffa( &ctx.luffa, (BitSequence*)hash0[6],
|
||||
(const BitSequence*)hash0[5], 64 );
|
||||
init_luffa( &ctx.luffa, 512 );
|
||||
update_and_final_luffa( &ctx.luffa, (BitSequence*)&hash1[6],
|
||||
(const BitSequence*)&hash1[5], 64 );
|
||||
update_and_final_luffa( &ctx.luffa, (BitSequence*)hash1[6],
|
||||
(const BitSequence*)hash1[5], 64 );
|
||||
init_luffa( &ctx.luffa, 512 );
|
||||
update_and_final_luffa( &ctx.luffa, (BitSequence*)&hash2[6],
|
||||
(const BitSequence*)&hash2[5], 64 );
|
||||
update_and_final_luffa( &ctx.luffa, (BitSequence*)hash2[6],
|
||||
(const BitSequence*)hash2[5], 64 );
|
||||
init_luffa( &ctx.luffa, 512 );
|
||||
update_and_final_luffa( &ctx.luffa, (BitSequence*)&hash3[6],
|
||||
(const BitSequence*)&hash3[5], 64 );
|
||||
update_and_final_luffa( &ctx.luffa, (BitSequence*)hash3[6],
|
||||
(const BitSequence*)hash3[5], 64 );
|
||||
|
||||
cubehashInit( &ctx.cube, 512, 16, 32 );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*) &hash0[7],
|
||||
(const byte*)&hash0[6], 64 );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*) hash0[7],
|
||||
(const byte*)hash0[6], 64 );
|
||||
cubehashInit( &ctx.cube, 512, 16, 32 );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*) &hash1[7],
|
||||
(const byte*)&hash1[6], 64 );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*) hash1[7],
|
||||
(const byte*)hash1[6], 64 );
|
||||
cubehashInit( &ctx.cube, 512, 16, 32 );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*) &hash2[7],
|
||||
(const byte*)&hash2[6], 64 );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*) hash2[7],
|
||||
(const byte*)hash2[6], 64 );
|
||||
cubehashInit( &ctx.cube, 512, 16, 32 );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*) &hash3[7],
|
||||
(const byte*)&hash3[6], 64 );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*) hash3[7],
|
||||
(const byte*)hash3[6], 64 );
|
||||
|
||||
sph_shavite512_init(&ctx.shavite);
|
||||
sph_shavite512(&ctx.shavite, (const void*) &hash0[7], 64);
|
||||
sph_shavite512_close(&ctx.shavite, &hash0[8]);
|
||||
sph_shavite512(&ctx.shavite, (const void*) hash0[7], 64);
|
||||
sph_shavite512_close(&ctx.shavite, hash0[8]);
|
||||
sph_shavite512_init(&ctx.shavite);
|
||||
sph_shavite512(&ctx.shavite, (const void*) &hash1[7], 64);
|
||||
sph_shavite512_close(&ctx.shavite, &hash1[8]);
|
||||
sph_shavite512(&ctx.shavite, (const void*) hash1[7], 64);
|
||||
sph_shavite512_close(&ctx.shavite, hash1[8]);
|
||||
sph_shavite512_init(&ctx.shavite);
|
||||
sph_shavite512(&ctx.shavite, (const void*) &hash2[7], 64);
|
||||
sph_shavite512_close(&ctx.shavite, &hash2[8]);
|
||||
sph_shavite512(&ctx.shavite, (const void*) hash2[7], 64);
|
||||
sph_shavite512_close(&ctx.shavite, hash2[8]);
|
||||
sph_shavite512_init(&ctx.shavite);
|
||||
sph_shavite512(&ctx.shavite, (const void*) &hash3[7], 64);
|
||||
sph_shavite512_close(&ctx.shavite, &hash3[8]);
|
||||
sph_shavite512(&ctx.shavite, (const void*) hash3[7], 64);
|
||||
sph_shavite512_close(&ctx.shavite, hash3[8]);
|
||||
|
||||
init_sd( &ctx.simd, 512 );
|
||||
update_final_sd( &ctx.simd, (BitSequence*)&hash0[9],
|
||||
(const BitSequence*)&hash0[8], 512 );
|
||||
update_final_sd( &ctx.simd, (BitSequence*)hash0[9],
|
||||
(const BitSequence*)hash0[8], 512 );
|
||||
init_sd( &ctx.simd, 512 );
|
||||
update_final_sd( &ctx.simd, (BitSequence*)&hash1[9],
|
||||
(const BitSequence*)&hash1[8], 512 );
|
||||
update_final_sd( &ctx.simd, (BitSequence*)hash1[9],
|
||||
(const BitSequence*)hash1[8], 512 );
|
||||
init_sd( &ctx.simd, 512 );
|
||||
update_final_sd( &ctx.simd, (BitSequence*)&hash2[9],
|
||||
(const BitSequence*)&hash2[8], 512 );
|
||||
update_final_sd( &ctx.simd, (BitSequence*)hash2[9],
|
||||
(const BitSequence*)hash2[8], 512 );
|
||||
init_sd( &ctx.simd, 512 );
|
||||
update_final_sd( &ctx.simd, (BitSequence*)&hash3[9],
|
||||
(const BitSequence*)&hash3[8], 512 );
|
||||
update_final_sd( &ctx.simd, (BitSequence*)hash3[9],
|
||||
(const BitSequence*)hash3[8], 512 );
|
||||
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence*)&hash0[10],
|
||||
(const BitSequence*)&hash0[9], 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence*)hash0[10],
|
||||
(const BitSequence*)hash0[9], 512 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence*)&hash1[10],
|
||||
(const BitSequence*)&hash1[9], 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence*)hash1[10],
|
||||
(const BitSequence*)hash1[9], 512 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence*)&hash2[10],
|
||||
(const BitSequence*)&hash2[9], 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence*)hash2[10],
|
||||
(const BitSequence*)hash2[9], 512 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence*)&hash3[10],
|
||||
(const BitSequence*)&hash3[9], 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence*)hash3[10],
|
||||
(const BitSequence*)hash3[9], 512 );
|
||||
|
||||
intrlv_4x64_512( vhash, &hash0[10], &hash1[10], &hash2[10], &hash3[10] );
|
||||
intrlv_4x64_512( vhash, hash0[10], hash1[10], hash2[10], hash3[10] );
|
||||
|
||||
hamsi512_4way_init( &ctx.hamsi );
|
||||
hamsi512_4way( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_4way_close( &ctx.hamsi, vhash );
|
||||
dintrlv_4x64_512( &hash0[11], &hash1[11], &hash2[11], &hash3[11], vhash );
|
||||
dintrlv_4x64_512( hash0[11], hash1[11], hash2[11], hash3[11], vhash );
|
||||
|
||||
sph_fugue512_init(&ctx.fugue);
|
||||
sph_fugue512(&ctx.fugue, (const void*) &hash0[11], 64);
|
||||
sph_fugue512_close(&ctx.fugue, &hash0[12]);
|
||||
sph_fugue512(&ctx.fugue, (const void*) hash0[11], 64);
|
||||
sph_fugue512_close(&ctx.fugue, hash0[12]);
|
||||
sph_fugue512_init(&ctx.fugue);
|
||||
sph_fugue512(&ctx.fugue, (const void*) &hash1[11], 64);
|
||||
sph_fugue512_close(&ctx.fugue, &hash1[12]);
|
||||
sph_fugue512(&ctx.fugue, (const void*) hash1[11], 64);
|
||||
sph_fugue512_close(&ctx.fugue, hash1[12]);
|
||||
sph_fugue512_init(&ctx.fugue);
|
||||
sph_fugue512(&ctx.fugue, (const void*) &hash2[11], 64);
|
||||
sph_fugue512_close(&ctx.fugue, &hash2[12]);
|
||||
sph_fugue512(&ctx.fugue, (const void*) hash2[11], 64);
|
||||
sph_fugue512_close(&ctx.fugue, hash2[12]);
|
||||
sph_fugue512_init(&ctx.fugue);
|
||||
sph_fugue512(&ctx.fugue, (const void*) &hash3[11], 64);
|
||||
sph_fugue512_close(&ctx.fugue, &hash3[12]);
|
||||
sph_fugue512(&ctx.fugue, (const void*) hash3[11], 64);
|
||||
sph_fugue512_close(&ctx.fugue, hash3[12]);
|
||||
|
||||
intrlv_4x32_512( vhash, &hash0[12], &hash1[12], &hash2[12], &hash3[12] );
|
||||
intrlv_4x32_512( vhash, hash0[12], hash1[12], hash2[12], hash3[12] );
|
||||
|
||||
shabal512_4way_init( &ctx.shabal );
|
||||
shabal512_4way( &ctx.shabal, vhash, 64 );
|
||||
shabal512_4way_close( &ctx.shabal, vhash );
|
||||
dintrlv_4x32_512( &hash0[13], &hash1[13], &hash2[13], &hash3[13], vhash );
|
||||
dintrlv_4x32_512( hash0[13], hash1[13], hash2[13], hash3[13], vhash );
|
||||
|
||||
sph_whirlpool_init(&ctx.whirlpool);
|
||||
sph_whirlpool (&ctx.whirlpool, (const void*) &hash0[13], 64);
|
||||
sph_whirlpool_close(&ctx.whirlpool, &hash0[14]);
|
||||
sph_whirlpool (&ctx.whirlpool, (const void*) hash0[13], 64);
|
||||
sph_whirlpool_close(&ctx.whirlpool, hash0[14]);
|
||||
sph_whirlpool_init(&ctx.whirlpool);
|
||||
sph_whirlpool (&ctx.whirlpool, (const void*) &hash1[13], 64);
|
||||
sph_whirlpool_close(&ctx.whirlpool, &hash1[14]);
|
||||
sph_whirlpool (&ctx.whirlpool, (const void*) hash1[13], 64);
|
||||
sph_whirlpool_close(&ctx.whirlpool, hash1[14]);
|
||||
sph_whirlpool_init(&ctx.whirlpool);
|
||||
sph_whirlpool (&ctx.whirlpool, (const void*) &hash2[13], 64);
|
||||
sph_whirlpool_close(&ctx.whirlpool, &hash2[14]);
|
||||
sph_whirlpool (&ctx.whirlpool, (const void*) hash2[13], 64);
|
||||
sph_whirlpool_close(&ctx.whirlpool, hash2[14]);
|
||||
sph_whirlpool_init(&ctx.whirlpool);
|
||||
sph_whirlpool (&ctx.whirlpool, (const void*) &hash3[13], 64);
|
||||
sph_whirlpool_close(&ctx.whirlpool, &hash3[14]);
|
||||
sph_whirlpool (&ctx.whirlpool, (const void*) hash3[13], 64);
|
||||
sph_whirlpool_close(&ctx.whirlpool, hash3[14]);
|
||||
|
||||
intrlv_4x64_512( vhash, &hash0[14], &hash1[14], &hash2[14], &hash3[14] );
|
||||
intrlv_4x64_512( vhash, hash0[14], hash1[14], hash2[14], hash3[14] );
|
||||
|
||||
sha512_4way_init( &ctx.sha512 );
|
||||
sha512_4way( &ctx.sha512, vhash, 64 );
|
||||
sha512_4way_close( &ctx.sha512, vhash );
|
||||
dintrlv_4x64_512( &hash0[15], &hash1[15], &hash2[15], &hash3[15], vhash );
|
||||
dintrlv_4x64_512( hash0[15], hash1[15], hash2[15], hash3[15], vhash );
|
||||
|
||||
|
||||
ComputeSingleSWIFFTX((unsigned char*)&hash0[12], (unsigned char*)&hash0[16]);
|
||||
ComputeSingleSWIFFTX((unsigned char*)&hash1[12], (unsigned char*)&hash1[16]);
|
||||
ComputeSingleSWIFFTX((unsigned char*)&hash2[12], (unsigned char*)&hash2[16]);
|
||||
ComputeSingleSWIFFTX((unsigned char*)&hash3[12], (unsigned char*)&hash3[16]);
|
||||
ComputeSingleSWIFFTX((unsigned char*)hash0[12], (unsigned char*)hash0[16]);
|
||||
ComputeSingleSWIFFTX((unsigned char*)hash1[12], (unsigned char*)hash1[16]);
|
||||
ComputeSingleSWIFFTX((unsigned char*)hash2[12], (unsigned char*)hash2[16]);
|
||||
ComputeSingleSWIFFTX((unsigned char*)hash3[12], (unsigned char*)hash3[16]);
|
||||
|
||||
intrlv_4x32_512( &vhashA, &hash0[16], &hash1[16], &hash2[16], &hash3[16] );
|
||||
intrlv_4x32_512( vhashX[0], hash0[16], hash1[16], hash2[16], hash3[16] );
|
||||
|
||||
memset( vhash, 0, 64*4 );
|
||||
|
||||
haval256_5_4way_init( &ctx.haval );
|
||||
haval256_5_4way( &ctx.haval, vhashA, 64 );
|
||||
haval256_5_4way( &ctx.haval, vhashX[0], 64 );
|
||||
haval256_5_4way_close( &ctx.haval, vhash );
|
||||
dintrlv_4x32_512( &hash0[17], &hash1[17], &hash2[17], &hash3[17], vhash );
|
||||
dintrlv_4x32_512( hash0[17], hash1[17], hash2[17], hash3[17], vhash );
|
||||
|
||||
sph_tiger_init(&ctx.tiger);
|
||||
sph_tiger (&ctx.tiger, (const void*) &hash0[17], 64);
|
||||
sph_tiger_close(&ctx.tiger, (void*) &hash0[18]);
|
||||
sph_tiger (&ctx.tiger, (const void*) hash0[17], 64);
|
||||
sph_tiger_close(&ctx.tiger, (void*) hash0[18]);
|
||||
sph_tiger_init(&ctx.tiger);
|
||||
sph_tiger (&ctx.tiger, (const void*) &hash1[17], 64);
|
||||
sph_tiger_close(&ctx.tiger, (void*) &hash1[18]);
|
||||
sph_tiger (&ctx.tiger, (const void*) hash1[17], 64);
|
||||
sph_tiger_close(&ctx.tiger, (void*) hash1[18]);
|
||||
sph_tiger_init(&ctx.tiger);
|
||||
sph_tiger (&ctx.tiger, (const void*) &hash2[17], 64);
|
||||
sph_tiger_close(&ctx.tiger, (void*) &hash2[18]);
|
||||
sph_tiger (&ctx.tiger, (const void*) hash2[17], 64);
|
||||
sph_tiger_close(&ctx.tiger, (void*) hash2[18]);
|
||||
sph_tiger_init(&ctx.tiger);
|
||||
sph_tiger (&ctx.tiger, (const void*) &hash3[17], 64);
|
||||
sph_tiger_close(&ctx.tiger, (void*) &hash3[18]);
|
||||
sph_tiger (&ctx.tiger, (const void*) hash3[17], 64);
|
||||
sph_tiger_close(&ctx.tiger, (void*) hash3[18]);
|
||||
|
||||
LYRA2RE( (void*)&hash0[19], 32, (const void*)&hash0[18], 32,
|
||||
(const void*)&hash0[18], 32, 1, 4, 4 );
|
||||
LYRA2RE( (void*)&hash1[19], 32, (const void*)&hash1[18], 32,
|
||||
(const void*)&hash1[18], 32, 1, 4, 4 );
|
||||
LYRA2RE( (void*)&hash2[19], 32, (const void*)&hash2[18], 32,
|
||||
(const void*)&hash2[18], 32, 1, 4, 4 );
|
||||
LYRA2RE( (void*)&hash3[19], 32, (const void*)&hash3[18], 32,
|
||||
(const void*)&hash3[18], 32, 1, 4, 4 );
|
||||
LYRA2RE( (void*)hash0[19], 32, (const void*)hash0[18], 32,
|
||||
(const void*)hash0[18], 32, 1, 4, 4 );
|
||||
LYRA2RE( (void*)hash1[19], 32, (const void*)hash1[18], 32,
|
||||
(const void*)hash1[18], 32, 1, 4, 4 );
|
||||
LYRA2RE( (void*)hash2[19], 32, (const void*)hash2[18], 32,
|
||||
(const void*)hash2[18], 32, 1, 4, 4 );
|
||||
LYRA2RE( (void*)hash3[19], 32, (const void*)hash3[18], 32,
|
||||
(const void*)hash3[18], 32, 1, 4, 4 );
|
||||
|
||||
sph_gost512_init(&ctx.gost);
|
||||
sph_gost512 (&ctx.gost, (const void*) &hash0[19], 64);
|
||||
sph_gost512_close(&ctx.gost, (void*) &hash0[20]);
|
||||
sph_gost512 (&ctx.gost, (const void*) hash0[19], 64);
|
||||
sph_gost512_close(&ctx.gost, (void*) hash0[20]);
|
||||
sph_gost512_init(&ctx.gost);
|
||||
sph_gost512 (&ctx.gost, (const void*) &hash1[19], 64);
|
||||
sph_gost512_close(&ctx.gost, (void*) &hash1[20]);
|
||||
sph_gost512 (&ctx.gost, (const void*) hash1[19], 64);
|
||||
sph_gost512_close(&ctx.gost, (void*) hash1[20]);
|
||||
sph_gost512_init(&ctx.gost);
|
||||
sph_gost512 (&ctx.gost, (const void*) &hash2[19], 64);
|
||||
sph_gost512_close(&ctx.gost, (void*) &hash2[20]);
|
||||
sph_gost512 (&ctx.gost, (const void*) hash2[19], 64);
|
||||
sph_gost512_close(&ctx.gost, (void*) hash2[20]);
|
||||
sph_gost512_init(&ctx.gost);
|
||||
sph_gost512 (&ctx.gost, (const void*) &hash3[19], 64);
|
||||
sph_gost512_close(&ctx.gost, (void*) &hash3[20]);
|
||||
sph_gost512 (&ctx.gost, (const void*) hash3[19], 64);
|
||||
sph_gost512_close(&ctx.gost, (void*) hash3[20]);
|
||||
|
||||
intrlv_4x32_512( vhashA, &hash0[20], &hash1[20], &hash2[20], &hash3[20] );
|
||||
intrlv_4x32_512( vhashX[0], hash0[20], hash1[20], hash2[20], hash3[20] );
|
||||
memset( vhash, 0, 64*4 );
|
||||
|
||||
sha256_4way_init( &ctx.sha256 );
|
||||
sha256_4way( &ctx.sha256, vhashA, 64 );
|
||||
sha256_4way( &ctx.sha256, vhashX[0], 64 );
|
||||
sha256_4way_close( &ctx.sha256, vhash );
|
||||
dintrlv_4x32_512( &hash0[21], &hash1[21], &hash2[21], &hash3[21], vhash );
|
||||
dintrlv_4x32_512( hash0[21], hash1[21], hash2[21], hash3[21], vhash );
|
||||
|
||||
sph_panama_init(&ctx.panama);
|
||||
sph_panama (&ctx.panama, (const void*) &hash0[21], 64 );
|
||||
sph_panama_close(&ctx.panama, (void*) &hash0[22]);
|
||||
sph_panama (&ctx.panama, (const void*) hash0[21], 64 );
|
||||
sph_panama_close(&ctx.panama, (void*) hash0[22]);
|
||||
sph_panama_init(&ctx.panama);
|
||||
sph_panama (&ctx.panama, (const void*) &hash1[21], 64 );
|
||||
sph_panama_close(&ctx.panama, (void*) &hash1[22]);
|
||||
sph_panama (&ctx.panama, (const void*) hash1[21], 64 );
|
||||
sph_panama_close(&ctx.panama, (void*) hash1[22]);
|
||||
sph_panama_init(&ctx.panama);
|
||||
sph_panama (&ctx.panama, (const void*) &hash2[21], 64 );
|
||||
sph_panama_close(&ctx.panama, (void*) &hash2[22]);
|
||||
sph_panama (&ctx.panama, (const void*) hash2[21], 64 );
|
||||
sph_panama_close(&ctx.panama, (void*) hash2[22]);
|
||||
sph_panama_init(&ctx.panama);
|
||||
sph_panama (&ctx.panama, (const void*) &hash3[21], 64 );
|
||||
sph_panama_close(&ctx.panama, (void*) &hash3[22]);
|
||||
sph_panama (&ctx.panama, (const void*) hash3[21], 64 );
|
||||
sph_panama_close(&ctx.panama, (void*) hash3[22]);
|
||||
|
||||
laneHash(512, (const BitSequence*)&hash0[22], 512, (BitSequence*)&hash0[23]);
|
||||
laneHash(512, (const BitSequence*)&hash1[22], 512, (BitSequence*)&hash1[23]);
|
||||
laneHash(512, (const BitSequence*)&hash2[22], 512, (BitSequence*)&hash2[23]);
|
||||
laneHash(512, (const BitSequence*)&hash3[22], 512, (BitSequence*)&hash3[23]);
|
||||
laneHash(512, (const BitSequence*)hash0[22], 512, (BitSequence*)hash0[23]);
|
||||
laneHash(512, (const BitSequence*)hash1[22], 512, (BitSequence*)hash1[23]);
|
||||
laneHash(512, (const BitSequence*)hash2[22], 512, (BitSequence*)hash2[23]);
|
||||
laneHash(512, (const BitSequence*)hash3[22], 512, (BitSequence*)hash3[23]);
|
||||
|
||||
x25x_shuffle( hash0 );
|
||||
x25x_shuffle( hash1 );
|
||||
x25x_shuffle( hash2 );
|
||||
x25x_shuffle( hash3 );
|
||||
|
||||
intrlv_4x32_512( &vhashA[ 0], &hash0[ 0], &hash1[ 0], &hash2[ 0], &hash3[ 0] );
|
||||
intrlv_4x32_512( &vhashA[ 1], &hash0[ 1], &hash1[ 1], &hash2[ 1], &hash3[ 1] );
|
||||
intrlv_4x32_512( &vhashA[ 2], &hash0[ 2], &hash1[ 2], &hash2[ 2], &hash3[ 2] );
|
||||
intrlv_4x32_512( &vhashA[ 3], &hash0[ 3], &hash1[ 3], &hash2[ 3], &hash3[ 3] );
|
||||
intrlv_4x32_512( &vhashA[ 4], &hash0[ 4], &hash1[ 4], &hash2[ 4], &hash3[ 4] );
|
||||
intrlv_4x32_512( &vhashA[ 5], &hash0[ 5], &hash1[ 5], &hash2[ 5], &hash3[ 5] );
|
||||
intrlv_4x32_512( &vhashA[ 6], &hash0[ 6], &hash1[ 6], &hash2[ 6], &hash3[ 6] );
|
||||
intrlv_4x32_512( &vhashA[ 7], &hash0[ 7], &hash1[ 7], &hash2[ 7], &hash3[ 7] );
|
||||
intrlv_4x32_512( &vhashA[ 8], &hash0[ 8], &hash1[ 8], &hash2[ 8], &hash3[ 8] );
|
||||
intrlv_4x32_512( &vhashA[ 9], &hash0[ 9], &hash1[ 9], &hash2[ 9], &hash3[ 9] );
|
||||
intrlv_4x32_512( &vhashA[10], &hash0[10], &hash1[10], &hash2[10], &hash3[10] );
|
||||
intrlv_4x32_512( &vhashA[11], &hash0[11], &hash1[11], &hash2[11], &hash3[11] );
|
||||
intrlv_4x32_512( &vhashA[12], &hash0[12], &hash1[12], &hash2[12], &hash3[12] );
|
||||
intrlv_4x32_512( &vhashA[13], &hash0[13], &hash1[13], &hash2[13], &hash3[13] );
|
||||
intrlv_4x32_512( &vhashA[14], &hash0[14], &hash1[14], &hash2[14], &hash3[14] );
|
||||
intrlv_4x32_512( &vhashA[15], &hash0[15], &hash1[15], &hash2[15], &hash3[15] );
|
||||
intrlv_4x32_512( &vhashA[16], &hash0[16], &hash1[16], &hash2[16], &hash3[16] );
|
||||
intrlv_4x32_512( &vhashA[17], &hash0[17], &hash1[17], &hash2[17], &hash3[17] );
|
||||
intrlv_4x32_512( &vhashA[18], &hash0[18], &hash1[18], &hash2[18], &hash3[18] );
|
||||
intrlv_4x32_512( &vhashA[19], &hash0[19], &hash1[19], &hash2[19], &hash3[19] );
|
||||
intrlv_4x32_512( &vhashA[20], &hash0[20], &hash1[20], &hash2[20], &hash3[20] );
|
||||
intrlv_4x32_512( &vhashA[21], &hash0[21], &hash1[21], &hash2[21], &hash3[21] );
|
||||
intrlv_4x32_512( &vhashA[22], &hash0[22], &hash1[22], &hash2[22], &hash3[22] );
|
||||
intrlv_4x32_512( &vhashA[23], &hash0[23], &hash1[23], &hash2[23], &hash3[23] );
|
||||
intrlv_4x32_512( vhashX[ 0], hash0[ 0], hash1[ 0], hash2[ 0], hash3[ 0] );
|
||||
intrlv_4x32_512( vhashX[ 1], hash0[ 1], hash1[ 1], hash2[ 1], hash3[ 1] );
|
||||
intrlv_4x32_512( vhashX[ 2], hash0[ 2], hash1[ 2], hash2[ 2], hash3[ 2] );
|
||||
intrlv_4x32_512( vhashX[ 3], hash0[ 3], hash1[ 3], hash2[ 3], hash3[ 3] );
|
||||
intrlv_4x32_512( vhashX[ 4], hash0[ 4], hash1[ 4], hash2[ 4], hash3[ 4] );
|
||||
intrlv_4x32_512( vhashX[ 5], hash0[ 5], hash1[ 5], hash2[ 5], hash3[ 5] );
|
||||
intrlv_4x32_512( vhashX[ 6], hash0[ 6], hash1[ 6], hash2[ 6], hash3[ 6] );
|
||||
intrlv_4x32_512( vhashX[ 7], hash0[ 7], hash1[ 7], hash2[ 7], hash3[ 7] );
|
||||
intrlv_4x32_512( vhashX[ 8], hash0[ 8], hash1[ 8], hash2[ 8], hash3[ 8] );
|
||||
intrlv_4x32_512( vhashX[ 9], hash0[ 9], hash1[ 9], hash2[ 9], hash3[ 9] );
|
||||
intrlv_4x32_512( vhashX[10], hash0[10], hash1[10], hash2[10], hash3[10] );
|
||||
intrlv_4x32_512( vhashX[11], hash0[11], hash1[11], hash2[11], hash3[11] );
|
||||
intrlv_4x32_512( vhashX[12], hash0[12], hash1[12], hash2[12], hash3[12] );
|
||||
intrlv_4x32_512( vhashX[13], hash0[13], hash1[13], hash2[13], hash3[13] );
|
||||
intrlv_4x32_512( vhashX[14], hash0[14], hash1[14], hash2[14], hash3[14] );
|
||||
intrlv_4x32_512( vhashX[15], hash0[15], hash1[15], hash2[15], hash3[15] );
|
||||
intrlv_4x32_512( vhashX[16], hash0[16], hash1[16], hash2[16], hash3[16] );
|
||||
intrlv_4x32_512( vhashX[17], hash0[17], hash1[17], hash2[17], hash3[17] );
|
||||
intrlv_4x32_512( vhashX[18], hash0[18], hash1[18], hash2[18], hash3[18] );
|
||||
intrlv_4x32_512( vhashX[19], hash0[19], hash1[19], hash2[19], hash3[19] );
|
||||
intrlv_4x32_512( vhashX[20], hash0[20], hash1[20], hash2[20], hash3[20] );
|
||||
intrlv_4x32_512( vhashX[21], hash0[21], hash1[21], hash2[21], hash3[21] );
|
||||
intrlv_4x32_512( vhashX[22], hash0[22], hash1[22], hash2[22], hash3[22] );
|
||||
intrlv_4x32_512( vhashX[23], hash0[23], hash1[23], hash2[23], hash3[23] );
|
||||
|
||||
blake2s_4way_init( &ctx.blake2s, 32 );
|
||||
blake2s_4way_full_blocks( &ctx.blake2s, vhash, vhashA, 64*24 );
|
||||
|
||||
dintrlv_4x32( &hash0[24], &hash1[24], &hash2[24], &hash3[24], vhash, 256 );
|
||||
blake2s_4way_full_blocks( &ctx.blake2s, output, vhashX, 64*24 );
|
||||
/*
|
||||
dintrlv_4x32( hash0[24], hash1[24], hash2[24], hash3[24], vhash, 256 );
|
||||
|
||||
memcpy(output, &hash0[24], 32);
|
||||
memcpy(output+32, &hash1[24], 32);
|
||||
memcpy(output+64, &hash2[24], 32);
|
||||
memcpy(output+96, &hash3[24], 32);
|
||||
memcpy(output, hash0[24], 32);
|
||||
memcpy(output+32, hash1[24], 32);
|
||||
memcpy(output+64, hash2[24], 32);
|
||||
memcpy(output+96, hash3[24], 32);
|
||||
*/
|
||||
}
|
||||
|
||||
int scanhash_x25x_4way( struct work* work, uint32_t max_nonce,
|
||||
@@ -365,11 +371,14 @@ int scanhash_x25x_4way( struct work* work, uint32_t max_nonce,
|
||||
{
|
||||
uint32_t hash[4*16] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||
uint32_t *hash7 = &(hash[7<<2]);
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
__m256i *noncev = (__m256i*)vdata + 9; // aligned
|
||||
uint32_t n = first_nonce;
|
||||
const uint32_t last_nonce = max_nonce - 4;
|
||||
const int thr_id = mythr->id;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
|
||||
@@ -385,6 +394,16 @@ int scanhash_x25x_4way( struct work* work, uint32_t max_nonce,
|
||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
|
||||
x25x_4way_hash( hash, vdata );
|
||||
|
||||
for ( int lane = 0; lane < 4; lane++ ) if ( hash7[lane] <= Htarg )
|
||||
{
|
||||
extr_lane_4x32( lane_hash, hash, lane, 256 );
|
||||
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
}
|
||||
}
|
||||
/*
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( unlikely( (hash+(i<<3))[7] <= Htarg ) )
|
||||
if( likely( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark ) )
|
||||
@@ -392,10 +411,11 @@ int scanhash_x25x_4way( struct work* work, uint32_t max_nonce,
|
||||
pdata[19] = n+i;
|
||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
||||
}
|
||||
*/
|
||||
n += 4;
|
||||
} while ( likely( ( n < max_nonce - 4 ) && !work_restart[thr_id].restart ) );
|
||||
} while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) );
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
20
configure
vendored
20
configure
vendored
@@ -1,6 +1,6 @@
|
||||
#! /bin/sh
|
||||
# Guess values for system-dependent variables and create Makefiles.
|
||||
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.10.5.
|
||||
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.10.6.
|
||||
#
|
||||
#
|
||||
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
|
||||
@@ -577,8 +577,8 @@ MAKEFLAGS=
|
||||
# Identity of this package.
|
||||
PACKAGE_NAME='cpuminer-opt'
|
||||
PACKAGE_TARNAME='cpuminer-opt'
|
||||
PACKAGE_VERSION='3.10.5'
|
||||
PACKAGE_STRING='cpuminer-opt 3.10.5'
|
||||
PACKAGE_VERSION='3.10.6'
|
||||
PACKAGE_STRING='cpuminer-opt 3.10.6'
|
||||
PACKAGE_BUGREPORT=''
|
||||
PACKAGE_URL=''
|
||||
|
||||
@@ -1332,7 +1332,7 @@ if test "$ac_init_help" = "long"; then
|
||||
# Omit some internal or obsolete options to make the list less imposing.
|
||||
# This message is too long to be a string in the A/UX 3.1 sh.
|
||||
cat <<_ACEOF
|
||||
\`configure' configures cpuminer-opt 3.10.5 to adapt to many kinds of systems.
|
||||
\`configure' configures cpuminer-opt 3.10.6 to adapt to many kinds of systems.
|
||||
|
||||
Usage: $0 [OPTION]... [VAR=VALUE]...
|
||||
|
||||
@@ -1404,7 +1404,7 @@ fi
|
||||
|
||||
if test -n "$ac_init_help"; then
|
||||
case $ac_init_help in
|
||||
short | recursive ) echo "Configuration of cpuminer-opt 3.10.5:";;
|
||||
short | recursive ) echo "Configuration of cpuminer-opt 3.10.6:";;
|
||||
esac
|
||||
cat <<\_ACEOF
|
||||
|
||||
@@ -1509,7 +1509,7 @@ fi
|
||||
test -n "$ac_init_help" && exit $ac_status
|
||||
if $ac_init_version; then
|
||||
cat <<\_ACEOF
|
||||
cpuminer-opt configure 3.10.5
|
||||
cpuminer-opt configure 3.10.6
|
||||
generated by GNU Autoconf 2.69
|
||||
|
||||
Copyright (C) 2012 Free Software Foundation, Inc.
|
||||
@@ -2012,7 +2012,7 @@ cat >config.log <<_ACEOF
|
||||
This file contains any messages produced by compilers while
|
||||
running configure, to aid debugging if configure makes a mistake.
|
||||
|
||||
It was created by cpuminer-opt $as_me 3.10.5, which was
|
||||
It was created by cpuminer-opt $as_me 3.10.6, which was
|
||||
generated by GNU Autoconf 2.69. Invocation command line was
|
||||
|
||||
$ $0 $@
|
||||
@@ -2993,7 +2993,7 @@ fi
|
||||
|
||||
# Define the identity of the package.
|
||||
PACKAGE='cpuminer-opt'
|
||||
VERSION='3.10.5'
|
||||
VERSION='3.10.6'
|
||||
|
||||
|
||||
cat >>confdefs.h <<_ACEOF
|
||||
@@ -6690,7 +6690,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
|
||||
# report actual input values of CONFIG_FILES etc. instead of their
|
||||
# values after options handling.
|
||||
ac_log="
|
||||
This file was extended by cpuminer-opt $as_me 3.10.5, which was
|
||||
This file was extended by cpuminer-opt $as_me 3.10.6, which was
|
||||
generated by GNU Autoconf 2.69. Invocation command line was
|
||||
|
||||
CONFIG_FILES = $CONFIG_FILES
|
||||
@@ -6756,7 +6756,7 @@ _ACEOF
|
||||
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
|
||||
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
|
||||
ac_cs_version="\\
|
||||
cpuminer-opt config.status 3.10.5
|
||||
cpuminer-opt config.status 3.10.6
|
||||
configured by $0, generated by GNU Autoconf 2.69,
|
||||
with options \\"\$ac_cs_config\\"
|
||||
|
||||
|
@@ -1,4 +1,4 @@
|
||||
AC_INIT([cpuminer-opt], [3.10.5])
|
||||
AC_INIT([cpuminer-opt], [3.10.6])
|
||||
|
||||
AC_PREREQ([2.59c])
|
||||
AC_CANONICAL_SYSTEM
|
||||
|
88
cpu-miner.c
88
cpu-miner.c
@@ -872,6 +872,7 @@ static uint64_t accept_sum = 0;
|
||||
static uint64_t reject_sum = 0;
|
||||
static double norm_diff_sum = 0.;
|
||||
static uint32_t last_block_height = 0;
|
||||
static bool new_job = false;
|
||||
static double last_targetdiff = 0.;
|
||||
static double ref_rate_hi = 0.;
|
||||
static double ref_rate_lo = 1e100;
|
||||
@@ -887,6 +888,7 @@ struct share_stats_t
|
||||
double share_diff;
|
||||
double stratum_diff;
|
||||
double target_diff;
|
||||
char job_id[32];
|
||||
};
|
||||
|
||||
#define s_stats_size 8
|
||||
@@ -1093,8 +1095,9 @@ static int share_result( int result, struct work *null_work,
|
||||
rejected_share_count, solved_block_count );
|
||||
|
||||
if ( have_stratum && !opt_quiet )
|
||||
applog2( LOG_INFO, "Share diff %.3g (%5f%%), block %d",
|
||||
my_stats.share_diff, share_ratio, stratum.block_height );
|
||||
applog2( LOG_INFO, "Share diff %.3g (%5f%%), block %d, job %s",
|
||||
my_stats.share_diff, share_ratio, stratum.block_height,
|
||||
my_stats.job_id );
|
||||
|
||||
if ( reason )
|
||||
{
|
||||
@@ -1762,6 +1765,7 @@ void work_set_target_ratio( struct work* work, uint32_t* hash )
|
||||
share_stats[ s_put_ptr ].net_diff = net_diff;
|
||||
share_stats[ s_put_ptr ].stratum_diff = stratum_diff;
|
||||
share_stats[ s_put_ptr ].target_diff = work->targetdiff;
|
||||
strcpy( share_stats[ s_put_ptr ].job_id, work->job_id );
|
||||
|
||||
s_put_ptr = stats_ptr_incr( s_put_ptr );
|
||||
|
||||
@@ -2586,28 +2590,38 @@ void std_stratum_gen_work( struct stratum_ctx *sctx, struct work *g_work )
|
||||
|| ( last_block_height != sctx->block_height ) )
|
||||
{
|
||||
double hr = 0.;
|
||||
|
||||
new_job = false;
|
||||
pthread_mutex_lock( &stats_lock );
|
||||
|
||||
for ( int i = 0; i < opt_n_threads; i++ )
|
||||
hr += thr_hashrates[i];
|
||||
global_hashrate = hr;
|
||||
pthread_mutex_unlock( &stats_lock );
|
||||
|
||||
if ( stratum_diff != sctx->job.diff )
|
||||
applog( LOG_BLUE, "New stratum difficulty" );
|
||||
if ( last_block_height != sctx->block_height )
|
||||
applog( LOG_BLUE, "New block" );
|
||||
|
||||
if ( !opt_quiet )
|
||||
{
|
||||
if ( stratum_diff != sctx->job.diff )
|
||||
applog( LOG_BLUE, "New stratum diff %g, block %d, job %s",
|
||||
sctx->job.diff, sctx->block_height, g_work->job_id );
|
||||
else if ( last_block_height != sctx->block_height )
|
||||
applog( LOG_BLUE, "New block %d, job %s", sctx->block_height,
|
||||
g_work->job_id );
|
||||
else
|
||||
applog( LOG_BLUE,"New job %s.", g_work->job_id );
|
||||
}
|
||||
|
||||
// Update data and calculate new estimates.
|
||||
stratum_diff = sctx->job.diff;
|
||||
last_block_height = stratum.block_height;
|
||||
last_targetdiff = g_work->targetdiff;
|
||||
|
||||
applog2( LOG_INFO, "%s %s block %d", short_url,
|
||||
algo_names[opt_algo], stratum.block_height );
|
||||
applog2( LOG_INFO, "Diff: net %g, stratum %g, target %g",
|
||||
net_diff, stratum_diff, last_targetdiff );
|
||||
if ( !opt_quiet )
|
||||
{
|
||||
applog2( LOG_INFO, "%s %s block %d", short_url,
|
||||
algo_names[opt_algo], stratum.block_height );
|
||||
applog2( LOG_INFO, "Diff: net %g, stratum %g, target %g",
|
||||
net_diff, stratum_diff, last_targetdiff );
|
||||
}
|
||||
|
||||
if ( hr > 0. )
|
||||
{
|
||||
@@ -2619,10 +2633,13 @@ void std_stratum_gen_work( struct stratum_ctx *sctx, struct work *g_work )
|
||||
sprintf_et( share_ttf, last_targetdiff * diff_to_hash / hr );
|
||||
scale_hash_for_display ( &hr, hr_units );
|
||||
|
||||
applog2( LOG_INFO, "TTF @ %.2f %sh/s: block %s, share %s",
|
||||
hr, hr_units, block_ttf, share_ttf );
|
||||
if ( !opt_quiet )
|
||||
{
|
||||
applog2( LOG_INFO, "TTF @ %.2f %sh/s: block %s, share %s",
|
||||
hr, hr_units, block_ttf, share_ttf );
|
||||
}
|
||||
}
|
||||
}
|
||||
} // new diff/block
|
||||
}
|
||||
|
||||
void jr2_stratum_gen_work( struct stratum_ctx *sctx, struct work *g_work )
|
||||
@@ -2700,25 +2717,23 @@ static void *stratum_thread(void *userdata )
|
||||
if ( stratum.job.job_id
|
||||
&& ( !g_work_time || strcmp( stratum.job.job_id, g_work.job_id ) ) )
|
||||
{
|
||||
new_job = true;
|
||||
pthread_mutex_lock(&g_work_lock);
|
||||
algo_gate.stratum_gen_work( &stratum, &g_work );
|
||||
time(&g_work_time);
|
||||
pthread_mutex_unlock(&g_work_lock);
|
||||
restart_threads();
|
||||
|
||||
/*
|
||||
if ( stratum.job.clean || jsonrpc_2 )
|
||||
{
|
||||
static uint32_t last_block_height;
|
||||
if ( last_block_height != stratum.block_height )
|
||||
{
|
||||
last_block_height = stratum.block_height;
|
||||
if ( !opt_quiet && last_block_height && new_job
|
||||
&& ( last_block_height == stratum.block_height ) )
|
||||
{
|
||||
new_job = false;
|
||||
applog( LOG_BLUE,"New job %s", g_work.job_id );
|
||||
}
|
||||
|
||||
}
|
||||
else
|
||||
*/
|
||||
if (opt_debug && !opt_quiet)
|
||||
else if (opt_debug && !opt_quiet)
|
||||
{
|
||||
applog( LOG_BLUE, "%s asks job %d for block %d", short_url,
|
||||
strtoul( stratum.job.job_id, NULL, 16 ), stratum.block_height );
|
||||
@@ -2960,9 +2975,7 @@ void parse_arg(int key, char *arg )
|
||||
show_usage_and_exit(1);
|
||||
opt_retries = v;
|
||||
break;
|
||||
// case 'R':
|
||||
// applog(LOG_WARNING,"\n-R is no longer valid, use --retry-pause instead.");
|
||||
case 1025:
|
||||
case 1025:
|
||||
v = atoi(arg);
|
||||
if (v < 1 || v > 9999) /* sanity check */
|
||||
show_usage_and_exit(1);
|
||||
@@ -3018,11 +3031,14 @@ void parse_arg(int key, char *arg )
|
||||
*hp++ = '@';
|
||||
} else
|
||||
hp = ap;
|
||||
if (ap != arg) {
|
||||
if (strncasecmp(arg, "http://", 7) &&
|
||||
strncasecmp(arg, "https://", 8) &&
|
||||
strncasecmp(arg, "stratum+tcp://", 14)) {
|
||||
fprintf(stderr, "unknown protocol -- '%s'\n", arg);
|
||||
if ( ap != arg )
|
||||
{
|
||||
if ( strncasecmp( arg, "http://", 7 )
|
||||
&& strncasecmp( arg, "https://", 8 )
|
||||
&& strncasecmp( arg, "stratum+tcp://", 14 )
|
||||
&& strncasecmp( arg, "stratum+tcps://", 15 ) )
|
||||
{
|
||||
fprintf(stderr, "unknown protocol -- '%s'\n", arg);
|
||||
show_usage_and_exit(1);
|
||||
}
|
||||
free(rpc_url);
|
||||
@@ -3427,7 +3443,7 @@ bool check_cpu_capability ()
|
||||
else if ( sw_has_sse42 ) printf( " SSE4.2" );
|
||||
else if ( sw_has_sse2 ) printf( " SSE2 " );
|
||||
if ( sw_has_vaes ) printf( " VAES" );
|
||||
else if ( sw_has_aes ) printf( " AES " );
|
||||
else if ( sw_has_aes ) printf( " AES" );
|
||||
if ( sw_has_sha ) printf( " SHA" );
|
||||
|
||||
printf("\nAlgo features:");
|
||||
@@ -3439,7 +3455,7 @@ bool check_cpu_capability ()
|
||||
else if ( algo_has_sse42 ) printf( " SSE4.2" );
|
||||
else if ( algo_has_sse2 ) printf( " SSE2 " );
|
||||
if ( algo_has_vaes ) printf( " VAES" );
|
||||
else if ( algo_has_aes ) printf( " AES " );
|
||||
else if ( algo_has_aes ) printf( " AES" );
|
||||
if ( algo_has_sha ) printf( " SHA" );
|
||||
}
|
||||
printf("\n");
|
||||
@@ -3619,7 +3635,9 @@ int main(int argc, char *argv[])
|
||||
pthread_mutex_init( &stratum.sock_lock, NULL );
|
||||
pthread_mutex_init( &stratum.work_lock, NULL );
|
||||
|
||||
flags = !opt_benchmark && strncmp( rpc_url, "https:", 6 )
|
||||
flags = !opt_benchmark
|
||||
&& ( strncmp( rpc_url, "https:", 6 )
|
||||
|| strncasecmp(rpc_url, "stratum+tcps://", 15 ) )
|
||||
? ( CURL_GLOBAL_ALL & ~CURL_GLOBAL_SSL )
|
||||
: CURL_GLOBAL_ALL;
|
||||
if ( curl_global_init( flags ) )
|
||||
|
@@ -113,6 +113,9 @@ static inline __m512i m512_const_64( const uint64_t i7, const uint64_t i6,
|
||||
m512_const1_64( ( ( ( (uint64_t)(i1) << 32 ) ) \
|
||||
| ( (uint64_t)(i0) & 0xffffffff ) ) )
|
||||
|
||||
// { m128_1, m128_1, m128_0, m128_0 }
|
||||
#define m512_const_2x128( v1, v0 ) \
|
||||
m512_mask_blend_epi64( 0x0f, m512_const1_128( v1 ), m512_const1_128( v0 ) )
|
||||
|
||||
static inline __m512i m512_const4_64( const uint64_t i3, const uint64_t i2,
|
||||
const uint64_t i1, const uint64_t i0 )
|
||||
|
66
util.c
66
util.c
@@ -1069,7 +1069,7 @@ double target_to_diff(uint32_t* target)
|
||||
#define socket_blocks() (errno == EAGAIN || errno == EWOULDBLOCK)
|
||||
#endif
|
||||
|
||||
static bool send_line(curl_socket_t sock, char *s)
|
||||
static bool send_line( struct stratum_ctx *sctx, char *s )
|
||||
{
|
||||
size_t sent = 0;
|
||||
int len;
|
||||
@@ -1077,24 +1077,35 @@ static bool send_line(curl_socket_t sock, char *s)
|
||||
len = (int) strlen(s);
|
||||
s[len++] = '\n';
|
||||
|
||||
while (len > 0) {
|
||||
while ( len > 0 )
|
||||
{
|
||||
struct timeval timeout = {0, 0};
|
||||
int n;
|
||||
fd_set wd;
|
||||
|
||||
FD_ZERO(&wd);
|
||||
FD_SET(sock, &wd);
|
||||
if (select((int) (sock + 1), NULL, &wd, NULL, &timeout) < 1)
|
||||
FD_ZERO( &wd );
|
||||
FD_SET( sctx->sock, &wd );
|
||||
if ( select( (int) ( sctx->sock + 1 ), NULL, &wd, NULL, &timeout ) < 1 )
|
||||
return false;
|
||||
n = send(sock, s + sent, len, 0);
|
||||
if (n < 0) {
|
||||
if (!socket_blocks())
|
||||
return false;
|
||||
n = 0;
|
||||
}
|
||||
|
||||
#if LIBCURL_VERSION_NUM >= 0x071802
|
||||
|
||||
CURLcode rc = curl_easy_send(sctx->curl, s + sent, len, (size_t *)&n);
|
||||
if ( rc != CURLE_OK )
|
||||
{
|
||||
if ( rc != CURLE_AGAIN )
|
||||
#else
|
||||
n = send(sock, s + sent, len, 0);
|
||||
if ( n < 0 )
|
||||
{
|
||||
if ( !socket_blocks() )
|
||||
#endif
|
||||
return false;
|
||||
n = 0;
|
||||
}
|
||||
sent += n;
|
||||
len -= n;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
@@ -1107,7 +1118,7 @@ bool stratum_send_line(struct stratum_ctx *sctx, char *s)
|
||||
applog(LOG_DEBUG, "> %s", s);
|
||||
|
||||
pthread_mutex_lock(&sctx->sock_lock);
|
||||
ret = send_line(sctx->sock, s);
|
||||
ret = send_line( sctx, s );
|
||||
pthread_mutex_unlock(&sctx->sock_lock);
|
||||
|
||||
return ret;
|
||||
@@ -1167,14 +1178,27 @@ char *stratum_recv_line(struct stratum_ctx *sctx)
|
||||
ssize_t n;
|
||||
|
||||
memset(s, 0, RBUFSIZE);
|
||||
n = recv(sctx->sock, s, RECVSIZE, 0);
|
||||
|
||||
#if LIBCURL_VERSION_NUM >= 0x071802
|
||||
|
||||
CURLcode rc = curl_easy_recv(sctx->curl, s, RECVSIZE, (size_t *)&n);
|
||||
if (rc == CURLE_OK && !n) {
|
||||
ret = false;
|
||||
break;
|
||||
}
|
||||
if (rc != CURLE_OK) {
|
||||
if (rc != CURLE_AGAIN || !socket_full(sctx->sock, 1)) {
|
||||
#else
|
||||
|
||||
n = recv(sctx->sock, s, RECVSIZE, 0);
|
||||
if (!n) {
|
||||
ret = false;
|
||||
break;
|
||||
}
|
||||
if (n < 0) {
|
||||
if (!socket_blocks() || !socket_full(sctx->sock, 1)) {
|
||||
ret = false;
|
||||
#endif
|
||||
ret = false;
|
||||
break;
|
||||
}
|
||||
} else
|
||||
@@ -1244,7 +1268,9 @@ bool stratum_connect(struct stratum_ctx *sctx, const char *url)
|
||||
}
|
||||
free(sctx->curl_url);
|
||||
sctx->curl_url = (char*) malloc(strlen(url));
|
||||
sprintf(sctx->curl_url, "http%s", strstr(url, "://"));
|
||||
sprintf( sctx->curl_url, "http%s", strstr( url, "s://" )
|
||||
? strstr( url, "s://" )
|
||||
: strstr (url, "://" ) );
|
||||
|
||||
if (opt_protocol)
|
||||
curl_easy_setopt(curl, CURLOPT_VERBOSE, 1);
|
||||
@@ -1254,7 +1280,9 @@ bool stratum_connect(struct stratum_ctx *sctx, const char *url)
|
||||
curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, sctx->curl_err_str);
|
||||
curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1);
|
||||
curl_easy_setopt(curl, CURLOPT_TCP_NODELAY, 1);
|
||||
if (opt_proxy) {
|
||||
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0);
|
||||
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0);
|
||||
if (opt_proxy) {
|
||||
curl_easy_setopt(curl, CURLOPT_PROXY, opt_proxy);
|
||||
curl_easy_setopt(curl, CURLOPT_PROXYTYPE, opt_proxy_type);
|
||||
}
|
||||
@@ -1954,7 +1982,9 @@ static bool stratum_reconnect(struct stratum_ctx *sctx, json_t *params)
|
||||
return false;
|
||||
|
||||
url = (char*) malloc(32 + strlen(host));
|
||||
sprintf(url, "stratum+tcp://%s:%d", host, port);
|
||||
|
||||
strncpy( url, sctx->url, 15 );
|
||||
sprintf( strstr( url, "://" ) + 3, "%s:%d", host, port );
|
||||
|
||||
if (!opt_redirect) {
|
||||
applog(LOG_INFO, "Ignoring request to reconnect to %s", url);
|
||||
|
Reference in New Issue
Block a user