mirror of https://github.com/JayDDee/cpuminer-opt.git
synced 2025-09-17 23:44:27 +00:00

Compare commits (2 commits): b47cfaa720, 241bc26767
@@ -84,6 +84,7 @@ cpuminer_SOURCES = \
  algo/cubehash/cubehash_sse2.c\
  algo/cubehash/cube-hash-2way.c \
  algo/echo/sph_echo.c \
  algo/echo/echo-hash-4way.c \
  algo/echo/aes_ni/hash.c\
  algo/gost/sph_gost.c \
  algo/groestl/sph_groestl.c \
@@ -7,9 +7,11 @@ Security warning
----------------

Miner programs are often flagged as malware by antivirus programs. This is
-a false positive, they are flagged simply because they are cryptocurrency
-miners. The source code is open for anyone to inspect. If you don't trust
-the software, don't use it.
+usually a false positive, they are flagged simply because they are
+cryptocurrency miners. However, some malware has been spread using the
+cover that miners are known to be subject to false positives. Always be on
+alert. The source code of cpuminer-opt is open for anyone to inspect.
+If you don't trust the software don't download it.

The cryptographic hashing code has been taken from trusted sources but has been
modified for speed at the expense of accepted security practices. This

@@ -33,6 +35,21 @@ not supported. FreeBSD YMMV.
Change Log
----------

v3.10.7

AVX512 for x25x, lbry, x13bcd (bcd).

v3.10.6

Added support for SSL stratum: stratum+tcps://

Added job id reporting again, but leaner, suppressed with --quiet.

AVX512 for x21s, x22i, lyra2z, allium.

Fixed share overflow warnings mining lbry with Ryzen (SHA).
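The stratum+tcps:// scheme introduced in v3.10.6 is passed to the miner like any other stratum URL. A typical invocation might look like the following; the pool host, port, and wallet below are placeholders, not values taken from this repository:

    cpuminer -a allium -o stratum+tcps://pool.example.com:4444 -u WALLET_ADDRESS -p x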
v3.10.5

AVX512 for x17, sonoa, xevan, hmq1725, lyra2rev3, lyra2rev2.
@@ -317,6 +317,7 @@ const char* const algo_alias_map[][2] =
  { "argon2d-crds", "argon2d250" },
  { "argon2d-dyn", "argon2d500" },
  { "argon2d-uis", "argon2d4096" },
  { "bcd", "x13bcd" },
  { "bitcore", "timetravel10" },
  { "bitzeny", "yescryptr8" },
  { "blake256r8", "blakecoin" },
@@ -104,7 +104,7 @@ typedef struct {
typedef blake_8way_small_context blake256_8way_context;
void blake256_8way_init(void *cc);
void blake256_8way_update(void *cc, const void *data, size_t len);
-#define blake256_8way blake256_8way_update
+//#define blake256_8way blake256_8way_update
void blake256_8way_close(void *cc, void *dst);

// 14 rounds, blake, decred
@@ -842,7 +842,8 @@ blake32_4way_init( blake_4way_small_context *ctx, const uint32_t *iv,
}

static void
-blake32_4way( blake_4way_small_context *ctx, const void *data, size_t len )
+blake32_4way( blake_4way_small_context *ctx, const void *data,
+              size_t len )
{
   __m128i *buf = (__m128i*)ctx->buf;
   size_t bptr = ctx->ptr<<2;
@@ -1237,7 +1238,7 @@ blake256_4way_init(void *ctx)
}

void
-blake256_4way(void *ctx, const void *data, size_t len)
+blake256_4way_update(void *ctx, const void *data, size_t len)
{
   blake32_4way(ctx, data, len);
}
@@ -463,6 +463,38 @@ int blake2s_8way_final( blake2s_8way_state *S, void *out, uint8_t outlen )
   return 0;
}

// Update and final when inlen is a multiple of 64 bytes
int blake2s_8way_full_blocks( blake2s_8way_state *S, void *out,
                              const void *input, uint64_t inlen )
{
   __m256i *in = (__m256i*)input;
   __m256i *buf = (__m256i*)S->buf;

   while( inlen > BLAKE2S_BLOCKBYTES )
   {
      memcpy_256( buf, in, BLAKE2S_BLOCKBYTES >> 2 );
      S->buflen = BLAKE2S_BLOCKBYTES;
      inlen -= BLAKE2S_BLOCKBYTES;
      S->t[0] += BLAKE2S_BLOCKBYTES;
      S->t[1] += ( S->t[0] < BLAKE2S_BLOCKBYTES );
      blake2s_8way_compress( S, buf );
      S->buflen = 0;
      in += ( BLAKE2S_BLOCKBYTES >> 2 );
   }

   // last block
   memcpy_256( buf, in, BLAKE2S_BLOCKBYTES >> 2 );
   S->buflen = BLAKE2S_BLOCKBYTES;
   S->t[0] += S->buflen;
   S->t[1] += ( S->t[0] < S->buflen );
   if ( S->last_node ) S->f[1] = ~0U;
   S->f[0] = ~0U;
   blake2s_8way_compress( S, buf );

   for ( int i = 0; i < 8; ++i )
      casti_m256i( out, i ) = S->h[ i ];
   return 0;
}
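A minimal usage sketch of the new helper, assuming the eight 64-byte messages are already interleaved 8x32-bit in vin and that vout receives the eight interleaved 256-bit digests (vin and vout are illustrative names, not from this repository):

    blake2s_8way_state S;
    blake2s_8way_init( &S, 32 );                     // 32 byte (256 bit) output
    blake2s_8way_full_blocks( &S, vout, vin, 64 );   // exactly one 64 byte block per lane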

#endif // __AVX2__
@@ -14,7 +14,6 @@
#ifndef __BLAKE2S_HASH_4WAY_H__
#define __BLAKE2S_HASH_4WAY_H__ 1

-//#if defined(__SSE4_2__)
#if defined(__SSE2__)

#include "simd-utils.h"

@@ -95,8 +94,8 @@ int blake2s_8way_init( blake2s_8way_state *S, const uint8_t outlen );
int blake2s_8way_update( blake2s_8way_state *S, const void *in,
                         uint64_t inlen );
int blake2s_8way_final( blake2s_8way_state *S, void *out, uint8_t outlen );
-//int blake2s_8way_full_blocks( blake2s_8way_state *S, void *out,
-//                              const void *input, uint64_t inlen );
+int blake2s_8way_full_blocks( blake2s_8way_state *S, void *out,
+                              const void *input, uint64_t inlen );

#endif

@@ -132,6 +131,6 @@ int blake2s_16way_final( blake2s_16way_state *S, void *out, uint8_t outlen );
}
#endif

-#endif // __SSE4_2__
+#endif // __SSE2__

#endif
@@ -41,7 +41,6 @@ int scanhash_bmw512_8way( struct work *work, uint32_t max_nonce,

   for ( int lane = 0; lane < 8; lane++ )
   if ( unlikely( hash7[ lane<<1 ] < Htarg ) )
-// if ( ( ( hash7[ lane<<1 ] & 0xFFFFFF00 ) == 0 ) )
   {
      extr_lane_8x64( lane_hash, hash, lane, 256 );
      if ( fulltest( lane_hash, ptarget ) )

@@ -66,7 +65,7 @@ void bmw512hash_4way(void *state, const void *input)
{
   bmw512_4way_context ctx;
   bmw512_4way_init( &ctx );
-  bmw512_4way( &ctx, input, 80 );
+  bmw512_4way_update( &ctx, input, 80 );
   bmw512_4way_close( &ctx, state );
}
algo/echo/echo-hash-4way.c (new file, 559 lines)
@@ -0,0 +1,559 @@
#if defined(__AVX512VAES__) && defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
|
||||
#include "simd-utils.h"
|
||||
#include "echo-hash-4way.h"
|
||||
|
||||
/*
|
||||
#include <memory.h>
|
||||
#include "miner.h"
|
||||
#include "hash_api.h"
|
||||
//#include "vperm.h"
|
||||
#include <immintrin.h>
|
||||
*/
|
||||
/*
|
||||
#ifndef NO_AES_NI
|
||||
#include <wmmintrin.h>
|
||||
#else
|
||||
#include <tmmintrin.h>
|
||||
#endif
|
||||
*/
|
||||
|
||||
// not used
|
||||
/*
|
||||
const unsigned int _k_s0F[] = {0x0F0F0F0F, 0x0F0F0F0F, 0x0F0F0F0F, 0x0F0F0F0F};
|
||||
const unsigned int _k_ipt[] = {0x5A2A7000, 0xC2B2E898, 0x52227808, 0xCABAE090, 0x317C4D00, 0x4C01307D, 0xB0FDCC81, 0xCD80B1FC};
|
||||
const unsigned int _k_opt[] = {0xD6B66000, 0xFF9F4929, 0xDEBE6808, 0xF7974121, 0x50BCEC00, 0x01EDBD51, 0xB05C0CE0, 0xE10D5DB1};
|
||||
const unsigned int _k_inv[] = {0x0D080180, 0x0E05060F, 0x0A0B0C02, 0x04070309, 0x0F0B0780, 0x01040A06, 0x02050809, 0x030D0E0C};
|
||||
const unsigned int _k_sb1[] = {0xCB503E00, 0xB19BE18F, 0x142AF544, 0xA5DF7A6E, 0xFAE22300, 0x3618D415, 0x0D2ED9EF, 0x3BF7CCC1};
|
||||
const unsigned int _k_sb2[] = {0x0B712400, 0xE27A93C6, 0xBC982FCD, 0x5EB7E955, 0x0AE12900, 0x69EB8840, 0xAB82234A, 0xC2A163C8};
|
||||
const unsigned int _k_sb3[] = {0xC0211A00, 0x53E17249, 0xA8B2DA89, 0xFB68933B, 0xF0030A00, 0x5FF35C55, 0xA6ACFAA5, 0xF956AF09};
|
||||
const unsigned int _k_sb4[] = {0x3FD64100, 0xE1E937A0, 0x49087E9F, 0xA876DE97, 0xC393EA00, 0x3D50AED7, 0x876D2914, 0xBA44FE79};
|
||||
const unsigned int _k_sb5[] = {0xF4867F00, 0x5072D62F, 0x5D228BDB, 0x0DA9A4F9, 0x3971C900, 0x0B487AC2, 0x8A43F0FB, 0x81B332B8};
|
||||
const unsigned int _k_sb7[] = {0xFFF75B00, 0xB20845E9, 0xE1BAA416, 0x531E4DAC, 0x3390E000, 0x62A3F282, 0x21C1D3B1, 0x43125170};
|
||||
const unsigned int _k_sbo[] = {0x6FBDC700, 0xD0D26D17, 0xC502A878, 0x15AABF7A, 0x5FBB6A00, 0xCFE474A5, 0x412B35FA, 0x8E1E90D1};
|
||||
const unsigned int _k_h63[] = {0x63636363, 0x63636363, 0x63636363, 0x63636363};
|
||||
const unsigned int _k_hc6[] = {0xc6c6c6c6, 0xc6c6c6c6, 0xc6c6c6c6, 0xc6c6c6c6};
|
||||
const unsigned int _k_h5b[] = {0x5b5b5b5b, 0x5b5b5b5b, 0x5b5b5b5b, 0x5b5b5b5b};
|
||||
const unsigned int _k_h4e[] = {0x4e4e4e4e, 0x4e4e4e4e, 0x4e4e4e4e, 0x4e4e4e4e};
|
||||
const unsigned int _k_h0e[] = {0x0e0e0e0e, 0x0e0e0e0e, 0x0e0e0e0e, 0x0e0e0e0e};
|
||||
const unsigned int _k_h15[] = {0x15151515, 0x15151515, 0x15151515, 0x15151515};
|
||||
const unsigned int _k_aesmix1[] = {0x0f0a0500, 0x030e0904, 0x07020d08, 0x0b06010c};
|
||||
const unsigned int _k_aesmix2[] = {0x000f0a05, 0x04030e09, 0x0807020d, 0x0c0b0601};
|
||||
const unsigned int _k_aesmix3[] = {0x05000f0a, 0x0904030e, 0x0d080702, 0x010c0b06};
|
||||
const unsigned int _k_aesmix4[] = {0x0a05000f, 0x0e090403, 0x020d0807, 0x06010c0b};
|
||||
*/
|
||||
|
||||
/*
|
||||
MYALIGN const unsigned int const1[] = {0x00000001, 0x00000000, 0x00000000, 0x00000000};
|
||||
MYALIGN const unsigned int mul2mask[] = {0x00001b00, 0x00000000, 0x00000000, 0x00000000};
|
||||
MYALIGN const unsigned int lsbmask[] = {0x01010101, 0x01010101, 0x01010101, 0x01010101};
|
||||
MYALIGN const unsigned int invshiftrows[] = {0x070a0d00, 0x0b0e0104, 0x0f020508, 0x0306090c};
|
||||
MYALIGN const unsigned int zero[] = {0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
*/
|
||||
|
||||
MYALIGN const unsigned int mul2ipt[] = {0x728efc00, 0x6894e61a, 0x3fc3b14d, 0x25d9ab57, 0xfd5ba600, 0x2a8c71d7, 0x1eb845e3, 0xc96f9234};
|
||||
|
||||
// do these need to be reversed?
|
||||
|
||||
#define mul2mask \
|
||||
m512_const4_32( 0x00001b00, 0, 0, 0 )
|
||||
|
||||
#define lsbmask m512_const1_32( 0x01010101 )
|
||||
|
||||
#define ECHO_SUBBYTES( state, i, j ) \
|
||||
state[i][j] = _mm512_aesenc_epi128( state[i][j], k1 ); \
|
||||
state[i][j] = _mm512_aesenc_epi128( state[i][j], m512_zero ); \
|
||||
k1 = _mm512_add_epi32( k1, m512_one_32 )
|
||||
|
||||
#define ECHO_MIXBYTES( state1, state2, j, t1, t2, s2 ) do \
|
||||
{ \
|
||||
const int j1 = ( j+1 ) & 3; \
|
||||
const int j2 = ( j+2 ) & 3; \
|
||||
const int j3 = ( j+3 ) & 3; \
|
||||
s2 = _mm512_add_epi8( state1[ 0 ] [j ], state1[ 0 ][ j ] ); \
|
||||
t1 = _mm512_srli_epi16( state1[ 0 ][ j ], 7 ); \
|
||||
t1 = _mm512_and_si512( t1, lsbmask ); \
|
||||
t2 = _mm512_shuffle_epi8( mul2mask, t1 ); \
|
||||
s2 = _mm512_xor_si512( s2, t2 ); \
|
||||
state2[ 0 ] [j ] = s2; \
|
||||
state2[ 1 ] [j ] = state1[ 0 ][ j ]; \
|
||||
state2[ 2 ] [j ] = state1[ 0 ][ j ]; \
|
||||
state2[ 3 ] [j ] = _mm512_xor_si512( s2, state1[ 0 ][ j ] );\
|
||||
s2 = _mm512_add_epi8( state1[ 1 ][ j1 ], state1[ 1 ][ j1 ] ); \
|
||||
t1 = _mm512_srli_epi16( state1[ 1 ][ j1 ], 7 ); \
|
||||
t1 = _mm512_and_si512( t1, lsbmask ); \
|
||||
t2 = _mm512_shuffle_epi8( mul2mask, t1 ); \
|
||||
s2 = _mm512_xor_si512( s2, t2 );\
|
||||
state2[ 0 ][ j ] = _mm512_xor_si512( state2[ 0 ][ j ], \
|
||||
_mm512_xor_si512( s2, state1[ 1 ][ j1 ] ) ); \
|
||||
state2[ 1 ][ j ] = _mm512_xor_si512( state2[ 1 ][ j ], s2 ); \
|
||||
state2[ 2 ][ j ] = _mm512_xor_si512( state2[ 2 ][ j ], state1[ 1 ][ j1 ] ); \
|
||||
state2[ 3 ][ j ] = _mm512_xor_si512( state2[ 3 ][ j ], state1[ 1 ][ j1 ] ); \
|
||||
s2 = _mm512_add_epi8( state1[ 2 ][ j2 ], state1[ 2 ][ j2 ] ); \
|
||||
t1 = _mm512_srli_epi16( state1[ 2 ][ j2 ], 7 ); \
|
||||
t1 = _mm512_and_si512( t1, lsbmask ); \
|
||||
t2 = _mm512_shuffle_epi8( mul2mask, t1 ); \
|
||||
s2 = _mm512_xor_si512( s2, t2 ); \
|
||||
state2[ 0 ][ j ] = _mm512_xor_si512( state2[ 0 ][ j ], state1[ 2 ][ j2 ] ); \
|
||||
state2[ 1 ][ j ] = _mm512_xor_si512( state2[ 1 ][ j ], \
|
||||
_mm512_xor_si512( s2, state1[ 2 ][ j2 ] ) ); \
|
||||
state2[ 2 ][ j ] = _mm512_xor_si512( state2[ 2 ][ j ], s2 ); \
|
||||
state2[ 3 ][ j ] = _mm512_xor_si512( state2[ 3][ j ], state1[ 2 ][ j2 ] ); \
|
||||
s2 = _mm512_add_epi8( state1[ 3 ][ j3 ], state1[ 3 ][ j3 ] ); \
|
||||
t1 = _mm512_srli_epi16( state1[ 3 ][ j3 ], 7 ); \
|
||||
t1 = _mm512_and_si512( t1, lsbmask ); \
|
||||
t2 = _mm512_shuffle_epi8( mul2mask, t1 ); \
|
||||
s2 = _mm512_xor_si512( s2, t2 ); \
|
||||
state2[ 0 ][ j ] = _mm512_xor_si512( state2[ 0 ][ j ], state1[ 3 ][ j3 ] ); \
|
||||
state2[ 1 ][ j ] = _mm512_xor_si512( state2[ 1 ][ j ], state1[ 3 ][ j3 ] ); \
|
||||
state2[ 2 ][ j ] = _mm512_xor_si512( state2[ 2 ][ j ], \
|
||||
_mm512_xor_si512( s2, state1[ 3 ][ j3] ) ); \
|
||||
state2[ 3 ][ j ] = _mm512_xor_si512( state2[ 3 ][ j ], s2 )
|
||||
} while(0)
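The doubling steps in ECHO_MIXBYTES above (an _mm512_add_epi8 of a value with itself, followed by a conditional xor with 0x1b selected through lsbmask and mul2mask) are a byte-sliced form of the AES GF(2^8) multiply-by-two. A scalar sketch of the same operation, for illustration only and not part of this file:

    // xtime: multiply one byte by 2 in GF(2^8) with the AES polynomial 0x11b.
    static inline uint8_t xtime( uint8_t x )
    {
       return (uint8_t)( ( x << 1 ) ^ ( ( x >> 7 ) * 0x1b ) );
    }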
|
||||
|
||||
#define ECHO_ROUND_UNROLL2 \
|
||||
ECHO_SUBBYTES(_state, 0, 0);\
|
||||
ECHO_SUBBYTES(_state, 1, 0);\
|
||||
ECHO_SUBBYTES(_state, 2, 0);\
|
||||
ECHO_SUBBYTES(_state, 3, 0);\
|
||||
ECHO_SUBBYTES(_state, 0, 1);\
|
||||
ECHO_SUBBYTES(_state, 1, 1);\
|
||||
ECHO_SUBBYTES(_state, 2, 1);\
|
||||
ECHO_SUBBYTES(_state, 3, 1);\
|
||||
ECHO_SUBBYTES(_state, 0, 2);\
|
||||
ECHO_SUBBYTES(_state, 1, 2);\
|
||||
ECHO_SUBBYTES(_state, 2, 2);\
|
||||
ECHO_SUBBYTES(_state, 3, 2);\
|
||||
ECHO_SUBBYTES(_state, 0, 3);\
|
||||
ECHO_SUBBYTES(_state, 1, 3);\
|
||||
ECHO_SUBBYTES(_state, 2, 3);\
|
||||
ECHO_SUBBYTES(_state, 3, 3);\
|
||||
ECHO_MIXBYTES(_state, _state2, 0, t1, t2, s2);\
|
||||
ECHO_MIXBYTES(_state, _state2, 1, t1, t2, s2);\
|
||||
ECHO_MIXBYTES(_state, _state2, 2, t1, t2, s2);\
|
||||
ECHO_MIXBYTES(_state, _state2, 3, t1, t2, s2);\
|
||||
ECHO_SUBBYTES(_state2, 0, 0);\
|
||||
ECHO_SUBBYTES(_state2, 1, 0);\
|
||||
ECHO_SUBBYTES(_state2, 2, 0);\
|
||||
ECHO_SUBBYTES(_state2, 3, 0);\
|
||||
ECHO_SUBBYTES(_state2, 0, 1);\
|
||||
ECHO_SUBBYTES(_state2, 1, 1);\
|
||||
ECHO_SUBBYTES(_state2, 2, 1);\
|
||||
ECHO_SUBBYTES(_state2, 3, 1);\
|
||||
ECHO_SUBBYTES(_state2, 0, 2);\
|
||||
ECHO_SUBBYTES(_state2, 1, 2);\
|
||||
ECHO_SUBBYTES(_state2, 2, 2);\
|
||||
ECHO_SUBBYTES(_state2, 3, 2);\
|
||||
ECHO_SUBBYTES(_state2, 0, 3);\
|
||||
ECHO_SUBBYTES(_state2, 1, 3);\
|
||||
ECHO_SUBBYTES(_state2, 2, 3);\
|
||||
ECHO_SUBBYTES(_state2, 3, 3);\
|
||||
ECHO_MIXBYTES(_state2, _state, 0, t1, t2, s2);\
|
||||
ECHO_MIXBYTES(_state2, _state, 1, t1, t2, s2);\
|
||||
ECHO_MIXBYTES(_state2, _state, 2, t1, t2, s2);\
|
||||
ECHO_MIXBYTES(_state2, _state, 3, t1, t2, s2)
|
||||
|
||||
|
||||
|
||||
#define SAVESTATE(dst, src)\
|
||||
dst[0][0] = src[0][0];\
|
||||
dst[0][1] = src[0][1];\
|
||||
dst[0][2] = src[0][2];\
|
||||
dst[0][3] = src[0][3];\
|
||||
dst[1][0] = src[1][0];\
|
||||
dst[1][1] = src[1][1];\
|
||||
dst[1][2] = src[1][2];\
|
||||
dst[1][3] = src[1][3];\
|
||||
dst[2][0] = src[2][0];\
|
||||
dst[2][1] = src[2][1];\
|
||||
dst[2][2] = src[2][2];\
|
||||
dst[2][3] = src[2][3];\
|
||||
dst[3][0] = src[3][0];\
|
||||
dst[3][1] = src[3][1];\
|
||||
dst[3][2] = src[3][2];\
|
||||
dst[3][3] = src[3][3]
|
||||
|
||||
|
||||
void echo_4way_compress( echo_4way_context *ctx, const unsigned char *pmsg,
|
||||
unsigned int uBlockCount )
|
||||
{
|
||||
unsigned int r, b, i, j;
|
||||
__m512i t1, t2, s2, k1;
|
||||
__m512i _state[4][4], _state2[4][4], _statebackup[4][4];
|
||||
|
||||
// unroll
|
||||
for ( i = 0; i < 4; i++ )
|
||||
for ( j = 0; j < ctx->uHashSize / 256; j++ )
|
||||
_state[ i ][ j ] = ctx->state[ i ][ j ];
|
||||
|
||||
for ( b = 0; b < uBlockCount; b++ )
|
||||
{
|
||||
ctx->k = _mm512_add_epi64( ctx->k, ctx->const1536 );
|
||||
|
||||
// load message, make aligned, remove loadu
|
||||
for( j = ctx->uHashSize / 256; j < 4; j++ )
|
||||
{
|
||||
for ( i = 0; i < 4; i++ )
|
||||
{
|
||||
_state[ i ][ j ] = _mm512_loadu_si512(
|
||||
(__m512i*)pmsg + 4 * (j - (ctx->uHashSize / 256)) + i );
|
||||
}
|
||||
}
|
||||
|
||||
// save state
|
||||
SAVESTATE( _statebackup, _state );
|
||||
|
||||
k1 = ctx->k;
|
||||
|
||||
for ( r = 0; r < ctx->uRounds / 2; r++ )
|
||||
{
|
||||
ECHO_ROUND_UNROLL2;
|
||||
}
|
||||
|
||||
if ( ctx->uHashSize == 256 )
|
||||
{
|
||||
for ( i = 0; i < 4; i++ )
|
||||
{
|
||||
_state[ i ][ 0 ] = _mm512_xor_si512( _state[ i ][ 0 ],
|
||||
_state[ i ][ 1 ] );
|
||||
_state[ i ][ 0 ] = _mm512_xor_si512( _state[ i ][ 0 ],
|
||||
_state[ i ][ 2 ] );
|
||||
_state[ i ][ 0 ] = _mm512_xor_si512( _state[ i ][ 0 ],
|
||||
_state[ i ][ 3 ] );
|
||||
_state[ i ][ 0 ] = _mm512_xor_si512( _state[ i ][ 0 ],
|
||||
_statebackup[ i ][ 0 ] );
|
||||
_state[ i ][ 0 ] = _mm512_xor_si512( _state[ i ][ 0 ],
|
||||
_statebackup[ i ][ 1 ] );
|
||||
_state[ i ][ 0 ] = _mm512_xor_si512( _state[ i ][ 0 ],
|
||||
_statebackup[ i ][ 2 ] ) ;
|
||||
_state[ i ][ 0 ] = _mm512_xor_si512( _state[ i ][ 0 ],
|
||||
_statebackup[ i ][ 3 ] );
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for ( i = 0; i < 4; i++ )
|
||||
{
|
||||
_state[ i ][ 0 ] = _mm512_xor_si512( _state[ i ][ 0 ],
|
||||
_state[ i ][ 2 ] );
|
||||
_state[ i ][ 1 ] = _mm512_xor_si512( _state[ i ][ 1 ],
|
||||
_state[ i ][ 3 ] );
|
||||
_state[ i ][ 0 ] = _mm512_xor_si512( _state[ i ][ 0 ],
|
||||
_statebackup[ i ][ 0 ] );
|
||||
_state[ i ][ 0 ] = _mm512_xor_si512( _state[ i ] [0 ],
|
||||
_statebackup[ i ][ 2 ] );
|
||||
_state[ i ][ 1 ] = _mm512_xor_si512( _state[ i ][ 1 ],
|
||||
_statebackup[ i ][ 1 ] );
|
||||
_state[ i ][ 1 ] = _mm512_xor_si512( _state[ i ][ 1 ],
|
||||
_statebackup[ i ][ 3 ] );
|
||||
}
|
||||
}
|
||||
pmsg += ctx->uBlockLength;
|
||||
}
|
||||
SAVESTATE(ctx->state, _state);
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
int echo_4way_init( echo_4way_context *ctx, int nHashSize )
|
||||
{
|
||||
int i, j;
|
||||
|
||||
ctx->k = m512_zero;
|
||||
ctx->processed_bits = 0;
|
||||
ctx->uBufferBytes = 0;
|
||||
|
||||
switch( nHashSize )
|
||||
{
|
||||
case 256:
|
||||
ctx->uHashSize = 256;
|
||||
ctx->uBlockLength = 192;
|
||||
ctx->uRounds = 8;
|
||||
ctx->hashsize = _mm512_const4_32( 0, 0, 0, 0x100 );
|
||||
ctx->const1536 = _mm512_const4_32( 0, 0, 0, 0x600 );
|
||||
break;
|
||||
|
||||
case 512:
|
||||
ctx->uHashSize = 512;
|
||||
ctx->uBlockLength = 128;
|
||||
ctx->uRounds = 10;
|
||||
ctx->hashsize = _mm512_const4_32( 0, 0, 0, 0x200 );
|
||||
ctx->const1536 = _mm512_const4_32( 0, 0, 0, 0x400);
|
||||
break;
|
||||
|
||||
default:
|
||||
return BAD_HASHBITLEN;
|
||||
}
|
||||
|
||||
|
||||
for( i = 0; i < 4; i++ )
|
||||
for( j = 0; j < nHashSize / 256; j++ )
|
||||
ctx->state[ i ][ j ] = ctx->hashsize;
|
||||
|
||||
for( i = 0; i < 4; i++ )
|
||||
for( j = nHashSize / 256; j < 4; j++ )
|
||||
ctx->state[ i ][ j ] = m512_zero;
|
||||
|
||||
return SUCCESS;
|
||||
}
|
||||
|
||||
int echo_4way_update( echo_4way_context *state, const BitSequence *data, DataLength databitlen )
|
||||
{
|
||||
unsigned int uByteLength, uBlockCount, uRemainingBytes;
|
||||
|
||||
uByteLength = (unsigned int)(databitlen / 8);
|
||||
|
||||
if ( ( state->uBufferBytes + uByteLength ) >= state->uBlockLength )
|
||||
{
|
||||
if ( state->uBufferBytes != 0 )
|
||||
{
|
||||
// Fill the buffer
|
||||
memcpy( state->buffer + state->uBufferBytes,
|
||||
(void*)data, state->uBlockLength - state->uBufferBytes );
|
||||
|
||||
// Process buffer
|
||||
echo_4way_compress( state, state->buffer, 1 );
|
||||
state->processed_bits += state->uBlockLength * 8;
|
||||
|
||||
data += state->uBlockLength - state->uBufferBytes;
|
||||
uByteLength -= state->uBlockLength - state->uBufferBytes;
|
||||
}
|
||||
|
||||
// buffer now does not contain any unprocessed bytes
|
||||
|
||||
uBlockCount = uByteLength / state->uBlockLength;
|
||||
uRemainingBytes = uByteLength % state->uBlockLength;
|
||||
|
||||
if ( uBlockCount > 0 )
|
||||
{
|
||||
echo_4way_compress( state, data, uBlockCount );
|
||||
|
||||
state->processed_bits += uBlockCount * state->uBlockLength * 8;
|
||||
data += uBlockCount * state->uBlockLength;
|
||||
}
|
||||
|
||||
if ( uRemainingBytes > 0 )
|
||||
{
|
||||
memcpy( state->buffer, (void*)data, uRemainingBytes );
|
||||
}
|
||||
|
||||
state->uBufferBytes = uRemainingBytes;
|
||||
}
|
||||
else
|
||||
{
|
||||
memcpy( state->buffer + state->uBufferBytes, (void*)data, uByteLength );
|
||||
state->uBufferBytes += uByteLength;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int echo_4way_close( echo_4way_context *state, BitSequence *hashval )
|
||||
{
|
||||
__m512i remainingbits;
|
||||
|
||||
// Add remaining bytes in the buffer
|
||||
state->processed_bits += state->uBufferBytes * 8;
|
||||
|
||||
remainingbits = _mm512_set4_epi32( 0, 0, 0, state->uBufferBytes * 8 );
|
||||
|
||||
// Pad with 0x80
|
||||
state->buffer[ state->uBufferBytes++ ] = 0x80;
|
||||
|
||||
// Enough buffer space for padding in this block?
|
||||
if ( ( state->uBlockLength - state->uBufferBytes ) >= 18)
|
||||
{
|
||||
// Pad with zeros
|
||||
memset( state->buffer + state->uBufferBytes, 0,
|
||||
state->uBlockLength - ( state->uBufferBytes + 18 ) );
|
||||
|
||||
// Hash size
|
||||
*( (unsigned short*)( state->buffer + state->uBlockLength - 18 ) )
|
||||
= state->uHashSize;
|
||||
|
||||
// Processed bits
|
||||
*( ( DataLength*)( state->buffer + state->uBlockLength - 16 ) )
|
||||
= state->processed_bits;
|
||||
*( ( DataLength*)( state->buffer + state->uBlockLength - 8 ) ) = 0;
|
||||
|
||||
// Last block contains message bits?
|
||||
if ( state->uBufferBytes == 1 )
|
||||
{
|
||||
state->k = _mm512_xor_si512( state->k, state->k );
|
||||
state->k = _mm512_sub_epi64( state->k, state->const1536 );
|
||||
}
|
||||
else
|
||||
{
|
||||
state->k = _mm512_add_epi64( state->k, remainingbits );
|
||||
state->k = _mm512_sub_epi64( state->k, state->const1536 );
|
||||
}
|
||||
|
||||
// Compress
|
||||
echo_4way_compress( state, state->buffer, 1 );
|
||||
}
|
||||
else
|
||||
{
|
||||
// Fill with zero and compress
|
||||
memset( state->buffer + state->uBufferBytes, 0,
|
||||
state->uBlockLength - state->uBufferBytes );
|
||||
state->k = _mm512_add_epi64( state->k, remainingbits );
|
||||
state->k = _mm512_sub_epi64( state->k, state->const1536 );
|
||||
echo_4way_compress( state, state->buffer, 1 );
|
||||
|
||||
// Last block
|
||||
memset( state->buffer, 0, state->uBlockLength - 18 );
|
||||
|
||||
// Hash size
|
||||
*( (unsigned short*)( state->buffer + state->uBlockLength - 18 ) )
|
||||
= state->uHashSize;
|
||||
|
||||
// Processed bits
|
||||
*( (DataLength*)( state->buffer + state->uBlockLength - 16 ) )
|
||||
= state->processed_bits;
|
||||
*( (DataLength*)( state->buffer + state->uBlockLength - 8 ) ) = 0;
|
||||
|
||||
// Compress the last block
|
||||
state->k = _mm512_xor_si512(state->k, state->k);
|
||||
state->k = _mm512_sub_epi64(state->k, state->const1536);
|
||||
echo_4way_compress(state, state->buffer, 1);
|
||||
}
|
||||
|
||||
// Store the hash value
|
||||
_mm512_storeu_si512( (__m512i*)hashval + 0, state->state[ 0][ 0 ]);
|
||||
_mm512_storeu_si512( (__m512i*)hashval + 1, state->state[ 1][ 0 ]);
|
||||
|
||||
if ( state->uHashSize == 512 )
|
||||
{
|
||||
_mm512_storeu_si512((__m512i*)hashval + 2, state->state[ 2 ][ 0 ]);
|
||||
_mm512_storeu_si512((__m512i*)hashval + 3, state->state[ 3 ][ 0 ]);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int echo_4way_update_close( echo_4way_context *state, BitSequence *hashval,
|
||||
const BitSequence *data, DataLength databitlen )
|
||||
{
|
||||
unsigned int uByteLength, uBlockCount, uRemainingBytes;
|
||||
|
||||
uByteLength = (unsigned int)(databitlen / 8);
|
||||
|
||||
if ( (state->uBufferBytes + uByteLength) >= state->uBlockLength )
|
||||
{
|
||||
if ( state->uBufferBytes != 0 )
|
||||
{
|
||||
// Fill the buffer
|
||||
memcpy( state->buffer + state->uBufferBytes,
|
||||
(void*)data, state->uBlockLength - state->uBufferBytes );
|
||||
|
||||
// Process buffer
|
||||
echo_4way_compress( state, state->buffer, 1 );
|
||||
state->processed_bits += state->uBlockLength * 8;
|
||||
|
||||
data += state->uBlockLength - state->uBufferBytes;
|
||||
uByteLength -= state->uBlockLength - state->uBufferBytes;
|
||||
}
|
||||
|
||||
// buffer now does not contain any unprocessed bytes
|
||||
|
||||
uBlockCount = uByteLength / state->uBlockLength;
|
||||
uRemainingBytes = uByteLength % state->uBlockLength;
|
||||
|
||||
if ( uBlockCount > 0 )
|
||||
{
|
||||
echo_4way_compress( state, data, uBlockCount );
|
||||
state->processed_bits += uBlockCount * state->uBlockLength * 8;
|
||||
data += uBlockCount * state->uBlockLength;
|
||||
}
|
||||
|
||||
if ( uRemainingBytes > 0 )
|
||||
memcpy(state->buffer, (void*)data, uRemainingBytes);
|
||||
state->uBufferBytes = uRemainingBytes;
|
||||
}
|
||||
else
|
||||
{
|
||||
memcpy( state->buffer + state->uBufferBytes, (void*)data, uByteLength );
|
||||
state->uBufferBytes += uByteLength;
|
||||
}
|
||||
|
||||
__m512i remainingbits;
|
||||
|
||||
// Add remaining bytes in the buffer
|
||||
state->processed_bits += state->uBufferBytes * 8;
|
||||
|
||||
remainingbits = _mm512_set4_epi32( 0, 0, 0, state->uBufferBytes * 8 );
|
||||
|
||||
// Pad with 0x80
|
||||
state->buffer[ state->uBufferBytes++ ] = 0x80;
|
||||
// Enough buffer space for padding in this block?
|
||||
if ( (state->uBlockLength - state->uBufferBytes) >= 18 )
|
||||
{
|
||||
// Pad with zeros
|
||||
memset( state->buffer + state->uBufferBytes, 0,
|
||||
state->uBlockLength - (state->uBufferBytes + 18) );
|
||||
|
||||
// Hash size
|
||||
*( (unsigned short*)(state->buffer + state->uBlockLength - 18) )
|
||||
= state->uHashSize;
|
||||
|
||||
// Processed bits
|
||||
*( (DataLength*)(state->buffer + state->uBlockLength - 16) ) =
|
||||
state->processed_bits;
|
||||
*( (DataLength*)(state->buffer + state->uBlockLength - 8) ) = 0;
|
||||
|
||||
// Last block contains message bits?
|
||||
if( state->uBufferBytes == 1 )
|
||||
{
|
||||
state->k = _mm512_xor_si512( state->k, state->k );
|
||||
state->k = _mm512_sub_epi64( state->k, state->const1536 );
|
||||
}
|
||||
else
|
||||
{
|
||||
         state->k = _mm512_add_epi64( state->k, remainingbits );
         state->k = _mm512_sub_epi64( state->k, state->const1536 );
|
||||
}
|
||||
|
||||
// Compress
|
||||
echo_4way_compress( state, state->buffer, 1 );
|
||||
}
|
||||
else
|
||||
{
|
||||
// Fill with zero and compress
|
||||
memset( state->buffer + state->uBufferBytes, 0,
|
||||
state->uBlockLength - state->uBufferBytes );
|
||||
state->k = _mm512_add_epi64( state->k, remainingbits );
|
||||
state->k = _mm512_sub_epi64( state->k, state->const1536 );
|
||||
echo_4way_compress( state, state->buffer, 1 );
|
||||
|
||||
// Last block
|
||||
memset( state->buffer, 0, state->uBlockLength - 18 );
|
||||
|
||||
// Hash size
|
||||
*( (unsigned short*)(state->buffer + state->uBlockLength - 18) ) =
|
||||
state->uHashSize;
|
||||
|
||||
// Processed bits
|
||||
*( (DataLength*)(state->buffer + state->uBlockLength - 16) ) =
|
||||
state->processed_bits;
|
||||
*( (DataLength*)(state->buffer + state->uBlockLength - 8) ) = 0;
|
||||
// Compress the last block
|
||||
state->k = _mm512_xor_si512( state->k, state->k );
|
||||
state->k = _mm512_sub_epi64( state->k, state->const1536 );
|
||||
echo_4way_compress( state, state->buffer, 1) ;
|
||||
}
|
||||
|
||||
// Store the hash value
|
||||
_mm512_storeu_si512( (__m512i*)hashval + 0, state->state[ 0 ][ 0] );
|
||||
_mm512_storeu_si512( (__m512i*)hashval + 1, state->state[ 1 ][ 0] );
|
||||
|
||||
if ( state->uHashSize == 512 )
|
||||
{
|
||||
_mm512_storeu_si512( (__m512i*)hashval + 2, state->state[ 2 ][ 0 ] );
|
||||
_mm512_storeu_si512( (__m512i*)hashval + 3, state->state[ 3 ][ 0 ] );
|
||||
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
algo/echo/echo-hash-4way.h (new file, 36 lines)
@@ -0,0 +1,36 @@
#if !defined(ECHO_HASH_4WAY_H__)
#define ECHO_HASH_4WAY_H__ 1

#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)

#include "simd-utils.h"

typedef struct
{
    __m512i state[4][4];
    __m512i buffer[ 4 * 192 / 16 ];   // 4x128 interleaved 192 bytes
    __m512i k;
    __m512i hashsize;
    __m512i const1536;

    unsigned int uRounds;
    unsigned int uHashSize;
    unsigned int uBlockLength;
    unsigned int uBufferBytes;
    unsigned int processed_bits;

} echo_4way_context __attribute__ ((aligned (64)));

int echo_4way_init( echo_4way_context *state, int hashbitlen );

int echo_4way_update( echo_4way_context *state, const void *data,
                      unsigned int databitlen );

int echo_close( echo_4way_context *state, void *hashval );

int echo_4way_update_close( echo_4way_context *state, void *hashval,
                            const void *data, int databitlen );
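A minimal usage sketch of the 4-way ECHO-512 interface above, assuming an AVX-512 build and a message already interleaved 4x128-bit (vdata and vhash are illustrative names, not from this repository):

    echo_4way_context ctx;
    __m512i vhash[4];                                   // four interleaved 512-bit digests
    echo_4way_init( &ctx, 512 );                        // ECHO-512: 128-byte blocks, 10 rounds
    echo_4way_update_close( &ctx, vhash, vdata, 512 );  // length argument is in bits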

#endif
#endif
@@ -9,6 +9,7 @@

//#ifndef NO_AES_NI

// Not to be confused with AVX512VAES
#define VAES
// #define VAVX
// #define VVPERM
@@ -45,7 +45,7 @@ void myriad_4way_hash( void *output, const void *input )

   intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 );

-  sha256_4way( &ctx.sha, vhash, 64 );
+  sha256_4way_update( &ctx.sha, vhash, 64 );
   sha256_4way_close( &ctx.sha, output );
}
@@ -1171,7 +1171,8 @@ void hamsi512_4way_init( hamsi_4way_big_context *sc )
   sc->h[7] = m256_const1_64( 0x6769756d2042656c );
}

-void hamsi512_4way( hamsi_4way_big_context *sc, const void *data, size_t len )
+void hamsi512_4way_update( hamsi_4way_big_context *sc, const void *data,
+                           size_t len )
{
   __m256i *vdata = (__m256i*)data;
@@ -62,7 +62,7 @@ typedef hamsi_4way_big_context hamsi512_4way_context;
void hamsi512_4way_init( hamsi512_4way_context *sc );
void hamsi512_4way_update( hamsi512_4way_context *sc, const void *data,
                           size_t len );
-#define hamsi512_4way hamsi512_4way_update
+//#define hamsi512_4way hamsi512_4way_update
void hamsi512_4way_close( hamsi512_4way_context *sc, void *dst );

#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
@@ -38,7 +38,7 @@
#define SPH_XCAT_(a, b) a ## b

static void
-SPH_XCAT(SPH_XCAT(haval, PASSES), _4way)
+SPH_XCAT(SPH_XCAT(haval, PASSES), _4way_update)
( haval_4way_context *sc, const void *data, size_t len )
{
   __m128i *vdata = (__m128i*)data;
@@ -479,9 +479,9 @@ haval ## xxx ## _ ## y ## _4way_init(void *cc) \
} \
\
void \
-haval ## xxx ## _ ## y ## _4way (void *cc, const void *data, size_t len) \
+haval ## xxx ## _ ## y ## _4way_update (void *cc, const void *data, size_t len) \
{ \
-   haval ## y ## _4way(cc, data, len); \
+   haval ## y ## _4way_update(cc, data, len); \
} \
\
void \
@@ -85,7 +85,7 @@ typedef haval_4way_context haval256_5_4way_context;
void haval256_5_4way_init( void *cc );

void haval256_5_4way_update( void *cc, const void *data, size_t len );
-#define haval256_5_4way haval256_5_4way_update
+//#define haval256_5_4way haval256_5_4way_update

void haval256_5_4way_close( void *cc, void *dst );
@@ -103,14 +103,12 @@ typedef jh_4way_context jh512_4way_context;
void jh256_4way_init( jh_4way_context *sc);

void jh256_4way_update(void *cc, const void *data, size_t len);
-#define jh256_4way jh256_4way_update

void jh256_4way_close(void *cc, void *dst);

void jh512_4way_init( jh_4way_context *sc );

void jh512_4way_update(void *cc, const void *data, size_t len);
-#define jh512_4way jh512_4way_update

void jh512_4way_close(void *cc, void *dst);
@@ -33,7 +33,7 @@ void jha_hash_4way( void *out, const void *input )
     keccak512_4way_context ctx_keccak;

     keccak512_4way_init( &ctx_keccak );
-    keccak512_4way( &ctx_keccak, input, 80 );
+    keccak512_4way_update( &ctx_keccak, input, 80 );
     keccak512_4way_close( &ctx_keccak, vhash );

     // Heavy & Light Pair Loop
@@ -58,7 +58,7 @@ void jha_hash_4way( void *out, const void *input )
        intrlv_4x64( vhashA, hash0, hash1, hash2, hash3, 512 );

        skein512_4way_init( &ctx_skein );
-       skein512_4way( &ctx_skein, vhash, 64 );
+       skein512_4way_update( &ctx_skein, vhash, 64 );
        skein512_4way_close( &ctx_skein, vhashB );

        for ( int i = 0; i < 8; i++ )
@@ -69,7 +69,7 @@ void jha_hash_4way( void *out, const void *input )
        blake512_4way_close( &ctx_blake, vhashA );

        jh512_4way_init( &ctx_jh );
-       jh512_4way( &ctx_jh, vhash, 64 );
+       jh512_4way_update( &ctx_jh, vhash, 64 );
        jh512_4way_close( &ctx_jh, vhashB );

        for ( int i = 0; i < 8; i++ )
@@ -99,14 +99,12 @@ typedef keccak64_ctx_m256i keccak512_4way_context;
void keccak256_4way_init(void *cc);
void keccak256_4way_update(void *cc, const void *data, size_t len);
void keccak256_4way_close(void *cc, void *dst);
-#define keccak256_4way keccak256_4way_update

void keccak512_4way_init(void *cc);
void keccak512_4way_update(void *cc, const void *data, size_t len);
void keccak512_4way_close(void *cc, void *dst);
void keccak512_4way_addbits_and_close(
                      void *cc, unsigned ub, unsigned n, void *dst);
-#define keccak512_4way keccak512_4way_update

#endif
@@ -1,15 +1,178 @@
|
||||
#include "lyra2-gate.h"
|
||||
#include <memory.h>
|
||||
#include <mm_malloc.h>
|
||||
|
||||
#if defined (ALLIUM_4WAY)
|
||||
|
||||
#include "algo/blake/blake-hash-4way.h"
|
||||
#include "algo/keccak/keccak-hash-4way.h"
|
||||
#include "algo/skein/skein-hash-4way.h"
|
||||
#include "algo/cubehash/cubehash_sse2.h"
|
||||
#include "algo/cubehash/cube-hash-2way.h"
|
||||
#include "algo/groestl/aes_ni/hash-groestl256.h"
|
||||
|
||||
#if defined (ALLIUM_8WAY)
|
||||
|
||||
typedef struct {
|
||||
blake256_8way_context blake;
|
||||
keccak256_8way_context keccak;
|
||||
cube_4way_context cube;
|
||||
skein256_8way_context skein;
|
||||
hashState_groestl256 groestl;
|
||||
} allium_8way_ctx_holder;
|
||||
|
||||
static __thread allium_8way_ctx_holder allium_8way_ctx;
|
||||
|
||||
bool init_allium_8way_ctx()
|
||||
{
|
||||
keccak256_8way_init( &allium_8way_ctx.keccak );
|
||||
cube_4way_init( &allium_8way_ctx.cube, 256, 16, 32 );
|
||||
skein256_8way_init( &allium_8way_ctx.skein );
|
||||
init_groestl256( &allium_8way_ctx.groestl, 32 );
|
||||
return true;
|
||||
}
|
||||
|
||||
void allium_8way_hash( void *state, const void *input )
|
||||
{
|
||||
uint32_t vhash[8*8] __attribute__ ((aligned (128)));
|
||||
uint32_t vhashA[8*8] __attribute__ ((aligned (64)));
|
||||
uint32_t vhashB[8*8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash0[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash1[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash2[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash3[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash4[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash5[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash6[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash7[8] __attribute__ ((aligned (64)));
|
||||
allium_8way_ctx_holder ctx __attribute__ ((aligned (64)));
|
||||
|
||||
memcpy( &ctx, &allium_8way_ctx, sizeof(allium_8way_ctx) );
|
||||
blake256_8way_update( &ctx.blake, input + (64<<3), 16 );
|
||||
blake256_8way_close( &ctx.blake, vhash );
|
||||
|
||||
rintrlv_8x32_8x64( vhashA, vhash, 256 );
|
||||
keccak256_8way_update( &ctx.keccak, vhashA, 32 );
|
||||
keccak256_8way_close( &ctx.keccak, vhash );
|
||||
|
||||
dintrlv_8x64( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
|
||||
vhash, 256 );
|
||||
|
||||
intrlv_2x256( vhash, hash0, hash1, 256 );
|
||||
LYRA2RE_2WAY( vhash, 32, vhash, 32, 1, 8, 8 );
|
||||
dintrlv_2x256( hash0, hash1, vhash, 256 );
|
||||
intrlv_2x256( vhash, hash2, hash3, 256 );
|
||||
LYRA2RE_2WAY( vhash, 32, vhash, 32, 1, 8, 8 );
|
||||
dintrlv_2x256( hash2, hash3, vhash, 256 );
|
||||
intrlv_2x256( vhash, hash4, hash5, 256 );
|
||||
LYRA2RE_2WAY( vhash, 32, vhash, 32, 1, 8, 8 );
|
||||
dintrlv_2x256( hash4, hash5, vhash, 256 );
|
||||
intrlv_2x256( vhash, hash6, hash7, 256 );
|
||||
LYRA2RE_2WAY( vhash, 32, vhash, 32, 1, 8, 8 );
|
||||
dintrlv_2x256( hash6, hash7, vhash, 256 );
|
||||
|
||||
intrlv_4x128( vhashA, hash0, hash1, hash2, hash3, 256 );
|
||||
intrlv_4x128( vhashB, hash4, hash5, hash6, hash7, 256 );
|
||||
|
||||
cube_4way_update_close( &ctx.cube, vhashA, vhashA, 32 );
|
||||
cube_4way_init( &ctx.cube, 256, 16, 32 );
|
||||
cube_4way_update_close( &ctx.cube, vhashB, vhashB, 32 );
|
||||
|
||||
dintrlv_4x128( hash0, hash1, hash2, hash3, vhashA, 256 );
|
||||
dintrlv_4x128( hash4, hash5, hash6, hash7, vhashB, 256 );
|
||||
|
||||
intrlv_2x256( vhash, hash0, hash1, 256 );
|
||||
LYRA2RE_2WAY( vhash, 32, vhash, 32, 1, 8, 8 );
|
||||
dintrlv_2x256( hash0, hash1, vhash, 256 );
|
||||
intrlv_2x256( vhash, hash2, hash3, 256 );
|
||||
LYRA2RE_2WAY( vhash, 32, vhash, 32, 1, 8, 8 );
|
||||
dintrlv_2x256( hash2, hash3, vhash, 256 );
|
||||
intrlv_2x256( vhash, hash4, hash5, 256 );
|
||||
LYRA2RE_2WAY( vhash, 32, vhash, 32, 1, 8, 8 );
|
||||
dintrlv_2x256( hash4, hash5, vhash, 256 );
|
||||
intrlv_2x256( vhash, hash6, hash7, 256 );
|
||||
LYRA2RE_2WAY( vhash, 32, vhash, 32, 1, 8, 8 );
|
||||
dintrlv_2x256( hash6, hash7, vhash, 256 );
|
||||
|
||||
intrlv_8x64( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7, 256 );
|
||||
|
||||
skein256_8way_update( &ctx.skein, vhash, 32 );
|
||||
skein256_8way_close( &ctx.skein, vhash );
|
||||
|
||||
dintrlv_8x64( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
|
||||
vhash, 256 );
|
||||
|
||||
update_and_final_groestl256( &ctx.groestl, state, hash0, 256 );
|
||||
memcpy( &ctx.groestl, &allium_8way_ctx.groestl,
|
||||
sizeof(hashState_groestl256) );
|
||||
update_and_final_groestl256( &ctx.groestl, state+32, hash1, 256 );
|
||||
memcpy( &ctx.groestl, &allium_8way_ctx.groestl,
|
||||
sizeof(hashState_groestl256) );
|
||||
update_and_final_groestl256( &ctx.groestl, state+64, hash2, 256 );
|
||||
memcpy( &ctx.groestl, &allium_8way_ctx.groestl,
|
||||
sizeof(hashState_groestl256) );
|
||||
update_and_final_groestl256( &ctx.groestl, state+96, hash3, 256 );
|
||||
memcpy( &ctx.groestl, &allium_8way_ctx.groestl,
|
||||
sizeof(hashState_groestl256) );
|
||||
update_and_final_groestl256( &ctx.groestl, state+128, hash4, 256 );
|
||||
memcpy( &ctx.groestl, &allium_8way_ctx.groestl,
|
||||
sizeof(hashState_groestl256) );
|
||||
update_and_final_groestl256( &ctx.groestl, state+160, hash5, 256 );
|
||||
memcpy( &ctx.groestl, &allium_8way_ctx.groestl,
|
||||
sizeof(hashState_groestl256) );
|
||||
update_and_final_groestl256( &ctx.groestl, state+192, hash6, 256 );
|
||||
memcpy( &ctx.groestl, &allium_8way_ctx.groestl,
|
||||
sizeof(hashState_groestl256) );
|
||||
update_and_final_groestl256( &ctx.groestl, state+224, hash7, 256 );
|
||||
memcpy( &ctx.groestl, &allium_8way_ctx.groestl,
|
||||
sizeof(hashState_groestl256) );
|
||||
}
|
||||
|
||||
int scanhash_allium_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[8*8] __attribute__ ((aligned (128)));
|
||||
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
const uint32_t last_nonce = max_nonce - 8;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
__m256i *noncev = (__m256i*)vdata + 19; // aligned
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
if ( opt_benchmark )
|
||||
( (uint32_t*)ptarget )[7] = 0x0000ff;
|
||||
|
||||
mm256_bswap32_intrlv80_8x32( vdata, pdata );
|
||||
blake256_8way_init( &allium_8way_ctx.blake );
|
||||
blake256_8way_update( &allium_8way_ctx.blake, vdata, 64 );
|
||||
|
||||
do {
|
||||
*noncev = mm256_bswap_32( _mm256_set_epi32( n+7, n+6, n+5, n+4,
|
||||
n+3, n+2, n+1, n ) );
|
||||
|
||||
allium_8way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
for ( int lane = 0; lane < 8; lane++ ) if ( (hash+(lane<<3))[7] <= Htarg )
|
||||
{
|
||||
if ( fulltest( hash+(lane<<3), ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
submit_lane_solution( work, hash+(lane<<3), mythr, lane );
|
||||
}
|
||||
}
|
||||
n += 8;
|
||||
} while ( (n < last_nonce) && !work_restart[thr_id].restart);
|
||||
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
#elif defined (ALLIUM_4WAY)
|
||||
|
||||
|
||||
typedef struct {
|
||||
blake256_4way_context blake;
|
||||
keccak256_4way_context keccak;
|
||||
@@ -41,11 +204,11 @@ void allium_4way_hash( void *state, const void *input )
|
||||
allium_4way_ctx_holder ctx __attribute__ ((aligned (64)));
|
||||
|
||||
memcpy( &ctx, &allium_4way_ctx, sizeof(allium_4way_ctx) );
|
||||
blake256_4way( &ctx.blake, input + (64<<2), 16 );
|
||||
blake256_4way_update( &ctx.blake, input + (64<<2), 16 );
|
||||
blake256_4way_close( &ctx.blake, vhash32 );
|
||||
|
||||
rintrlv_4x32_4x64( vhash64, vhash32, 256 );
|
||||
keccak256_4way( &ctx.keccak, vhash64, 32 );
|
||||
keccak256_4way_update( &ctx.keccak, vhash64, 32 );
|
||||
keccak256_4way_close( &ctx.keccak, vhash64 );
|
||||
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash64, 256 );
|
||||
@@ -70,7 +233,7 @@ void allium_4way_hash( void *state, const void *input )
|
||||
|
||||
intrlv_4x64( vhash64, hash0, hash1, hash2, hash3, 256 );
|
||||
|
||||
skein256_4way( &ctx.skein, vhash64, 32 );
|
||||
skein256_4way_update( &ctx.skein, vhash64, 32 );
|
||||
skein256_4way_close( &ctx.skein, vhash64 );
|
||||
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash64, 256 );
|
||||
|
@@ -129,7 +129,11 @@ bool register_lyra2rev2_algo( algo_gate_t* gate )

bool register_lyra2z_algo( algo_gate_t* gate )
{
-#if defined(LYRA2Z_8WAY)
+#if defined(LYRA2Z_16WAY)
+  gate->miner_thread_init = (void*)&lyra2z_16way_thread_init;
+  gate->scanhash = (void*)&scanhash_lyra2z_16way;
+  gate->hash = (void*)&lyra2z_16way_hash;
+#elif defined(LYRA2Z_8WAY)
  gate->miner_thread_init = (void*)&lyra2z_8way_thread_init;
  gate->scanhash = (void*)&scanhash_lyra2z_8way;
  gate->hash = (void*)&lyra2z_8way_hash;
@@ -142,7 +146,7 @@ bool register_lyra2z_algo( algo_gate_t* gate )
  gate->scanhash = (void*)&scanhash_lyra2z;
  gate->hash = (void*)&lyra2z_hash;
#endif
-  gate->optimizations = SSE42_OPT | AVX2_OPT;
+  gate->optimizations = SSE42_OPT | AVX2_OPT | AVX512_OPT;
  opt_target_factor = 256.0;
  return true;
};

@@ -170,7 +174,11 @@ bool register_lyra2h_algo( algo_gate_t* gate )

bool register_allium_algo( algo_gate_t* gate )
{
-#if defined (ALLIUM_4WAY)
+#if defined (ALLIUM_8WAY)
+  gate->miner_thread_init = (void*)&init_allium_8way_ctx;
+  gate->scanhash = (void*)&scanhash_allium_8way;
+  gate->hash = (void*)&allium_8way_hash;
+#elif defined (ALLIUM_4WAY)
  gate->miner_thread_init = (void*)&init_allium_4way_ctx;
  gate->scanhash = (void*)&scanhash_allium_4way;
  gate->hash = (void*)&allium_4way_hash;
@@ -179,7 +187,7 @@ bool register_allium_algo( algo_gate_t* gate )
  gate->scanhash = (void*)&scanhash_allium;
  gate->hash = (void*)&allium_hash;
#endif
-  gate->optimizations = SSE2_OPT | AES_OPT | SSE42_OPT | AVX2_OPT;
+  gate->optimizations = SSE2_OPT | AES_OPT | SSE42_OPT | AVX2_OPT | AVX512_OPT;
  opt_target_factor = 256.0;
  return true;
};
@@ -85,17 +85,25 @@ bool init_lyra2rev2_ctx();

/////////////////////////

-#if defined(__SSE2__)
-  #define LYRA2Z_4WAY
-#endif
-#if defined(__AVX2__)
-  #define LYRA2Z_8WAY
+#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
+  #define LYRA2Z_16WAY 1
+#elif defined(__AVX2__)
+  #define LYRA2Z_8WAY 1
+#elif defined(__SSE2__)
+  #define LYRA2Z_4WAY 1
#endif

#define LYRA2Z_MATRIX_SIZE BLOCK_LEN_INT64 * 8 * 8 * 8

-#if defined(LYRA2Z_8WAY)
+#if defined(LYRA2Z_16WAY)
+
+void lyra2z_16way_hash( void *state, const void *input );
+int scanhash_lyra2z_16way( struct work *work, uint32_t max_nonce,
+                           uint64_t *hashes_done, struct thr_info *mythr );
+bool lyra2z_16way_thread_init();
+
+#elif defined(LYRA2Z_8WAY)

void lyra2z_8way_hash( void *state, const void *input );
int scanhash_lyra2z_8way( struct work *work, uint32_t max_nonce,
@@ -144,13 +152,22 @@ bool lyra2h_thread_init();

//////////////////////////////////

-#if defined(__AVX2__) && defined(__AES__)
-  #define ALLIUM_4WAY
+#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
+  #define ALLIUM_8WAY 1
+#elif defined(__AVX2__) && defined(__AES__)
+  #define ALLIUM_4WAY 1
#endif

bool register_allium_algo( algo_gate_t* gate );

-#if defined(ALLIUM_4WAY)
+#if defined(ALLIUM_8WAY)
+
+void allium_8way_hash( void *state, const void *input );
+int scanhash_allium_8way( struct work *work, uint32_t max_nonce,
+                          uint64_t *hashes_done, struct thr_info *mythr );
+bool init_allium_8way_ctx();
+
+#elif defined(ALLIUM_4WAY)

void allium_4way_hash( void *state, const void *input );
int scanhash_allium_4way( struct work *work, uint32_t max_nonce,
@@ -26,14 +26,17 @@
#include "lyra2.h"
#include "sponge.h"

-// LYRA2RE 8 cols 8 rows used by lyea2re, allium, phi2, x22i, x25x.
+// LYRA2RE 8 cols 8 rows used by lyra2re, allium, phi2, x22i, x25x,
+// dynamic matrix allocation.
//
-// LYRA2REV2 4 cols 4 rows used by lyra2rev2.
+// LYRA2REV2 4 cols 4 rows used by lyra2rev2 and x21s, static matrix
+// allocation.
//
// LYRA2REV3 4 cols 4 rows with an extra twist in calculating
-// rowa in the wandering phase. Used by lyra2rev3.
+// rowa in the wandering phase. Used by lyra2rev3. Static matrix
+// allocation.
//
-// LYRA2Z various cols & rows and supports 80 input. Used by lyra2z,
+// LYRA2Z various cols & rows and supports 80 byte input. Used by lyra2z,
// lyra2z330, lyra2h,
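The geometries described above correspond to the trailing arguments of the Lyra2 calls seen elsewhere in this compare; for example, the allium code drives the 8 col, 8 row LYRA2RE configuration as:

    LYRA2RE_2WAY( vhash, 32, vhash, 32, 1, 8, 8 );   // 32-byte key, 32-byte password, timeCost 1, 8 rows, 8 cols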

@@ -60,7 +63,7 @@
*/

// For lyra2rev3.
-// convert a simple offset to an index into interleaved data.
+// convert a simple offset to an index into 2x4 u64 interleaved data.
// good for state and 4 row matrix.
// index = ( int( off / 4 ) * 2 ) + ( off mod 4 )
@@ -202,12 +205,8 @@ int LYRA2REV3_2WAY( uint64_t* wholeMatrix, void *K, uint64_t kLen,
|
||||
|
||||
// hard coded for 32 byte input as well as matrix size.
|
||||
// Other required versions include 80 byte input and different block
|
||||
// sizez
|
||||
// sizes.
|
||||
|
||||
//int LYRA2REV3_2WAY( uint64_t* wholeMatrix, void *K, uint64_t kLen,
|
||||
// const void *pwd, const uint64_t pwdlen, const void *salt,
|
||||
// const uint64_t saltlen, const uint64_t timeCost, const uint64_t nRows,
|
||||
// const uint64_t nCols )
|
||||
{
|
||||
//====================== Basic variables ============================//
|
||||
uint64_t _ALIGN(256) state[32];
|
||||
@@ -335,159 +334,111 @@ int LYRA2REV3_2WAY( uint64_t* wholeMatrix, void *K, uint64_t kLen,
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif // AVX512
|
||||
|
||||
#if 0
|
||||
|
||||
//////////////////////////////////////////////////
|
||||
int LYRA2Z( uint64_t* wholeMatrix, void *K, uint64_t kLen, const void *pwd,
|
||||
const uint64_t pwdlen, const void *salt, const uint64_t saltlen,
|
||||
const uint64_t timeCost, const uint64_t nRows,
|
||||
const uint64_t nCols )
|
||||
|
||||
int LYRA2Z_2WAY( uint64_t* wholeMatrix, void *K, uint64_t kLen,
|
||||
const void *pwd, const uint64_t pwdlen, const uint64_t timeCost,
|
||||
const uint64_t nRows, const uint64_t nCols )
|
||||
{
|
||||
//========================== Basic variables ============================//
|
||||
uint64_t _ALIGN(256) state[16];
|
||||
int64_t row = 2; //index of row to be processed
|
||||
int64_t prev = 1; //index of prev (last row ever computed/modified)
|
||||
int64_t rowa = 0; //index of row* (a previous row, deterministically picked during Setup and randomly picked while Wandering)
|
||||
int64_t tau; //Time Loop iterator
|
||||
int64_t step = 1; //Visitation step (used during Setup and Wandering phases)
|
||||
int64_t window = 2; //Visitation window (used to define which rows can be revisited during Setup)
|
||||
int64_t gap = 1; //Modifier to the step, assuming the values 1 or -1
|
||||
// int64_t i; //auxiliary iteration counter
|
||||
uint64_t _ALIGN(256) state[32];
|
||||
int64_t row = 2;
|
||||
int64_t prev = 1;
|
||||
int64_t rowa0 = 0;
|
||||
int64_t rowa1 = 0;
|
||||
int64_t tau;
|
||||
int64_t step = 1;
|
||||
int64_t window = 2;
|
||||
int64_t gap = 1;
|
||||
//=======================================================================/
|
||||
|
||||
//======= Initializing the Memory Matrix and pointers to it =============//
|
||||
//Tries to allocate enough space for the whole memory matrix
|
||||
|
||||
const int64_t ROW_LEN_INT64 = BLOCK_LEN_INT64 * nCols;
|
||||
// const int64_t ROW_LEN_BYTES = ROW_LEN_INT64 * 8;
|
||||
|
||||
// memset( wholeMatrix, 0, ROW_LEN_BYTES * nRows );
|
||||
|
||||
//==== Getting the password + salt + basil padded with 10*1 ============//
|
||||
//OBS.:The memory matrix will temporarily hold the password: not for saving memory,
|
||||
//but this ensures that the password copied locally will be overwritten as soon as possible
|
||||
|
||||
//First, we clean enough blocks for the password, salt, basil and padding
|
||||
uint64_t nBlocksInput = ( ( saltlen + pwdlen + 6 *
|
||||
uint64_t nBlocksInput = ( ( pwdlen + pwdlen + 6 *
|
||||
sizeof (uint64_t) ) / BLOCK_LEN_BLAKE2_SAFE_BYTES ) + 1;
|
||||
byte *ptrByte = (byte*) wholeMatrix;
|
||||
memset( ptrByte, 0, nBlocksInput * BLOCK_LEN_BLAKE2_SAFE_BYTES );
|
||||
|
||||
//Prepends the password
|
||||
memcpy(ptrByte, pwd, pwdlen);
|
||||
ptrByte += pwdlen;
|
||||
uint64_t *ptr = wholeMatrix;
|
||||
uint64_t *pw = (uint64_t*)pwd;
|
||||
|
||||
//Concatenates the salt
|
||||
memcpy(ptrByte, salt, saltlen);
|
||||
ptrByte += saltlen;
|
||||
//Concatenates the basil: every integer passed as parameter, in the order they are provided by the interface
|
||||
memcpy(ptrByte, &kLen, sizeof (uint64_t));
|
||||
ptrByte += sizeof (uint64_t);
|
||||
memcpy(ptrByte, &pwdlen, sizeof (uint64_t));
|
||||
ptrByte += sizeof (uint64_t);
|
||||
memcpy(ptrByte, &saltlen, sizeof (uint64_t));
|
||||
ptrByte += sizeof (uint64_t);
|
||||
memcpy(ptrByte, &timeCost, sizeof (uint64_t));
|
||||
ptrByte += sizeof (uint64_t);
|
||||
memcpy(ptrByte, &nRows, sizeof (uint64_t));
|
||||
ptrByte += sizeof (uint64_t);
|
||||
memcpy(ptrByte, &nCols, sizeof (uint64_t));
|
||||
ptrByte += sizeof (uint64_t);
|
||||
memcpy( ptr, pw, 2*pwdlen ); // password
|
||||
ptr += pwdlen>>2;
|
||||
memcpy( ptr, pw, 2*pwdlen ); // password lane 1
|
||||
ptr += pwdlen>>2;
|
||||
|
||||
//Now comes the padding
|
||||
*ptrByte = 0x80; //first byte of padding: right after the password
|
||||
ptrByte = (byte*) wholeMatrix; //resets the pointer to the start of the memory matrix
|
||||
ptrByte += nBlocksInput * BLOCK_LEN_BLAKE2_SAFE_BYTES - 1; //sets the pointer to the correct position: end of incomplete block
|
||||
*ptrByte ^= 0x01; //last byte of padding: at the end of the last incomplete block
|
||||
// now build the rest interleaving on the fly.
|
||||
ptr[0] = ptr[ 4] = kLen;
|
||||
ptr[1] = ptr[ 5] = pwdlen;
|
||||
ptr[2] = ptr[ 6] = pwdlen; // saltlen
|
||||
ptr[3] = ptr[ 7] = timeCost;
|
||||
ptr[8] = ptr[12] = nRows;
|
||||
ptr[9] = ptr[13] = nCols;
|
||||
ptr[10] = ptr[14] = 0x80;
|
||||
ptr[11] = ptr[15] = 0x0100000000000000;
|
||||
|
||||
//=================== Initializing the Sponge State ====================//
|
||||
//Sponge state: 16 uint64_t, BLOCK_LEN_INT64 words of them for the bitrate (b) and the remainder for the capacity (c)
|
||||
// uint64_t *state = _mm_malloc(16 * sizeof(uint64_t), 32);
|
||||
// if (state == NULL) {
|
||||
// return -1;
|
||||
// }
|
||||
// initState( state );
|
||||
uint64_t *ptrWord = wholeMatrix;
|
||||
|
||||
//============================== Setup Phase =============================//
|
||||
//Absorbing salt, password and basil: this is the only place in which the block length is hard-coded to 512 bits
|
||||
uint64_t *ptrWord = wholeMatrix;
|
||||
absorbBlockBlake2Safe_2way( state, ptrWord, nBlocksInput,
|
||||
BLOCK_LEN_BLAKE2_SAFE_INT64 );

absorbBlockBlake2Safe( state, ptrWord, nBlocksInput,
BLOCK_LEN_BLAKE2_SAFE_INT64 );
/*
for ( i = 0; i < nBlocksInput; i++ )
{
absorbBlockBlake2Safe( state, ptrWord ); //absorbs each block of pad(pwd || salt || basil)
ptrWord += BLOCK_LEN_BLAKE2_SAFE_INT64; //goes to next block of pad(pwd || salt || basil)
}
*/
//Initializes M[0] and M[1]
reducedSqueezeRow0(state, &wholeMatrix[0], nCols); //The locally copied password is most likely overwritten here
reducedDuplexRow1(state, &wholeMatrix[0], &wholeMatrix[ROW_LEN_INT64], nCols);
//Initializes M[0] and M[1]
reducedSqueezeRow0_2way( state, &wholeMatrix[0], nCols );

do {
//M[row] = rand; //M[row*] = M[row*] XOR rotW(rand)
reducedDuplexRowSetup(state, &wholeMatrix[prev*ROW_LEN_INT64], &wholeMatrix[rowa*ROW_LEN_INT64], &wholeMatrix[row*ROW_LEN_INT64], nCols);
reducedDuplexRow1_2way( state, &wholeMatrix[0],
&wholeMatrix[ 2 * ROW_LEN_INT64 ], nCols );

//updates the value of row* (deterministically picked during Setup))
rowa = (rowa + step) & (window - 1);
//update prev: it now points to the last row ever computed
prev = row;
//updates row: goes to the next row to be computed
row++;
do
{
//M[row] = rand; //M[row*] = M[row*] XOR rotW(rand)

//Checks if all rows in the window where visited.
if (rowa == 0) {
step = window + gap; //changes the step: approximately doubles its value
window *= 2; //doubles the size of the re-visitation window
gap = -gap; //inverts the modifier to the step
}
reducedDuplexRowSetup_2way( state, &wholeMatrix[ 2* prev * ROW_LEN_INT64],
&wholeMatrix[ 2* rowa0 * ROW_LEN_INT64],
&wholeMatrix[ 2* row*ROW_LEN_INT64],
nCols );

} while (row < nRows);
rowa0 = (rowa0 + step) & (window - 1);
prev = row;
row++;

//======================== Wandering Phase =============================//
row = 0; //Resets the visitation to the first row of the memory matrix
for ( tau = 1; tau <= timeCost; tau++ )
{
//Step is approximately half the number of all rows of the memory matrix for an odd tau; otherwise, it is -1
if ( rowa0 == 0 )
{
step = window + gap;
window *= 2;
gap = -gap;
}
} while ( row < nRows );

row = 0;
for ( tau = 1; tau <= timeCost; tau++ )
{
step = (tau % 2 == 0) ? -1 : nRows / 2 - 1;
do {
//Selects a pseudorandom index row*
//----------------------------------------------------------------------
//rowa = ((unsigned int)state[0]) & (nRows-1); //(USE THIS IF nRows IS A POWER OF 2)
rowa = ((uint64_t) (state[0])) % nRows; //(USE THIS FOR THE "GENERIC" CASE)
//-----------------------------------------------------------------
do
{
rowa0 = state[ 0 ] % nRows;
rowa1 = state[ 4 ] % nRows;

//Performs a reduced-round duplexing operation over M[row*] XOR M[prev], updating both M[row*] and M[row]
reducedDuplexRow(state, &wholeMatrix[prev*ROW_LEN_INT64], &wholeMatrix[rowa*ROW_LEN_INT64], &wholeMatrix[row*ROW_LEN_INT64], nCols);
reducedDuplexRow_2way( state, &wholeMatrix[ 2* prev * ROW_LEN_INT64 ],
&wholeMatrix[ 2* rowa0 * ROW_LEN_INT64 ],
&wholeMatrix[ 2* rowa1 * ROW_LEN_INT64 ],
&wholeMatrix[ 2* row *ROW_LEN_INT64 ],
nCols );

//update prev: it now points to the last row ever computed
prev = row;

//updates row: goes to the next row to be computed
//---------------------------------------------------------------
//row = (row + step) & (nRows-1); //(USE THIS IF nRows IS A POWER OF 2)
row = (row + step) % nRows; //(USE THIS FOR THE "GENERIC" CASE)
//--------------------------------------------------------------------
row = (row + step) % nRows;

} while (row != 0);
}
}

//========================= Wrap-up Phase ===============================//
//Absorbs the last block of the memory matrix
absorbBlock(state, &wholeMatrix[rowa*ROW_LEN_INT64]);
absorbBlock_2way( state, &wholeMatrix[ 2 * rowa0 *ROW_LEN_INT64 ],
&wholeMatrix[ 2 * rowa1 *ROW_LEN_INT64 ] );

//Squeezes the key
squeeze( state, K, kLen );
//Squeezes the key
squeeze_2way( state, K, (unsigned int) kLen );

return 0;
return 0;
}

#endif

#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
////////////////////////////////////////////////////

// Lyra2RE doesn't like the new wholeMatrix implementation
int LYRA2RE_2WAY( void *K, uint64_t kLen, const void *pwd,
@@ -495,7 +446,7 @@ int LYRA2RE_2WAY( void *K, uint64_t kLen, const void *pwd,
const uint64_t nRows, const uint64_t nCols )
{
//====================== Basic variables ============================//
uint64_t _ALIGN(256) state[16];
uint64_t _ALIGN(256) state[32];
int64_t row = 2; //index of row to be processed
int64_t prev = 1; //index of prev (last row ever computed/modified)
int64_t rowa0 = 0;
@@ -517,25 +468,15 @@ int LYRA2RE_2WAY( void *K, uint64_t kLen, const void *pwd,
: BLOCK_LEN_BLAKE2_SAFE_BYTES;

i = (int64_t)ROW_LEN_BYTES * nRows;
uint64_t *wholeMatrix = _mm_malloc( i, 64 );
uint64_t *wholeMatrix = _mm_malloc( 2*i, 64 );
if (wholeMatrix == NULL)
return -1;

#if defined(__AVX2__)
memset_zero_256( (__m256i*)wholeMatrix, i>>5 );
#elif defined(__SSE2__)
memset_zero_128( (__m128i*)wholeMatrix, i>>4 );
#else
memset( wholeMatrix, 0, i );
#endif
memset_zero_512( (__m512i*)wholeMatrix, i>>5 );

uint64_t *ptrWord = wholeMatrix;
uint64_t *pw = (uint64_t*)pwd;

//=== Getting the password + salt + basil padded with 10*1 ==========//
//OBS.:The memory matrix will temporarily hold the password: not for saving memory,
//but this ensures that the password copied locally will be overwritten as soon as possible

//First, we clean enough blocks for the password, salt, basil and padding
int64_t nBlocksInput = ( ( pwdlen + pwdlen + 6 * sizeof(uint64_t) )
/ BLOCK_LEN_BLAKE2_SAFE_BYTES ) + 1;
@@ -558,66 +499,8 @@ int LYRA2RE_2WAY( void *K, uint64_t kLen, const void *pwd,
ptr[10] = ptr[14] = 0x80;
ptr[11] = ptr[15] = 0x0100000000000000;

/*
byte *ptrByte = (byte*) wholeMatrix;

//Prepends the password
memcpy(ptrByte, pwd, pwdlen);
ptrByte += pwdlen;

//Concatenates the salt
memcpy(ptrByte, salt, saltlen);
ptrByte += saltlen;

// memset( ptrByte, 0, nBlocksInput * BLOCK_LEN_BLAKE2_SAFE_BYTES
// - (saltlen + pwdlen) );

//Concatenates the basil: every integer passed as parameter, in the order they are provided by the interface
memcpy(ptrByte, &kLen, sizeof(int64_t));
ptrByte += sizeof(uint64_t);
v64 = pwdlen;
memcpy(ptrByte, &v64, sizeof(int64_t));
ptrByte += sizeof(uint64_t);
v64 = saltlen;
memcpy(ptrByte, &v64, sizeof(int64_t));
ptrByte += sizeof(uint64_t);
v64 = timeCost;
memcpy(ptrByte, &v64, sizeof(int64_t));
ptrByte += sizeof(uint64_t);
v64 = nRows;
memcpy(ptrByte, &v64, sizeof(int64_t));
ptrByte += sizeof(uint64_t);
v64 = nCols;
memcpy(ptrByte, &v64, sizeof(int64_t));
ptrByte += sizeof(uint64_t);

//Now comes the padding
*ptrByte = 0x80; //first byte of padding: right after the password
ptrByte = (byte*) wholeMatrix; //resets the pointer to the start of the memory matrix
ptrByte += nBlocksInput * BLOCK_LEN_BLAKE2_SAFE_BYTES - 1; //sets the pointer to the correct position: end of incomplete block
*ptrByte ^= 0x01; //last byte of padding: at the end of the last incomplete block

//================= Initializing the Sponge State ====================//
//Sponge state: 16 uint64_t, BLOCK_LEN_INT64 words of them for the bitrate (b) and the remainder for the capacity (c)

// initState( state );

//========================= Setup Phase =============================//
//Absorbing salt, password and basil: this is the only place in which the block length is hard-coded to 512 bits

ptrWord = wholeMatrix;

*/

absorbBlockBlake2Safe_2way( state, ptrWord, nBlocksInput, BLOCK_LEN );
/*
for (i = 0; i < nBlocksInput; i++)
{
absorbBlockBlake2Safe( state, ptrWord ); //absorbs each block of pad(pwd || salt || basil)
ptrWord += BLOCK_LEN; //goes to next block of pad(pwd || salt || basil)
}
*/

//Initializes M[0] and M[1]
reducedSqueezeRow0_2way( state, &wholeMatrix[0], nCols ); //The locally copied password is most likely overwritten here

@@ -62,6 +62,8 @@ int LYRA2(void *K, int64_t kLen, const void *pwd, int32_t pwdlen, const void *sa

#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)

int LYRA2RE_2WAY( void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen,
uint64_t timeCost, uint64_t nRows, uint64_t nCols );

int LYRA2REV2_2WAY( uint64_t*, void *K, uint64_t kLen, const void *pwd,
uint64_t pwdlen, uint64_t timeCost, uint64_t nRows, uint64_t nCols );
@@ -69,6 +71,9 @@ int LYRA2REV2_2WAY( uint64_t*, void *K, uint64_t kLen, const void *pwd,
int LYRA2REV3_2WAY( uint64_t*, void *K, uint64_t kLen, const void *pwd,
uint64_t pwdlen, uint64_t timeCost, uint64_t nRows, uint64_t nCols );

int LYRA2Z_2WAY( uint64_t*, void *K, uint64_t kLen, const void *pwd,
uint64_t pwdlen, uint64_t timeCost, uint64_t nRows, uint64_t nCols );

#endif

#endif /* LYRA2_H_ */

@@ -20,7 +20,7 @@ static __thread blake256_4way_context l2h_4way_blake_mid;
void lyra2h_4way_midstate( const void* input )
{
blake256_4way_init( &l2h_4way_blake_mid );
blake256_4way( &l2h_4way_blake_mid, input, 64 );
blake256_4way_update( &l2h_4way_blake_mid, input, 64 );
}

void lyra2h_4way_hash( void *state, const void *input )

@@ -44,7 +44,7 @@ void lyra2rev2_8way_hash( void *state, const void *input )
lyra2v2_8way_ctx_holder ctx __attribute__ ((aligned (64)));
memcpy( &ctx, &l2v2_8way_ctx, sizeof(l2v2_8way_ctx) );

blake256_8way( &ctx.blake, input + (64<<3), 16 );
blake256_8way_update( &ctx.blake, input + (64<<3), 16 );
blake256_8way_close( &ctx.blake, vhash );

rintrlv_8x32_8x64( vhashA, vhash, 256 );
@@ -176,12 +176,12 @@ void lyra2rev2_4way_hash( void *state, const void *input )
lyra2v2_4way_ctx_holder ctx __attribute__ ((aligned (64)));
memcpy( &ctx, &l2v2_4way_ctx, sizeof(l2v2_4way_ctx) );

blake256_4way( &ctx.blake, input + (64<<2), 16 );
blake256_4way_update( &ctx.blake, input + (64<<2), 16 );
blake256_4way_close( &ctx.blake, vhash );

rintrlv_4x32_4x64( vhash64, vhash, 256 );

keccak256_4way( &ctx.keccak, vhash64, 32 );
keccak256_4way_update( &ctx.keccak, vhash64, 32 );
keccak256_4way_close( &ctx.keccak, vhash64 );

dintrlv_4x64( hash0, hash1, hash2, hash3, vhash64, 256 );
@@ -201,7 +201,7 @@ void lyra2rev2_4way_hash( void *state, const void *input )

intrlv_4x64( vhash64, hash0, hash1, hash2, hash3, 256 );

skein256_4way( &ctx.skein, vhash64, 32 );
skein256_4way_update( &ctx.skein, vhash64, 32 );
skein256_4way_close( &ctx.skein, vhash64 );

dintrlv_4x64( hash0, hash1, hash2, hash3, vhash64, 256 );
@@ -217,7 +217,7 @@ void lyra2rev2_4way_hash( void *state, const void *input )

intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 256 );

bmw256_4way( &ctx.bmw, vhash, 32 );
bmw256_4way_update( &ctx.bmw, vhash, 32 );
bmw256_4way_close( &ctx.bmw, state );
}

@@ -242,7 +242,7 @@ int scanhash_lyra2rev2_4way( struct work *work, uint32_t max_nonce,
mm128_bswap32_intrlv80_4x32( vdata, pdata );

blake256_4way_init( &l2v2_4way_ctx.blake );
blake256_4way( &l2v2_4way_ctx.blake, vdata, 64 );
blake256_4way_update( &l2v2_4way_ctx.blake, vdata, 64 );

do
{

@@ -209,7 +209,7 @@ void lyra2rev3_8way_hash( void *state, const void *input )
lyra2v3_8way_ctx_holder ctx __attribute__ ((aligned (64)));
memcpy( &ctx, &l2v3_8way_ctx, sizeof(l2v3_8way_ctx) );

blake256_8way( &ctx.blake, input + (64*8), 16 );
blake256_8way_update( &ctx.blake, input + (64*8), 16 );
blake256_8way_close( &ctx.blake, vhash );

dintrlv_8x32( hash0, hash1, hash2, hash3,
@@ -252,7 +252,7 @@ void lyra2rev3_8way_hash( void *state, const void *input )
intrlv_8x32( vhash, hash0, hash1, hash2, hash3,
hash4, hash5, hash6, hash7, 256 );

bmw256_8way( &ctx.bmw, vhash, 32 );
bmw256_8way_update( &ctx.bmw, vhash, 32 );
bmw256_8way_close( &ctx.bmw, state );

}
@@ -277,7 +277,7 @@ int scanhash_lyra2rev3_8way( struct work *work, const uint32_t max_nonce,
mm256_bswap32_intrlv80_8x32( vdata, pdata );

blake256_8way_init( &l2v3_8way_ctx.blake );
blake256_8way( &l2v3_8way_ctx.blake, vdata, 64 );
blake256_8way_update( &l2v3_8way_ctx.blake, vdata, 64 );

do
{
@@ -334,8 +334,7 @@ void lyra2rev3_4way_hash( void *state, const void *input )
lyra2v3_4way_ctx_holder ctx __attribute__ ((aligned (64)));
memcpy( &ctx, &l2v3_4way_ctx, sizeof(l2v3_4way_ctx) );

// blake256_4way( &ctx.blake, input, 80 );
blake256_4way( &ctx.blake, input + (64*4), 16 );
blake256_4way_update( &ctx.blake, input + (64*4), 16 );
blake256_4way_close( &ctx.blake, vhash );
dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 256 );

@@ -358,7 +357,7 @@ void lyra2rev3_4way_hash( void *state, const void *input )
LYRA2REV3( l2v3_wholeMatrix, hash3, 32, hash3, 32, hash3, 32, 1, 4, 4 );

intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 256 );
bmw256_4way( &ctx.bmw, vhash, 32 );
bmw256_4way_update( &ctx.bmw, vhash, 32 );
bmw256_4way_close( &ctx.bmw, state );
}

@@ -383,7 +382,7 @@ int scanhash_lyra2rev3_4way( struct work *work, const uint32_t max_nonce,
mm128_bswap32_intrlv80_4x32( vdata, pdata );

blake256_4way_init( &l2v3_4way_ctx.blake );
blake256_4way( &l2v3_4way_ctx.blake, vdata, 64 );
blake256_4way_update( &l2v3_4way_ctx.blake, vdata, 64 );

do
{

@@ -1,13 +1,240 @@
#include "lyra2-gate.h"

#ifdef LYRA2Z_4WAY

#include <memory.h>
#include <mm_malloc.h>
#include "lyra2.h"
#include "algo/blake/sph_blake.h"
#include "algo/blake/blake-hash-4way.h"

#if defined(LYRA2Z_16WAY)

__thread uint64_t* lyra2z_16way_matrix;

bool lyra2z_16way_thread_init()
{
return ( lyra2z_16way_matrix = _mm_malloc( 2*LYRA2Z_MATRIX_SIZE, 64 ) );
}

static __thread blake256_16way_context l2z_16way_blake_mid;

void lyra2z_16way_midstate( const void* input )
{
blake256_16way_init( &l2z_16way_blake_mid );
blake256_16way_update( &l2z_16way_blake_mid, input, 64 );
}

void lyra2z_16way_hash( void *state, const void *input )
{
uint32_t vhash[8*16] __attribute__ ((aligned (128)));
uint32_t hash0[8] __attribute__ ((aligned (64)));
uint32_t hash1[8] __attribute__ ((aligned (64)));
uint32_t hash2[8] __attribute__ ((aligned (64)));
uint32_t hash3[8] __attribute__ ((aligned (64)));
uint32_t hash4[8] __attribute__ ((aligned (64)));
uint32_t hash5[8] __attribute__ ((aligned (64)));
uint32_t hash6[8] __attribute__ ((aligned (64)));
uint32_t hash7[8] __attribute__ ((aligned (64)));
uint32_t hash8[8] __attribute__ ((aligned (64)));
uint32_t hash9[8] __attribute__ ((aligned (64)));
uint32_t hash10[8] __attribute__ ((aligned (64)));
uint32_t hash11[8] __attribute__ ((aligned (64)));
uint32_t hash12[8] __attribute__ ((aligned (64)));
uint32_t hash13[8] __attribute__ ((aligned (64)));
uint32_t hash14[8] __attribute__ ((aligned (64)));
uint32_t hash15[8] __attribute__ ((aligned (64)));
blake256_16way_context ctx_blake __attribute__ ((aligned (64)));

memcpy( &ctx_blake, &l2z_16way_blake_mid, sizeof l2z_16way_blake_mid );
blake256_16way_update( &ctx_blake, input + (64*16), 16 );
blake256_16way_close( &ctx_blake, vhash );

dintrlv_16x32( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
hash8, hash9, hash10, hash11 ,hash12, hash13, hash14, hash15,
vhash, 256 );

intrlv_2x256( vhash, hash0, hash1, 256 );
LYRA2Z_2WAY( lyra2z_16way_matrix, vhash, 32, vhash, 32, 8, 8, 8 );
dintrlv_2x256( hash0, hash1, vhash, 256 );
intrlv_2x256( vhash, hash2, hash3, 256 );
LYRA2Z_2WAY( lyra2z_16way_matrix, vhash, 32, vhash, 32, 8, 8, 8 );
dintrlv_2x256( hash2, hash3, vhash, 256 );
intrlv_2x256( vhash, hash4, hash5, 256 );
LYRA2Z_2WAY( lyra2z_16way_matrix, vhash, 32, vhash, 32, 8, 8, 8 );
dintrlv_2x256( hash4, hash5, vhash, 256 );
intrlv_2x256( vhash, hash6, hash7, 256 );
LYRA2Z_2WAY( lyra2z_16way_matrix, vhash, 32, vhash, 32, 8, 8, 8 );
dintrlv_2x256( hash6, hash7, vhash, 256 );
intrlv_2x256( vhash, hash8, hash9, 256 );
LYRA2Z_2WAY( lyra2z_16way_matrix, vhash, 32, vhash, 32, 8, 8, 8 );
dintrlv_2x256( hash8, hash9, vhash, 256 );
intrlv_2x256( vhash, hash10, hash11, 256 );
LYRA2Z_2WAY( lyra2z_16way_matrix, vhash, 32, vhash, 32, 8, 8, 8 );
dintrlv_2x256( hash10, hash11, vhash, 256 );
intrlv_2x256( vhash, hash12, hash13, 256 );
LYRA2Z_2WAY( lyra2z_16way_matrix, vhash, 32, vhash, 32, 8, 8, 8 );
dintrlv_2x256( hash12, hash13, vhash, 256 );
intrlv_2x256( vhash, hash14, hash15, 256 );
LYRA2Z_2WAY( lyra2z_16way_matrix, vhash, 32, vhash, 32, 8, 8, 8 );
dintrlv_2x256( hash14, hash15, vhash, 256 );

memcpy( state, hash0, 32 );
memcpy( state+ 32, hash1, 32 );
memcpy( state+ 64, hash2, 32 );
memcpy( state+ 96, hash3, 32 );
memcpy( state+128, hash4, 32 );
memcpy( state+160, hash5, 32 );
memcpy( state+192, hash6, 32 );
memcpy( state+224, hash7, 32 );
memcpy( state+256, hash8, 32 );
memcpy( state+288, hash9, 32 );
memcpy( state+320, hash10, 32 );
memcpy( state+352, hash11, 32 );
memcpy( state+384, hash12, 32 );
memcpy( state+416, hash13, 32 );
memcpy( state+448, hash14, 32 );
memcpy( state+480, hash15, 32 );
}

int scanhash_lyra2z_16way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t hash[8*16] __attribute__ ((aligned (128)));
uint32_t vdata[20*16] __attribute__ ((aligned (64)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t Htarg = ptarget[7];
const uint32_t first_nonce = pdata[19];
uint32_t n = first_nonce;
__m512i *noncev = (__m512i*)vdata + 19; // aligned
int thr_id = mythr->id; // thr_id arg is deprecated

if ( opt_benchmark )
ptarget[7] = 0x0000ff;

mm512_bswap32_intrlv80_16x32( vdata, pdata );
lyra2z_16way_midstate( vdata );

do {
*noncev = mm512_bswap_32( _mm512_set_epi32( n+15, n+14, n+13, n+12,
n+11, n+10, n+ 9, n+ 8,
n+ 7, n+ 6, n+ 5, n+ 4,
n+ 3, n+ 2, n+ 1, n ) );
lyra2z_16way_hash( hash, vdata );
pdata[19] = n;

for ( int i = 0; i < 16; i++ )
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget )
&& !opt_benchmark )
{
pdata[19] = n+i;
submit_lane_solution( work, hash+(i<<3), mythr, i );
}
n += 16;
} while ( (n < max_nonce-16) && !work_restart[thr_id].restart);

*hashes_done = n - first_nonce + 1;
return 0;
}

#elif defined(LYRA2Z_8WAY)

__thread uint64_t* lyra2z_8way_matrix;

bool lyra2z_8way_thread_init()
{
return ( lyra2z_8way_matrix = _mm_malloc( LYRA2Z_MATRIX_SIZE, 64 ) );
}

static __thread blake256_8way_context l2z_8way_blake_mid;

void lyra2z_8way_midstate( const void* input )
{
blake256_8way_init( &l2z_8way_blake_mid );
blake256_8way_update( &l2z_8way_blake_mid, input, 64 );
}

void lyra2z_8way_hash( void *state, const void *input )
{
uint32_t hash0[8] __attribute__ ((aligned (64)));
uint32_t hash1[8] __attribute__ ((aligned (64)));
uint32_t hash2[8] __attribute__ ((aligned (64)));
uint32_t hash3[8] __attribute__ ((aligned (64)));
uint32_t hash4[8] __attribute__ ((aligned (64)));
uint32_t hash5[8] __attribute__ ((aligned (64)));
uint32_t hash6[8] __attribute__ ((aligned (64)));
uint32_t hash7[8] __attribute__ ((aligned (64)));
uint32_t vhash[8*8] __attribute__ ((aligned (64)));
blake256_8way_context ctx_blake __attribute__ ((aligned (64)));

memcpy( &ctx_blake, &l2z_8way_blake_mid, sizeof l2z_8way_blake_mid );
blake256_8way_update( &ctx_blake, input + (64*8), 16 );
blake256_8way_close( &ctx_blake, vhash );

dintrlv_8x32( hash0, hash1, hash2, hash3,
hash4, hash5, hash6, hash7, vhash, 256 );

LYRA2Z( lyra2z_8way_matrix, hash0, 32, hash0, 32, hash0, 32, 8, 8, 8 );
LYRA2Z( lyra2z_8way_matrix, hash1, 32, hash1, 32, hash1, 32, 8, 8, 8 );
LYRA2Z( lyra2z_8way_matrix, hash2, 32, hash2, 32, hash2, 32, 8, 8, 8 );
LYRA2Z( lyra2z_8way_matrix, hash3, 32, hash3, 32, hash3, 32, 8, 8, 8 );
LYRA2Z( lyra2z_8way_matrix, hash4, 32, hash4, 32, hash4, 32, 8, 8, 8 );
LYRA2Z( lyra2z_8way_matrix, hash5, 32, hash5, 32, hash5, 32, 8, 8, 8 );
LYRA2Z( lyra2z_8way_matrix, hash6, 32, hash6, 32, hash6, 32, 8, 8, 8 );
LYRA2Z( lyra2z_8way_matrix, hash7, 32, hash7, 32, hash7, 32, 8, 8, 8 );

memcpy( state, hash0, 32 );
memcpy( state+ 32, hash1, 32 );
memcpy( state+ 64, hash2, 32 );
memcpy( state+ 96, hash3, 32 );
memcpy( state+128, hash4, 32 );
memcpy( state+160, hash5, 32 );
memcpy( state+192, hash6, 32 );
memcpy( state+224, hash7, 32 );
}

int scanhash_lyra2z_8way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t hash[8*8] __attribute__ ((aligned (64)));
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t Htarg = ptarget[7];
const uint32_t first_nonce = pdata[19];
uint32_t n = first_nonce;
__m256i *noncev = (__m256i*)vdata + 19; // aligned
int thr_id = mythr->id; // thr_id arg is deprecated

if ( opt_benchmark )
ptarget[7] = 0x0000ff;

mm256_bswap32_intrlv80_8x32( vdata, pdata );
lyra2z_8way_midstate( vdata );

do {
*noncev = mm256_bswap_32(
_mm256_set_epi32( n+7, n+6, n+5, n+4, n+3, n+2, n+1, n ) );
lyra2z_8way_hash( hash, vdata );
pdata[19] = n;

for ( int i = 0; i < 8; i++ )
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget )
&& !opt_benchmark )
{
pdata[19] = n+i;
submit_lane_solution( work, hash+(i<<3), mythr, i );
}
n += 8;
} while ( (n < max_nonce-8) && !work_restart[thr_id].restart);

*hashes_done = n - first_nonce + 1;
return 0;
}

#elif defined(LYRA2Z_4WAY)

__thread uint64_t* lyra2z_4way_matrix;

bool lyra2z_4way_thread_init()
@@ -20,7 +247,7 @@ static __thread blake256_4way_context l2z_4way_blake_mid;
void lyra2z_4way_midstate( const void* input )
{
blake256_4way_init( &l2z_4way_blake_mid );
blake256_4way( &l2z_4way_blake_mid, input, 64 );
blake256_4way_update( &l2z_4way_blake_mid, input, 64 );
}

void lyra2z_4way_hash( void *state, const void *input )
@@ -33,7 +260,7 @@ void lyra2z_4way_hash( void *state, const void *input )
blake256_4way_context ctx_blake __attribute__ ((aligned (64)));

memcpy( &ctx_blake, &l2z_4way_blake_mid, sizeof l2z_4way_blake_mid );
blake256_4way( &ctx_blake, input + (64*4), 16 );
blake256_4way_update( &ctx_blake, input + (64*4), 16 );
blake256_4way_close( &ctx_blake, vhash );

dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 256 );
@@ -85,100 +312,3 @@ int scanhash_lyra2z_4way( struct work *work, uint32_t max_nonce,

#endif

#if defined(LYRA2Z_8WAY)

__thread uint64_t* lyra2z_8way_matrix;

bool lyra2z_8way_thread_init()
{
return ( lyra2z_8way_matrix = _mm_malloc( LYRA2Z_MATRIX_SIZE, 64 ) );
}

static __thread blake256_8way_context l2z_8way_blake_mid;

void lyra2z_8way_midstate( const void* input )
{
blake256_8way_init( &l2z_8way_blake_mid );
blake256_8way( &l2z_8way_blake_mid, input, 64 );
}

void lyra2z_8way_hash( void *state, const void *input )
{
uint32_t hash0[8] __attribute__ ((aligned (64)));
uint32_t hash1[8] __attribute__ ((aligned (64)));
uint32_t hash2[8] __attribute__ ((aligned (64)));
uint32_t hash3[8] __attribute__ ((aligned (64)));
uint32_t hash4[8] __attribute__ ((aligned (64)));
uint32_t hash5[8] __attribute__ ((aligned (64)));
uint32_t hash6[8] __attribute__ ((aligned (64)));
uint32_t hash7[8] __attribute__ ((aligned (64)));
uint32_t vhash[8*8] __attribute__ ((aligned (64)));
blake256_8way_context ctx_blake __attribute__ ((aligned (64)));

memcpy( &ctx_blake, &l2z_8way_blake_mid, sizeof l2z_8way_blake_mid );
blake256_8way( &ctx_blake, input + (64*8), 16 );
blake256_8way_close( &ctx_blake, vhash );

dintrlv_8x32( hash0, hash1, hash2, hash3,
hash4, hash5, hash6, hash7, vhash, 256 );

LYRA2Z( lyra2z_8way_matrix, hash0, 32, hash0, 32, hash0, 32, 8, 8, 8 );
LYRA2Z( lyra2z_8way_matrix, hash1, 32, hash1, 32, hash1, 32, 8, 8, 8 );
LYRA2Z( lyra2z_8way_matrix, hash2, 32, hash2, 32, hash2, 32, 8, 8, 8 );
LYRA2Z( lyra2z_8way_matrix, hash3, 32, hash3, 32, hash3, 32, 8, 8, 8 );
LYRA2Z( lyra2z_8way_matrix, hash4, 32, hash4, 32, hash4, 32, 8, 8, 8 );
LYRA2Z( lyra2z_8way_matrix, hash5, 32, hash5, 32, hash5, 32, 8, 8, 8 );
LYRA2Z( lyra2z_8way_matrix, hash6, 32, hash6, 32, hash6, 32, 8, 8, 8 );
LYRA2Z( lyra2z_8way_matrix, hash7, 32, hash7, 32, hash7, 32, 8, 8, 8 );

memcpy( state, hash0, 32 );
memcpy( state+ 32, hash1, 32 );
memcpy( state+ 64, hash2, 32 );
memcpy( state+ 96, hash3, 32 );
memcpy( state+128, hash4, 32 );
memcpy( state+160, hash5, 32 );
memcpy( state+192, hash6, 32 );
memcpy( state+224, hash7, 32 );
}

int scanhash_lyra2z_8way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t hash[8*8] __attribute__ ((aligned (64)));
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t Htarg = ptarget[7];
const uint32_t first_nonce = pdata[19];
uint32_t n = first_nonce;
__m256i *noncev = (__m256i*)vdata + 19; // aligned
int thr_id = mythr->id; // thr_id arg is deprecated

if ( opt_benchmark )
ptarget[7] = 0x0000ff;

mm256_bswap32_intrlv80_8x32( vdata, pdata );
lyra2z_8way_midstate( vdata );

do {
*noncev = mm256_bswap_32(
_mm256_set_epi32( n+7, n+6, n+5, n+4, n+3, n+2, n+1, n ) );
lyra2z_8way_hash( hash, vdata );
pdata[19] = n;

for ( int i = 0; i < 8; i++ )
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget )
&& !opt_benchmark )
{
pdata[19] = n+i;
submit_lane_solution( work, hash+(i<<3), mythr, i );
}
n += 8;
} while ( (n < max_nonce-8) && !work_restart[thr_id].restart);

*hashes_done = n - first_nonce + 1;
return 0;
}

#endif

@@ -133,7 +133,7 @@ void nist5hash_4way( void *out, const void *input )
keccak512_4way_context ctx_keccak;

blake512_4way_init( &ctx_blake );
blake512_4way( &ctx_blake, input, 80 );
blake512_4way_update( &ctx_blake, input, 80 );
blake512_4way_close( &ctx_blake, vhash );

dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
@@ -154,15 +154,15 @@ void nist5hash_4way( void *out, const void *input )
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );

jh512_4way_init( &ctx_jh );
jh512_4way( &ctx_jh, vhash, 64 );
jh512_4way_update( &ctx_jh, vhash, 64 );
jh512_4way_close( &ctx_jh, vhash );

keccak512_4way_init( &ctx_keccak );
keccak512_4way( &ctx_keccak, vhash, 64 );
keccak512_4way_update( &ctx_keccak, vhash, 64 );
keccak512_4way_close( &ctx_keccak, vhash );

skein512_4way_init( &ctx_skein );
skein512_4way( &ctx_skein, vhash, 64 );
skein512_4way_update( &ctx_skein, vhash, 64 );
skein512_4way_close( &ctx_skein, out );
}

@@ -54,10 +54,10 @@ void anime_4way_hash( void *state, const void *input )
anime_4way_ctx_holder ctx;
memcpy( &ctx, &anime_4way_ctx, sizeof(anime_4way_ctx) );

bmw512_4way( &ctx.bmw, input, 80 );
bmw512_4way_update( &ctx.bmw, input, 80 );
bmw512_4way_close( &ctx.bmw, vhash );

blake512_4way( &ctx.blake, vhash, 64 );
blake512_4way_update( &ctx.blake, vhash, 64 );
blake512_4way_close( &ctx.blake, vhash );

vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ), zero );
@@ -92,7 +92,7 @@ void anime_4way_hash( void *state, const void *input )

if ( mm256_anybits0( vh_mask ) )
{
skein512_4way( &ctx.skein, vhash, 64 );
skein512_4way_update( &ctx.skein, vhash, 64 );
skein512_4way_close( &ctx.skein, vhashB );
}

@@ -111,7 +111,7 @@ void anime_4way_hash( void *state, const void *input )

intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );

jh512_4way( &ctx.jh, vhash, 64 );
jh512_4way_update( &ctx.jh, vhash, 64 );
jh512_4way_close( &ctx.jh, vhash );

vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ), zero );
@@ -119,23 +119,23 @@ void anime_4way_hash( void *state, const void *input )
if ( mm256_anybits1( vh_mask ) )
{
blake512_4way_init( &ctx.blake );
blake512_4way( &ctx.blake, vhash, 64 );
blake512_4way_update( &ctx.blake, vhash, 64 );
blake512_4way_close( &ctx.blake, vhashA );
}
if ( mm256_anybits0( vh_mask ) )
{
bmw512_4way_init( &ctx.bmw );
bmw512_4way( &ctx.bmw, vhash, 64 );
bmw512_4way_update( &ctx.bmw, vhash, 64 );
bmw512_4way_close( &ctx.bmw, vhashB );
}

mm256_blend_hash_4x64( vh, vhA, vhB, vh_mask );

keccak512_4way( &ctx.keccak, vhash, 64 );
keccak512_4way_update( &ctx.keccak, vhash, 64 );
keccak512_4way_close( &ctx.keccak, vhash );

skein512_4way_init( &ctx.skein );
skein512_4way( &ctx.skein, vhash, 64 );
skein512_4way_update( &ctx.skein, vhash, 64 );
skein512_4way_close( &ctx.skein, vhash );

vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ), zero );
@@ -143,13 +143,13 @@ void anime_4way_hash( void *state, const void *input )
if ( mm256_anybits1( vh_mask ) )
{
keccak512_4way_init( &ctx.keccak );
keccak512_4way( &ctx.keccak, vhash, 64 );
keccak512_4way_update( &ctx.keccak, vhash, 64 );
keccak512_4way_close( &ctx.keccak, vhashA );
}
if ( mm256_anybits0( vh_mask ) )
{
jh512_4way_init( &ctx.jh );
jh512_4way( &ctx.jh, vhash, 64 );
jh512_4way_update( &ctx.jh, vhash, 64 );
jh512_4way_close( &ctx.jh, vhashB );
}

@@ -830,7 +830,7 @@ extern void hmq1725_4way_hash(void *state, const void *input)
__m256i* vhB = (__m256i*)vhashB;

bmw512_4way_init( &ctx.bmw );
bmw512_4way( &ctx.bmw, input, 80 );
bmw512_4way_update( &ctx.bmw, input, 80 );
bmw512_4way_close( &ctx.bmw, vhash );

dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
@@ -889,18 +889,18 @@ extern void hmq1725_4way_hash(void *state, const void *input)
if ( mm256_anybits1( vh_mask ) )
{
skein512_4way_init( &ctx.skein );
skein512_4way( &ctx.skein, vhash, 64 );
skein512_4way_update( &ctx.skein, vhash, 64 );
skein512_4way_close( &ctx.skein, vhashB );
}

mm256_blend_hash_4x64( vh, vhA, vhB, vh_mask );

jh512_4way_init( &ctx.jh );
jh512_4way( &ctx.jh, vhash, 64 );
jh512_4way_update( &ctx.jh, vhash, 64 );
jh512_4way_close( &ctx.jh, vhash );

keccak512_4way_init( &ctx.keccak );
keccak512_4way( &ctx.keccak, vhash, 64 );
keccak512_4way_update( &ctx.keccak, vhash, 64 );
keccak512_4way_close( &ctx.keccak, vhash );

// second fork, A = blake parallel, B= bmw parallel.
@@ -911,14 +911,14 @@ extern void hmq1725_4way_hash(void *state, const void *input)
if ( mm256_anybits0( vh_mask ) )
{
blake512_4way_init( &ctx.blake );
blake512_4way( &ctx.blake, vhash, 64 );
blake512_4way_update( &ctx.blake, vhash, 64 );
blake512_4way_close( &ctx.blake, vhashA );
}

if ( mm256_anybits1( vh_mask ) )
{
bmw512_4way_init( &ctx.bmw );
bmw512_4way( &ctx.bmw, vhash, 64 );
bmw512_4way_update( &ctx.bmw, vhash, 64 );
bmw512_4way_close( &ctx.bmw, vhashB );
}

@@ -962,14 +962,14 @@ extern void hmq1725_4way_hash(void *state, const void *input)
if ( mm256_anybits0( vh_mask ) )
{
keccak512_4way_init( &ctx.keccak );
keccak512_4way( &ctx.keccak, vhash, 64 );
keccak512_4way_update( &ctx.keccak, vhash, 64 );
keccak512_4way_close( &ctx.keccak, vhashA );
}

if ( mm256_anybits1( vh_mask ) )
{
jh512_4way_init( &ctx.jh );
jh512_4way( &ctx.jh, vhash, 64 );
jh512_4way_update( &ctx.jh, vhash, 64 );
jh512_4way_close( &ctx.jh, vhashB );
}

@@ -990,7 +990,6 @@ extern void hmq1725_4way_hash(void *state, const void *input)
sph_shavite512 ( &ctx.shavite, hash3, 64 );
sph_shavite512_close( &ctx.shavite, hash3 );

intrlv_2x128_512( vhashA, hash0, hash1 );
intrlv_2x128_512( vhashB, hash2, hash3 );

@@ -1042,7 +1041,7 @@ extern void hmq1725_4way_hash(void *state, const void *input)
if ( mm256_anybits1( vh_mask ) )
{
haval256_5_4way_init( &ctx.haval );
haval256_5_4way( &ctx.haval, vhash, 64 );
haval256_5_4way_update( &ctx.haval, vhash, 64 );
haval256_5_4way_close( &ctx.haval, vhash );
memset( &vhash[8<<2], 0, 32<<2 );
rintrlv_4x32_4x64( vhashB, vhash, 512 );
@@ -1068,7 +1067,7 @@ extern void hmq1725_4way_hash(void *state, const void *input)
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );

blake512_4way_init( &ctx.blake );
blake512_4way( &ctx.blake, vhash, 64 );
blake512_4way_update( &ctx.blake, vhash, 64 );
blake512_4way_close( &ctx.blake, vhash );

dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
@@ -1130,7 +1129,7 @@ extern void hmq1725_4way_hash(void *state, const void *input)
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );

hamsi512_4way_init( &ctx.hamsi );
hamsi512_4way( &ctx.hamsi, vhash, 64 );
hamsi512_4way_update( &ctx.hamsi, vhash, 64 );
hamsi512_4way_close( &ctx.hamsi, vhash );

dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
@@ -1214,7 +1213,7 @@ extern void hmq1725_4way_hash(void *state, const void *input)
intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 );

shabal512_4way_init( &ctx.shabal );
shabal512_4way( &ctx.shabal, vhash, 64 );
shabal512_4way_update( &ctx.shabal, vhash, 64 );
shabal512_4way_close( &ctx.shabal, vhash );

dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 512 );
@@ -1269,7 +1268,7 @@ extern void hmq1725_4way_hash(void *state, const void *input)
if ( mm256_anybits1( vh_mask ) )
{
sha512_4way_init( &ctx.sha512 );
sha512_4way( &ctx.sha512, vhash, 64 );
sha512_4way_update( &ctx.sha512, vhash, 64 );
sha512_4way_close( &ctx.sha512, vhashB );
}

@@ -1289,7 +1288,7 @@ extern void hmq1725_4way_hash(void *state, const void *input)
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );

sha512_4way_init( &ctx.sha512 );
sha512_4way( &ctx.sha512, vhash, 64 );
sha512_4way_update( &ctx.sha512, vhash, 64 );
sha512_4way_close( &ctx.sha512, vhash );

// A = haval parallel, B = Whirlpool serial
@@ -1305,7 +1304,7 @@ extern void hmq1725_4way_hash(void *state, const void *input)
if ( mm256_anybits0( vh_mask ) )
{
haval256_5_4way_init( &ctx.haval );
haval256_5_4way( &ctx.haval, vhash, 64 );
haval256_5_4way_update( &ctx.haval, vhash, 64 );
haval256_5_4way_close( &ctx.haval, vhash );
memset( &vhash[8<<2], 0, 32<<2 );
rintrlv_4x32_4x64( vhashA, vhash, 512 );
@@ -1341,7 +1340,7 @@ extern void hmq1725_4way_hash(void *state, const void *input)
mm256_blend_hash_4x64( vh, vhA, vhB, vh_mask );

bmw512_4way_init( &ctx.bmw );
bmw512_4way( &ctx.bmw, vhash, 64 );
bmw512_4way_update( &ctx.bmw, vhash, 64 );
bmw512_4way_close( &ctx.bmw, vhash );

memcpy(state, vhash, 32<<2 );

@@ -289,10 +289,10 @@ void quark_4way_hash( void *state, const void *input )

memcpy( &ctx, &quark_4way_ctx, sizeof(quark_4way_ctx) );

blake512_4way( &ctx.blake, input, 80 );
blake512_4way_update( &ctx.blake, input, 80 );
blake512_4way_close( &ctx.blake, vhash );

bmw512_4way( &ctx.bmw, vhash, 64 );
bmw512_4way_update( &ctx.bmw, vhash, 64 );
bmw512_4way_close( &ctx.bmw, vhash );

vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ), zero );
@@ -327,7 +327,7 @@ void quark_4way_hash( void *state, const void *input )

if ( mm256_anybits1( vh_mask ) )
{
skein512_4way( &ctx.skein, vhash, 64 );
skein512_4way_update( &ctx.skein, vhash, 64 );
skein512_4way_close( &ctx.skein, vhashB );
}

@@ -346,7 +346,7 @@ void quark_4way_hash( void *state, const void *input )

intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );

jh512_4way( &ctx.jh, vhash, 64 );
jh512_4way_update( &ctx.jh, vhash, 64 );
jh512_4way_close( &ctx.jh, vhash );

vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ), zero );
@@ -354,24 +354,24 @@ void quark_4way_hash( void *state, const void *input )
if ( mm256_anybits0( vh_mask ) )
{
blake512_4way_init( &ctx.blake );
blake512_4way( &ctx.blake, vhash, 64 );
blake512_4way_update( &ctx.blake, vhash, 64 );
blake512_4way_close( &ctx.blake, vhashA );
}

if ( mm256_anybits1( vh_mask ) )
{
bmw512_4way_init( &ctx.bmw );
bmw512_4way( &ctx.bmw, vhash, 64 );
bmw512_4way_update( &ctx.bmw, vhash, 64 );
bmw512_4way_close( &ctx.bmw, vhashB );
}

mm256_blend_hash_4x64( vh, vhA, vhB, vh_mask );

keccak512_4way( &ctx.keccak, vhash, 64 );
keccak512_4way_update( &ctx.keccak, vhash, 64 );
keccak512_4way_close( &ctx.keccak, vhash );

skein512_4way_init( &ctx.skein );
skein512_4way( &ctx.skein, vhash, 64 );
skein512_4way_update( &ctx.skein, vhash, 64 );
skein512_4way_close( &ctx.skein, vhash );

vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ), zero );
@@ -379,14 +379,14 @@ void quark_4way_hash( void *state, const void *input )
if ( mm256_anybits0( vh_mask ) )
{
keccak512_4way_init( &ctx.keccak );
keccak512_4way( &ctx.keccak, vhash, 64 );
keccak512_4way_update( &ctx.keccak, vhash, 64 );
keccak512_4way_close( &ctx.keccak, vhashA );
}

if ( mm256_anybits1( vh_mask ) )
{
jh512_4way_init( &ctx.jh );
jh512_4way( &ctx.jh, vhash, 64 );
jh512_4way_update( &ctx.jh, vhash, 64 );
jh512_4way_close( &ctx.jh, vhashB );
}

@@ -7,16 +7,147 @@
#include "ripemd-hash-4way.h"

#define LBRY_INPUT_SIZE 112
#define LBRY_MIDSTATE 64
#define LBRY_MIDSTATE 96
#define LBRY_TAIL (LBRY_INPUT_SIZE) - (LBRY_MIDSTATE)

#if defined(LBRY_8WAY)
#if defined(LBRY_16WAY)

static __thread sha256_16way_context sha256_16w_mid;

void lbry_16way_hash( void* output, const void* input )
{
uint32_t _ALIGN(128) vhashA[16<<4];
uint32_t _ALIGN(64) vhashB[16<<4];
uint32_t _ALIGN(64) vhashC[16<<4];
uint32_t _ALIGN(64) h0[32];
uint32_t _ALIGN(64) h1[32];
uint32_t _ALIGN(64) h2[32];
uint32_t _ALIGN(64) h3[32];
uint32_t _ALIGN(64) h4[32];
uint32_t _ALIGN(64) h5[32];
uint32_t _ALIGN(64) h6[32];
uint32_t _ALIGN(64) h7[32];
uint32_t _ALIGN(64) h8[32];
uint32_t _ALIGN(64) h9[32];
uint32_t _ALIGN(64) h10[32];
uint32_t _ALIGN(64) h11[32];
uint32_t _ALIGN(64) h12[32];
uint32_t _ALIGN(64) h13[32];
uint32_t _ALIGN(64) h14[32];
uint32_t _ALIGN(64) h15[32];
sha256_16way_context ctx_sha256 __attribute__ ((aligned (64)));
sha512_8way_context ctx_sha512;
ripemd160_16way_context ctx_ripemd;

memcpy( &ctx_sha256, &sha256_16w_mid, sizeof(ctx_sha256) );
sha256_16way_update( &ctx_sha256, input + (LBRY_MIDSTATE<<4), LBRY_TAIL );
sha256_16way_close( &ctx_sha256, vhashA );

sha256_16way_init( &ctx_sha256 );
sha256_16way_update( &ctx_sha256, vhashA, 32 );
sha256_16way_close( &ctx_sha256, vhashA );

// reinterleave to do sha512 4-way 64 bit twice.
dintrlv_16x32( h0, h1, h2, h3, h4, h5, h6, h7,
h8, h9, h10, h11, h12, h13, h14, h15, vhashA, 256 );
intrlv_8x64( vhashA, h0, h1, h2, h3, h4, h5, h6, h7, 256 );
intrlv_8x64( vhashB, h8, h9, h10, h11, h12, h13, h14, h15, 256 );

sha512_8way_init( &ctx_sha512 );
sha512_8way_update( &ctx_sha512, vhashA, 32 );
sha512_8way_close( &ctx_sha512, vhashA );

sha512_8way_init( &ctx_sha512 );
sha512_8way_update( &ctx_sha512, vhashB, 32 );
sha512_8way_close( &ctx_sha512, vhashB );

// back to 8-way 32 bit
dintrlv_8x64( h0, h1, h2, h3, h4, h5, h6, h7, vhashA, 512 );
dintrlv_8x64( h8, h9, h10, h11, h12, h13, h14, h15, vhashB, 512 );
intrlv_16x32( vhashA, h0, h1, h2, h3, h4, h5, h6, h7,
h8, h9, h10, h11, h12, h13, h14, h15, 512 );

ripemd160_16way_init( &ctx_ripemd );
ripemd160_16way_update( &ctx_ripemd, vhashA, 32 );
ripemd160_16way_close( &ctx_ripemd, vhashB );

ripemd160_16way_init( &ctx_ripemd );
ripemd160_16way_update( &ctx_ripemd, vhashA+(8<<4), 32 );
ripemd160_16way_close( &ctx_ripemd, vhashC );

sha256_16way_init( &ctx_sha256 );
sha256_16way_update( &ctx_sha256, vhashB, 20 );
sha256_16way_update( &ctx_sha256, vhashC, 20 );
sha256_16way_close( &ctx_sha256, vhashA );

sha256_16way_init( &ctx_sha256 );
sha256_16way_update( &ctx_sha256, vhashA, 32 );
sha256_16way_close( &ctx_sha256, output );
}

int scanhash_lbry_16way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t hash[8*16] __attribute__ ((aligned (128)));
uint32_t vdata[32*16] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
uint32_t edata[32] __attribute__ ((aligned (64)));
uint32_t *hash7 = &(hash[7<<4]);
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
uint32_t n = pdata[27];
const uint32_t first_nonce = pdata[27];
const uint32_t last_nonce = max_nonce - 16;
const uint32_t Htarg = ptarget[7];
__m512i *noncev = (__m512i*)vdata + 27; // aligned
int thr_id = mythr->id; // thr_id arg is deprecated

// we need bigendian data...
casti_m128i( edata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
casti_m128i( edata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
casti_m128i( edata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
casti_m128i( edata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
casti_m128i( edata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
casti_m128i( edata, 5 ) = mm128_bswap_32( casti_m128i( pdata, 5 ) );
casti_m128i( edata, 6 ) = mm128_bswap_32( casti_m128i( pdata, 6 ) );
casti_m128i( edata, 7 ) = mm128_bswap_32( casti_m128i( pdata, 7 ) );
intrlv_16x32( vdata, edata, edata, edata, edata, edata, edata, edata,
edata, edata, edata, edata, edata, edata, edata, edata, edata, 1024 );

sha256_16way_init( &sha256_16w_mid );
sha256_16way_update( &sha256_16w_mid, vdata, LBRY_MIDSTATE );

do
{
*noncev = mm512_bswap_32( _mm512_set_epi32(
n+15, n+14, n+13, n+12, n+11, n+10, n+ 9, n+ 8,
n+ 7, n+ 6, n+ 5, n+ 4, n+ 3, n+ 2, n+ 1, n ) );
lbry_16way_hash( hash, vdata );

for ( int i = 0; i < 16; i++ )
if ( unlikely( hash7[ i ] <= Htarg ) )
{
// deinterleave hash for lane
extr_lane_16x32( lane_hash, hash, i, 256 );
if ( likely( fulltest( lane_hash, ptarget ) && !opt_benchmark ) )
{
pdata[27] = n + i;
submit_lane_solution( work, lane_hash, mythr, i );
}
}
n += 16;
} while ( likely( (n < last_nonce) && !work_restart[thr_id].restart ) );
*hashes_done = n - first_nonce;
return 0;
}

#elif defined(LBRY_8WAY)

static __thread sha256_8way_context sha256_8w_mid;

void lbry_8way_hash( void* output, const void* input )
{
uint32_t _ALIGN(64) vhashA[16<<3];
uint32_t _ALIGN(128) vhashA[16<<3];
uint32_t _ALIGN(64) vhashB[16<<3];
uint32_t _ALIGN(64) vhashC[16<<3];
uint32_t _ALIGN(32) h0[32];
@@ -32,11 +163,11 @@ void lbry_8way_hash( void* output, const void* input )
ripemd160_8way_context ctx_ripemd;

memcpy( &ctx_sha256, &sha256_8w_mid, sizeof(ctx_sha256) );
sha256_8way( &ctx_sha256, input + (LBRY_MIDSTATE<<3), LBRY_TAIL );
sha256_8way_update( &ctx_sha256, input + (LBRY_MIDSTATE<<3), LBRY_TAIL );
sha256_8way_close( &ctx_sha256, vhashA );

sha256_8way_init( &ctx_sha256 );
sha256_8way( &ctx_sha256, vhashA, 32 );
sha256_8way_update( &ctx_sha256, vhashA, 32 );
sha256_8way_close( &ctx_sha256, vhashA );

// reinterleave to do sha512 4-way 64 bit twice.
@@ -45,11 +176,11 @@ void lbry_8way_hash( void* output, const void* input )
intrlv_4x64( vhashB, h4, h5, h6, h7, 256 );

sha512_4way_init( &ctx_sha512 );
sha512_4way( &ctx_sha512, vhashA, 32 );
sha512_4way_update( &ctx_sha512, vhashA, 32 );
sha512_4way_close( &ctx_sha512, vhashA );

sha512_4way_init( &ctx_sha512 );
sha512_4way( &ctx_sha512, vhashB, 32 );
sha512_4way_update( &ctx_sha512, vhashB, 32 );
sha512_4way_close( &ctx_sha512, vhashB );

// back to 8-way 32 bit
@@ -58,20 +189,20 @@ void lbry_8way_hash( void* output, const void* input )
intrlv_8x32( vhashA, h0, h1, h2, h3, h4, h5, h6, h7, 512 );

ripemd160_8way_init( &ctx_ripemd );
ripemd160_8way( &ctx_ripemd, vhashA, 32 );
ripemd160_8way_update( &ctx_ripemd, vhashA, 32 );
ripemd160_8way_close( &ctx_ripemd, vhashB );

ripemd160_8way_init( &ctx_ripemd );
ripemd160_8way( &ctx_ripemd, vhashA+(8<<3), 32 );
ripemd160_8way_update( &ctx_ripemd, vhashA+(8<<3), 32 );
ripemd160_8way_close( &ctx_ripemd, vhashC );

sha256_8way_init( &ctx_sha256 );
sha256_8way( &ctx_sha256, vhashB, 20 );
sha256_8way( &ctx_sha256, vhashC, 20 );
sha256_8way_update( &ctx_sha256, vhashB, 20 );
sha256_8way_update( &ctx_sha256, vhashC, 20 );
sha256_8way_close( &ctx_sha256, vhashA );

sha256_8way_init( &ctx_sha256 );
sha256_8way( &ctx_sha256, vhashA, 32 );
sha256_8way_update( &ctx_sha256, vhashA, 32 );
sha256_8way_close( &ctx_sha256, output );
}

@@ -81,21 +212,16 @@ int scanhash_lbry_8way( struct work *work, uint32_t max_nonce,
uint32_t hash[8*8] __attribute__ ((aligned (64)));
uint32_t vdata[32*8] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
uint32_t edata[32] __attribute__ ((aligned (64)));
uint32_t *hash7 = &(hash[7<<3]);
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
uint32_t n = pdata[27];
const uint32_t first_nonce = pdata[27];
const uint32_t Htarg = ptarget[7];
uint32_t edata[32] __attribute__ ((aligned (64)));
__m256i *noncev = (__m256i*)vdata + 27; // aligned
int thr_id = mythr->id; // thr_id arg is deprecated

uint64_t htmax[] = { 0, 0xF, 0xFF,
0xFFF, 0xFFFF, 0x10000000 };
uint32_t masks[] = { 0xFFFFFFFF, 0xFFFFFFF0, 0xFFFFFF00,
0xFFFFF000, 0xFFFF0000, 0 };

// we need bigendian data...
casti_m128i( edata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
casti_m128i( edata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
@@ -106,33 +232,30 @@ int scanhash_lbry_8way( struct work *work, uint32_t max_nonce,
casti_m128i( edata, 6 ) = mm128_bswap_32( casti_m128i( pdata, 6 ) );
casti_m128i( edata, 7 ) = mm128_bswap_32( casti_m128i( pdata, 7 ) );
intrlv_8x32( vdata, edata, edata, edata, edata,
edata, edata, edata, edata, 1024 );
edata, edata, edata, edata, 1024 );

sha256_8way_init( &sha256_8w_mid );
sha256_8way( &sha256_8w_mid, vdata, LBRY_MIDSTATE );
sha256_8way_update( &sha256_8w_mid, vdata, LBRY_MIDSTATE );

for ( int m = 0; m < sizeof(masks); m++ ) if ( Htarg <= htmax[m] )
do
{
uint32_t mask = masks[m];
do
{
*noncev = mm256_bswap_32( _mm256_set_epi32(
n+7,n+6,n+5,n+4,n+3,n+2,n+1,n ) );
lbry_8way_hash( hash, vdata );
*noncev = mm256_bswap_32( _mm256_set_epi32(
n+7,n+6,n+5,n+4,n+3,n+2,n+1,n ) );
lbry_8way_hash( hash, vdata );

for ( int i = 0; i < 8; i++ ) if ( !( hash7[ i ] & mask ) )
for ( int i = 0; i < 8; i++ )
if ( unlikely( hash7[ i ] <= Htarg ) )
{
// deinterleave hash for lane
extr_lane_8x32( lane_hash, hash, i, 256 );
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
{
// deinterleave hash for lane
extr_lane_8x32( lane_hash, hash, i, 256 );
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
{
pdata[27] = n + i;
submit_lane_solution( work, lane_hash, mythr, i );
}
pdata[27] = n + i;
submit_lane_solution( work, lane_hash, mythr, i );
}
n += 8;
} while ( (n < max_nonce-10) && !work_restart[thr_id].restart );
break;
}
}
n += 8;
} while ( (n < max_nonce-10) && !work_restart[thr_id].restart );
*hashes_done = n - first_nonce + 1;
return 0;
}

@@ -98,16 +98,23 @@ int lbry_get_work_data_size() { return LBRY_WORK_DATA_SIZE; }

bool register_lbry_algo( algo_gate_t* gate )
{
gate->optimizations = AVX2_OPT | SHA_OPT;
#if defined (LBRY_8WAY)
// gate->optimizations = AVX2_OPT | AVX512_OPT | SHA_OPT;
#if defined (LBRY_16WAY)
gate->scanhash = (void*)&scanhash_lbry_16way;
gate->hash = (void*)&lbry_16way_hash;
gate->optimizations = AVX2_OPT | AVX512_OPT;
#elif defined (LBRY_8WAY)
gate->scanhash = (void*)&scanhash_lbry_8way;
gate->hash = (void*)&lbry_8way_hash;
gate->optimizations = AVX2_OPT | AVX512_OPT;
#elif defined (LBRY_4WAY)
gate->scanhash = (void*)&scanhash_lbry_4way;
gate->hash = (void*)&lbry_4way_hash;
gate->optimizations = AVX2_OPT | AVX512_OPT;
#else
gate->scanhash = (void*)&scanhash_lbry;
gate->hash = (void*)&lbry_hash;
gate->optimizations = AVX2_OPT | AVX512_OPT | SHA_OPT;
#endif
gate->calc_network_diff = (void*)&lbry_calc_network_diff;
gate->build_stratum_request = (void*)&lbry_le_build_stratum_request;

@@ -4,11 +4,19 @@
#include "algo-gate-api.h"
#include <stdint.h>

#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
#define LBRY_16WAY 1
#elif defined(__AVX2__)
#define LBRY_8WAY 1
#endif
/*
#if !defined(__SHA__)
#if defined(__AVX2__)
#define LBRY_8WAY
#endif
#endif
*/

#define LBRY_NTIME_INDEX 25
#define LBRY_NBITS_INDEX 26
@@ -18,18 +26,23 @@

bool register_lbry_algo( algo_gate_t* gate );

#if defined(LBRY_8WAY)
#if defined(LBRY_16WAY)

void lbry_16way_hash( void *state, const void *input );
int scanhash_lbry_16way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
#elif defined(LBRY_8WAY)

void lbry_8way_hash( void *state, const void *input );
int scanhash_lbry_8way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
/*

#elif defined(LBRY_4WAY)

void lbry_4way_hash( void *state, const void *input );
int scanhash_lbry_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done );
*/

#else

void lbry_hash( void *state, const void *input );

@@ -80,9 +80,6 @@ int scanhash_lbry( struct work *work, uint32_t max_nonce,
// we need bigendian data...
swab32_array( endiandata, pdata, 32 );

#ifdef DEBUG_ALGO
printf("[%d] Htarg=%X\n", thr_id, Htarg);
#endif
for (int m=0; m < sizeof(masks); m++) {
if (Htarg <= htmax[m]) {
uint32_t mask = masks[m];
@@ -90,23 +87,11 @@ int scanhash_lbry( struct work *work, uint32_t max_nonce,
pdata[27] = ++n;
be32enc(&endiandata[27], n);
lbry_hash(hash64, &endiandata);
#ifndef DEBUG_ALGO
if ((!(hash64[7] & mask)) && fulltest(hash64, ptarget)) {
*hashes_done = n - first_nonce + 1;
return true;
pdata[27] = n;
submit_solution( work, hash64, mythr );
}
#else
if (!(n % 0x1000) && !thr_id) printf(".");
if (!(hash64[7] & mask)) {
printf("[%d]",thr_id);
if (fulltest(hash64, ptarget)) {
*hashes_done = n - first_nonce + 1;
return true;
}
}
#endif
} while (n < max_nonce && !work_restart[thr_id].restart);
// see blake.c if else to understand the loop on htmax => mask
} while ( (n < max_nonce -8) && !work_restart[thr_id].restart);
break;
}
}

@@ -259,7 +259,8 @@ void ripemd160_4way_init( ripemd160_4way_context *sc )
sc->count_high = sc->count_low = 0;
}

void ripemd160_4way( ripemd160_4way_context *sc, const void *data, size_t len )
void ripemd160_4way_update( ripemd160_4way_context *sc, const void *data,
size_t len )
{
__m128i *vdata = (__m128i*)data;
size_t ptr;
@@ -559,7 +560,8 @@ void ripemd160_8way_init( ripemd160_8way_context *sc )
sc->count_high = sc->count_low = 0;
}

void ripemd160_8way( ripemd160_8way_context *sc, const void *data, size_t len )
void ripemd160_8way_update( ripemd160_8way_context *sc, const void *data,
size_t len )
{
__m256i *vdata = (__m256i*)data;
size_t ptr;
@@ -623,3 +625,303 @@ void ripemd160_8way_close( ripemd160_8way_context *sc, void *dst )

#endif // __AVX2__

#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)

// RIPEMD-160 16 way

#define F16W_1(x, y, z) \
_mm512_xor_si512( _mm512_xor_si512( x, y ), z )

#define F16W_2(x, y, z) \
_mm512_xor_si512( _mm512_and_si512( _mm512_xor_si512( y, z ), x ), z )

#define F16W_3(x, y, z) \
_mm512_xor_si512( _mm512_or_si512( x, mm512_not( y ) ), z )

#define F16W_4(x, y, z) \
_mm512_xor_si512( _mm512_and_si512( _mm512_xor_si512( x, y ), z ), y )

#define F16W_5(x, y, z) \
_mm512_xor_si512( x, _mm512_or_si512( y, mm512_not( z ) ) )

#define RR_16W(a, b, c, d, e, f, s, r, k) \
do{ \
a = _mm512_add_epi32( mm512_rol_32( _mm512_add_epi32( _mm512_add_epi32( \
_mm512_add_epi32( a, f( b ,c, d ) ), r ), \
m512_const1_64( k ) ), s ), e ); \
c = mm512_rol_32( c, 10 );\
} while (0)

#define ROUND1_16W(a, b, c, d, e, f, s, r, k) \
RR_16W(a ## 1, b ## 1, c ## 1, d ## 1, e ## 1, f, s, r, K1 ## k)

#define ROUND2_16W(a, b, c, d, e, f, s, r, k) \
RR_16W(a ## 2, b ## 2, c ## 2, d ## 2, e ## 2, f, s, r, K2 ## k)

static void ripemd160_16way_round( ripemd160_16way_context *sc )
{
const __m512i *in = (__m512i*)sc->buf;
__m512i *h = (__m512i*)sc->val;
register __m512i A1, B1, C1, D1, E1;
register __m512i A2, B2, C2, D2, E2;
__m512i tmp;

A1 = A2 = h[0];
B1 = B2 = h[1];
C1 = C2 = h[2];
D1 = D2 = h[3];
E1 = E2 = h[4];

ROUND1_16W( A, B, C, D, E, F16W_1, 11, in[ 0], 1 );
ROUND1_16W( E, A, B, C, D, F16W_1, 14, in[ 1], 1 );
ROUND1_16W( D, E, A, B, C, F16W_1, 15, in[ 2], 1 );
ROUND1_16W( C, D, E, A, B, F16W_1, 12, in[ 3], 1 );
ROUND1_16W( B, C, D, E, A, F16W_1, 5, in[ 4], 1 );
ROUND1_16W( A, B, C, D, E, F16W_1, 8, in[ 5], 1 );
ROUND1_16W( E, A, B, C, D, F16W_1, 7, in[ 6], 1 );
ROUND1_16W( D, E, A, B, C, F16W_1, 9, in[ 7], 1 );
ROUND1_16W( C, D, E, A, B, F16W_1, 11, in[ 8], 1 );
ROUND1_16W( B, C, D, E, A, F16W_1, 13, in[ 9], 1 );
ROUND1_16W( A, B, C, D, E, F16W_1, 14, in[10], 1 );
ROUND1_16W( E, A, B, C, D, F16W_1, 15, in[11], 1 );
ROUND1_16W( D, E, A, B, C, F16W_1, 6, in[12], 1 );
ROUND1_16W( C, D, E, A, B, F16W_1, 7, in[13], 1 );
ROUND1_16W( B, C, D, E, A, F16W_1, 9, in[14], 1 );
ROUND1_16W( A, B, C, D, E, F16W_1, 8, in[15], 1 );

ROUND1_16W( E, A, B, C, D, F16W_2, 7, in[ 7], 2 );
ROUND1_16W( D, E, A, B, C, F16W_2, 6, in[ 4], 2 );
ROUND1_16W( C, D, E, A, B, F16W_2, 8, in[13], 2 );
ROUND1_16W( B, C, D, E, A, F16W_2, 13, in[ 1], 2 );
ROUND1_16W( A, B, C, D, E, F16W_2, 11, in[10], 2 );
ROUND1_16W( E, A, B, C, D, F16W_2, 9, in[ 6], 2 );
ROUND1_16W( D, E, A, B, C, F16W_2, 7, in[15], 2 );
ROUND1_16W( C, D, E, A, B, F16W_2, 15, in[ 3], 2 );
ROUND1_16W( B, C, D, E, A, F16W_2, 7, in[12], 2 );
ROUND1_16W( A, B, C, D, E, F16W_2, 12, in[ 0], 2 );
ROUND1_16W( E, A, B, C, D, F16W_2, 15, in[ 9], 2 );
ROUND1_16W( D, E, A, B, C, F16W_2, 9, in[ 5], 2 );
ROUND1_16W( C, D, E, A, B, F16W_2, 11, in[ 2], 2 );
ROUND1_16W( B, C, D, E, A, F16W_2, 7, in[14], 2 );
ROUND1_16W( A, B, C, D, E, F16W_2, 13, in[11], 2 );
ROUND1_16W( E, A, B, C, D, F16W_2, 12, in[ 8], 2 );

ROUND1_16W( D, E, A, B, C, F16W_3, 11, in[ 3], 3 );
ROUND1_16W( C, D, E, A, B, F16W_3, 13, in[10], 3 );
ROUND1_16W( B, C, D, E, A, F16W_3, 6, in[14], 3 );
ROUND1_16W( A, B, C, D, E, F16W_3, 7, in[ 4], 3 );
ROUND1_16W( E, A, B, C, D, F16W_3, 14, in[ 9], 3 );
ROUND1_16W( D, E, A, B, C, F16W_3, 9, in[15], 3 );
ROUND1_16W( C, D, E, A, B, F16W_3, 13, in[ 8], 3 );
ROUND1_16W( B, C, D, E, A, F16W_3, 15, in[ 1], 3 );
ROUND1_16W( A, B, C, D, E, F16W_3, 14, in[ 2], 3 );
ROUND1_16W( E, A, B, C, D, F16W_3, 8, in[ 7], 3 );
ROUND1_16W( D, E, A, B, C, F16W_3, 13, in[ 0], 3 );
ROUND1_16W( C, D, E, A, B, F16W_3, 6, in[ 6], 3 );
ROUND1_16W( B, C, D, E, A, F16W_3, 5, in[13], 3 );
ROUND1_16W( A, B, C, D, E, F16W_3, 12, in[11], 3 );
ROUND1_16W( E, A, B, C, D, F16W_3, 7, in[ 5], 3 );
ROUND1_16W( D, E, A, B, C, F16W_3, 5, in[12], 3 );

ROUND1_16W( C, D, E, A, B, F16W_4, 11, in[ 1], 4 );
ROUND1_16W( B, C, D, E, A, F16W_4, 12, in[ 9], 4 );
ROUND1_16W( A, B, C, D, E, F16W_4, 14, in[11], 4 );
ROUND1_16W( E, A, B, C, D, F16W_4, 15, in[10], 4 );
ROUND1_16W( D, E, A, B, C, F16W_4, 14, in[ 0], 4 );
ROUND1_16W( C, D, E, A, B, F16W_4, 15, in[ 8], 4 );
ROUND1_16W( B, C, D, E, A, F16W_4, 9, in[12], 4 );
ROUND1_16W( A, B, C, D, E, F16W_4, 8, in[ 4], 4 );
ROUND1_16W( E, A, B, C, D, F16W_4, 9, in[13], 4 );
|
||||
ROUND1_16W( D, E, A, B, C, F16W_4, 14, in[ 3], 4 );
|
||||
ROUND1_16W( C, D, E, A, B, F16W_4, 5, in[ 7], 4 );
|
||||
ROUND1_16W( B, C, D, E, A, F16W_4, 6, in[15], 4 );
|
||||
ROUND1_16W( A, B, C, D, E, F16W_4, 8, in[14], 4 );
|
||||
ROUND1_16W( E, A, B, C, D, F16W_4, 6, in[ 5], 4 );
|
||||
ROUND1_16W( D, E, A, B, C, F16W_4, 5, in[ 6], 4 );
|
||||
ROUND1_16W( C, D, E, A, B, F16W_4, 12, in[ 2], 4 );
|
||||
|
||||
ROUND1_16W( B, C, D, E, A, F16W_5, 9, in[ 4], 5 );
|
||||
ROUND1_16W( A, B, C, D, E, F16W_5, 15, in[ 0], 5 );
|
||||
ROUND1_16W( E, A, B, C, D, F16W_5, 5, in[ 5], 5 );
|
||||
ROUND1_16W( D, E, A, B, C, F16W_5, 11, in[ 9], 5 );
|
||||
ROUND1_16W( C, D, E, A, B, F16W_5, 6, in[ 7], 5 );
|
||||
ROUND1_16W( B, C, D, E, A, F16W_5, 8, in[12], 5 );
|
||||
ROUND1_16W( A, B, C, D, E, F16W_5, 13, in[ 2], 5 );
|
||||
ROUND1_16W( E, A, B, C, D, F16W_5, 12, in[10], 5 );
|
||||
ROUND1_16W( D, E, A, B, C, F16W_5, 5, in[14], 5 );
|
||||
ROUND1_16W( C, D, E, A, B, F16W_5, 12, in[ 1], 5 );
|
||||
ROUND1_16W( B, C, D, E, A, F16W_5, 13, in[ 3], 5 );
|
||||
ROUND1_16W( A, B, C, D, E, F16W_5, 14, in[ 8], 5 );
|
||||
ROUND1_16W( E, A, B, C, D, F16W_5, 11, in[11], 5 );
|
||||
ROUND1_16W( D, E, A, B, C, F16W_5, 8, in[ 6], 5 );
|
||||
ROUND1_16W( C, D, E, A, B, F16W_5, 5, in[15], 5 );
|
||||
ROUND1_16W( B, C, D, E, A, F16W_5, 6, in[13], 5 );
|
||||
|
||||
ROUND2_16W( A, B, C, D, E, F16W_5, 8, in[ 5], 1 );
|
||||
ROUND2_16W( E, A, B, C, D, F16W_5, 9, in[14], 1 );
|
||||
ROUND2_16W( D, E, A, B, C, F16W_5, 9, in[ 7], 1 );
|
||||
ROUND2_16W( C, D, E, A, B, F16W_5, 11, in[ 0], 1 );
|
||||
ROUND2_16W( B, C, D, E, A, F16W_5, 13, in[ 9], 1 );
|
||||
ROUND2_16W( A, B, C, D, E, F16W_5, 15, in[ 2], 1 );
|
||||
ROUND2_16W( E, A, B, C, D, F16W_5, 15, in[11], 1 );
|
||||
ROUND2_16W( D, E, A, B, C, F16W_5, 5, in[ 4], 1 );
|
||||
ROUND2_16W( C, D, E, A, B, F16W_5, 7, in[13], 1 );
|
||||
ROUND2_16W( B, C, D, E, A, F16W_5, 7, in[ 6], 1 );
|
||||
ROUND2_16W( A, B, C, D, E, F16W_5, 8, in[15], 1 );
|
||||
ROUND2_16W( E, A, B, C, D, F16W_5, 11, in[ 8], 1 );
|
||||
ROUND2_16W( D, E, A, B, C, F16W_5, 14, in[ 1], 1 );
|
||||
ROUND2_16W( C, D, E, A, B, F16W_5, 14, in[10], 1 );
|
||||
ROUND2_16W( B, C, D, E, A, F16W_5, 12, in[ 3], 1 );
|
||||
ROUND2_16W( A, B, C, D, E, F16W_5, 6, in[12], 1 );
|
||||
|
||||
ROUND2_16W( E, A, B, C, D, F16W_4, 9, in[ 6], 2 );
|
||||
ROUND2_16W( D, E, A, B, C, F16W_4, 13, in[11], 2 );
|
||||
ROUND2_16W( C, D, E, A, B, F16W_4, 15, in[ 3], 2 );
|
||||
ROUND2_16W( B, C, D, E, A, F16W_4, 7, in[ 7], 2 );
|
||||
ROUND2_16W( A, B, C, D, E, F16W_4, 12, in[ 0], 2 );
|
||||
ROUND2_16W( E, A, B, C, D, F16W_4, 8, in[13], 2 );
|
||||
ROUND2_16W( D, E, A, B, C, F16W_4, 9, in[ 5], 2 );
|
||||
ROUND2_16W( C, D, E, A, B, F16W_4, 11, in[10], 2 );
|
||||
ROUND2_16W( B, C, D, E, A, F16W_4, 7, in[14], 2 );
|
||||
ROUND2_16W( A, B, C, D, E, F16W_4, 7, in[15], 2 );
|
||||
ROUND2_16W( E, A, B, C, D, F16W_4, 12, in[ 8], 2 );
|
||||
ROUND2_16W( D, E, A, B, C, F16W_4, 7, in[12], 2 );
|
||||
ROUND2_16W( C, D, E, A, B, F16W_4, 6, in[ 4], 2 );
|
||||
ROUND2_16W( B, C, D, E, A, F16W_4, 15, in[ 9], 2 );
|
||||
ROUND2_16W( A, B, C, D, E, F16W_4, 13, in[ 1], 2 );
|
||||
ROUND2_16W( E, A, B, C, D, F16W_4, 11, in[ 2], 2 );
|
||||
|
||||
ROUND2_16W( D, E, A, B, C, F16W_3, 9, in[15], 3 );
|
||||
ROUND2_16W( C, D, E, A, B, F16W_3, 7, in[ 5], 3 );
|
||||
ROUND2_16W( B, C, D, E, A, F16W_3, 15, in[ 1], 3 );
|
||||
ROUND2_16W( A, B, C, D, E, F16W_3, 11, in[ 3], 3 );
|
||||
ROUND2_16W( E, A, B, C, D, F16W_3, 8, in[ 7], 3 );
|
||||
ROUND2_16W( D, E, A, B, C, F16W_3, 6, in[14], 3 );
|
||||
ROUND2_16W( C, D, E, A, B, F16W_3, 6, in[ 6], 3 );
|
||||
ROUND2_16W( B, C, D, E, A, F16W_3, 14, in[ 9], 3 );
|
||||
ROUND2_16W( A, B, C, D, E, F16W_3, 12, in[11], 3 );
|
||||
ROUND2_16W( E, A, B, C, D, F16W_3, 13, in[ 8], 3 );
|
||||
ROUND2_16W( D, E, A, B, C, F16W_3, 5, in[12], 3 );
|
||||
ROUND2_16W( C, D, E, A, B, F16W_3, 14, in[ 2], 3 );
|
||||
ROUND2_16W( B, C, D, E, A, F16W_3, 13, in[10], 3 );
|
||||
ROUND2_16W( A, B, C, D, E, F16W_3, 13, in[ 0], 3 );
|
||||
ROUND2_16W( E, A, B, C, D, F16W_3, 7, in[ 4], 3 );
|
||||
ROUND2_16W( D, E, A, B, C, F16W_3, 5, in[13], 3 );
|
||||
|
||||
ROUND2_16W( C, D, E, A, B, F16W_2, 15, in[ 8], 4 );
|
||||
ROUND2_16W( B, C, D, E, A, F16W_2, 5, in[ 6], 4 );
|
||||
ROUND2_16W( A, B, C, D, E, F16W_2, 8, in[ 4], 4 );
|
||||
ROUND2_16W( E, A, B, C, D, F16W_2, 11, in[ 1], 4 );
|
||||
ROUND2_16W( D, E, A, B, C, F16W_2, 14, in[ 3], 4 );
|
||||
ROUND2_16W( C, D, E, A, B, F16W_2, 14, in[11], 4 );
|
||||
ROUND2_16W( B, C, D, E, A, F16W_2, 6, in[15], 4 );
|
||||
ROUND2_16W( A, B, C, D, E, F16W_2, 14, in[ 0], 4 );
|
||||
ROUND2_16W( E, A, B, C, D, F16W_2, 6, in[ 5], 4 );
|
||||
ROUND2_16W( D, E, A, B, C, F16W_2, 9, in[12], 4 );
|
||||
ROUND2_16W( C, D, E, A, B, F16W_2, 12, in[ 2], 4 );
|
||||
ROUND2_16W( B, C, D, E, A, F16W_2, 9, in[13], 4 );
|
||||
ROUND2_16W( A, B, C, D, E, F16W_2, 12, in[ 9], 4 );
|
||||
ROUND2_16W( E, A, B, C, D, F16W_2, 5, in[ 7], 4 );
|
||||
ROUND2_16W( D, E, A, B, C, F16W_2, 15, in[10], 4 );
|
||||
ROUND2_16W( C, D, E, A, B, F16W_2, 8, in[14], 4 );
|
||||
|
||||
ROUND2_16W( B, C, D, E, A, F16W_1, 8, in[12], 5 );
|
||||
ROUND2_16W( A, B, C, D, E, F16W_1, 5, in[15], 5 );
|
||||
ROUND2_16W( E, A, B, C, D, F16W_1, 12, in[10], 5 );
|
||||
ROUND2_16W( D, E, A, B, C, F16W_1, 9, in[ 4], 5 );
|
||||
ROUND2_16W( C, D, E, A, B, F16W_1, 12, in[ 1], 5 );
|
||||
ROUND2_16W( B, C, D, E, A, F16W_1, 5, in[ 5], 5 );
|
||||
ROUND2_16W( A, B, C, D, E, F16W_1, 14, in[ 8], 5 );
|
||||
ROUND2_16W( E, A, B, C, D, F16W_1, 6, in[ 7], 5 );
|
||||
ROUND2_16W( D, E, A, B, C, F16W_1, 8, in[ 6], 5 );
|
||||
ROUND2_16W( C, D, E, A, B, F16W_1, 13, in[ 2], 5 );
|
||||
ROUND2_16W( B, C, D, E, A, F16W_1, 6, in[13], 5 );
|
||||
ROUND2_16W( A, B, C, D, E, F16W_1, 5, in[14], 5 );
|
||||
ROUND2_16W( E, A, B, C, D, F16W_1, 15, in[ 0], 5 );
|
||||
ROUND2_16W( D, E, A, B, C, F16W_1, 13, in[ 3], 5 );
|
||||
ROUND2_16W( C, D, E, A, B, F16W_1, 11, in[ 9], 5 );
|
||||
ROUND2_16W( B, C, D, E, A, F16W_1, 11, in[11], 5 );
|
||||
|
||||
tmp = _mm512_add_epi32( _mm512_add_epi32( h[1], C1 ), D2 );
|
||||
h[1] = _mm512_add_epi32( _mm512_add_epi32( h[2], D1 ), E2 );
|
||||
h[2] = _mm512_add_epi32( _mm512_add_epi32( h[3], E1 ), A2 );
|
||||
h[3] = _mm512_add_epi32( _mm512_add_epi32( h[4], A1 ), B2 );
|
||||
h[4] = _mm512_add_epi32( _mm512_add_epi32( h[0], B1 ), C2 );
|
||||
h[0] = tmp;
|
||||
}
|
||||
|
||||
void ripemd160_16way_init( ripemd160_16way_context *sc )
|
||||
{
|
||||
sc->val[0] = m512_const1_64( 0x6745230167452301 );
|
||||
sc->val[1] = m512_const1_64( 0xEFCDAB89EFCDAB89 );
|
||||
sc->val[2] = m512_const1_64( 0x98BADCFE98BADCFE );
|
||||
sc->val[3] = m512_const1_64( 0x1032547610325476 );
|
||||
sc->val[4] = m512_const1_64( 0xC3D2E1F0C3D2E1F0 );
|
||||
sc->count_high = sc->count_low = 0;
|
||||
}
|
||||
|
||||
void ripemd160_16way_update( ripemd160_16way_context *sc, const void *data,
|
||||
size_t len )
|
||||
{
|
||||
__m512i *vdata = (__m512i*)data;
|
||||
size_t ptr;
|
||||
const int block_size = 64;
|
||||
|
||||
ptr = (unsigned)sc->count_low & (block_size - 1U);
|
||||
while ( len > 0 )
|
||||
{
|
||||
size_t clen;
|
||||
uint32_t clow, clow2;
|
||||
|
||||
clen = block_size - ptr;
|
||||
if ( clen > len )
|
||||
clen = len;
|
||||
memcpy_512( sc->buf + (ptr>>2), vdata, clen>>2 );
|
||||
vdata = vdata + (clen>>2);
|
||||
ptr += clen;
|
||||
len -= clen;
|
||||
if ( ptr == block_size )
|
||||
{
|
||||
ripemd160_16way_round( sc );
|
||||
ptr = 0;
|
||||
}
|
||||
clow = sc->count_low;
|
||||
clow2 = clow + clen;
|
||||
sc->count_low = clow2;
|
||||
if ( clow2 < clow )
|
||||
sc->count_high++;
|
||||
}
|
||||
}
|
||||
|
||||
void ripemd160_16way_close( ripemd160_16way_context *sc, void *dst )
|
||||
{
|
||||
unsigned ptr, u;
|
||||
uint32_t low, high;
|
||||
const int block_size = 64;
|
||||
const int pad = block_size - 8;
|
||||
|
||||
ptr = (unsigned)sc->count_low & ( block_size - 1U);
|
||||
sc->buf[ ptr>>2 ] = m512_const1_32( 0x80 );
|
||||
ptr += 4;
|
||||
|
||||
if ( ptr > pad )
|
||||
{
|
||||
memset_zero_512( sc->buf + (ptr>>2), (block_size - ptr) >> 2 );
|
||||
ripemd160_16way_round( sc );
|
||||
memset_zero_512( sc->buf, pad>>2 );
|
||||
}
|
||||
else
|
||||
memset_zero_512( sc->buf + (ptr>>2), (pad - ptr) >> 2 );
|
||||
|
||||
low = sc->count_low;
|
||||
high = (sc->count_high << 3) | (low >> 29);
|
||||
low = low << 3;
|
||||
sc->buf[ pad>>2 ] = _mm512_set1_epi32( low );
|
||||
sc->buf[ (pad>>2) + 1 ] = _mm512_set1_epi32( high );
|
||||
ripemd160_16way_round( sc );
|
||||
for (u = 0; u < 5; u ++)
|
||||
casti_m512i( dst, u ) = sc->val[u];
|
||||
}
|
||||
|
||||
#endif // AVX512
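
For orientation, a minimal caller sketch for the new 16-way RIPEMD-160 interface follows. It is not part of the patch: it assumes the sixteen 64-byte inputs have already been interleaved lane-wise into 32-bit words (an intrlv_16x32-style helper, not shown), and the header name is an assumption; only the init/update/close signatures are taken from this diff.

// Hypothetical caller: hash 16 independent 64-byte messages in parallel.
// 'vdata' holds the 16 inputs interleaved 16x32 (16*64 bytes, aligned),
// 'vhash' receives the 16 digests interleaved the same way (16*20 bytes).
#include "ripemd-hash-4way.h"   // assumed header for these declarations

static void ripemd160_16x_hash( void *vhash, const void *vdata )
{
   ripemd160_16way_context ctx;

   ripemd160_16way_init( &ctx );
   ripemd160_16way_update( &ctx, vdata, 64 );  // 64 bytes per lane
   ripemd160_16way_close( &ctx, vhash );       // writes 5 __m512i words
}
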
|
||||
|
@@ -16,7 +16,8 @@ typedef struct
|
||||
} __attribute__ ((aligned (64))) ripemd160_4way_context;
|
||||
|
||||
void ripemd160_4way_init( ripemd160_4way_context *sc );
|
||||
void ripemd160_4way( ripemd160_4way_context *sc, const void *data, size_t len );
|
||||
void ripemd160_4way_update( ripemd160_4way_context *sc, const void *data,
|
||||
size_t len );
|
||||
void ripemd160_4way_close( ripemd160_4way_context *sc, void *dst );
|
||||
|
||||
#if defined (__AVX2__)
|
||||
@@ -26,13 +27,28 @@ typedef struct
|
||||
__m256i buf[64>>2];
|
||||
__m256i val[5];
|
||||
uint32_t count_high, count_low;
|
||||
} __attribute__ ((aligned (64))) ripemd160_8way_context;
|
||||
} __attribute__ ((aligned (128))) ripemd160_8way_context;
|
||||
|
||||
void ripemd160_8way_init( ripemd160_8way_context *sc );
|
||||
void ripemd160_8way( ripemd160_8way_context *sc, const void *data, size_t len );
|
||||
void ripemd160_8way_update( ripemd160_8way_context *sc, const void *data,
|
||||
size_t len );
|
||||
void ripemd160_8way_close( ripemd160_8way_context *sc, void *dst );
|
||||
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
|
||||
typedef struct
|
||||
{
|
||||
__m512i buf[64>>2];
|
||||
__m512i val[5];
|
||||
uint32_t count_high, count_low;
|
||||
} __attribute__ ((aligned (128))) ripemd160_16way_context;
|
||||
|
||||
void ripemd160_16way_init( ripemd160_16way_context *sc );
|
||||
void ripemd160_16way_update( ripemd160_16way_context *sc, const void *data,
|
||||
size_t len );
|
||||
void ripemd160_16way_close( ripemd160_16way_context *sc, void *dst );
|
||||
|
||||
#endif // AVX512
|
||||
#endif // __AVX2__
|
||||
#endif // __SSE4_2__
|
||||
#endif // RIPEMD_HASH_4WAY_H__
|
||||
|
@@ -41,13 +41,9 @@
|
||||
#define SHA2_HASH_4WAY_H__ 1
|
||||
|
||||
#include <stddef.h>
|
||||
#include "sph_types.h"
|
||||
#include "simd-utils.h"
|
||||
|
||||
#if defined(__SSE2__)
|
||||
//#if defined(__SSE4_2__)
|
||||
|
||||
//#define SPH_SIZE_sha256 256
|
||||
|
||||
// SHA-256 4 way
|
||||
|
||||
@@ -59,9 +55,12 @@ typedef struct {
|
||||
} sha256_4way_context __attribute__ ((aligned (64)));
|
||||
|
||||
void sha256_4way_init( sha256_4way_context *sc );
|
||||
void sha256_4way( sha256_4way_context *sc, const void *data, size_t len );
|
||||
void sha256_4way_update( sha256_4way_context *sc, const void *data,
|
||||
size_t len );
|
||||
void sha256_4way_close( sha256_4way_context *sc, void *dst );
|
||||
|
||||
#endif // SSE2
|
||||
|
||||
#if defined (__AVX2__)
|
||||
|
||||
// SHA-256 8 way
|
||||
@@ -74,10 +73,29 @@ typedef struct {
|
||||
} sha256_8way_context __attribute__ ((aligned (128)));
|
||||
|
||||
void sha256_8way_init( sha256_8way_context *sc );
|
||||
void sha256_8way( sha256_8way_context *sc, const void *data, size_t len );
|
||||
void sha256_8way_update( sha256_8way_context *sc, const void *data, size_t len );
|
||||
void sha256_8way_close( sha256_8way_context *sc, void *dst );
|
||||
|
||||
//#define SPH_SIZE_sha512 512
|
||||
#endif // AVX2
|
||||
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
|
||||
// SHA-256 16 way
|
||||
|
||||
typedef struct {
|
||||
__m512i buf[64>>2];
|
||||
__m512i val[8];
|
||||
uint32_t count_high, count_low;
|
||||
bool initialized;
|
||||
} sha256_16way_context __attribute__ ((aligned (128)));
|
||||
|
||||
void sha256_16way_init( sha256_16way_context *sc );
|
||||
void sha256_16way_update( sha256_16way_context *sc, const void *data, size_t len );
|
||||
void sha256_16way_close( sha256_16way_context *sc, void *dst );
|
||||
|
||||
#endif // AVX512
|
||||
|
||||
#if defined (__AVX2__)
|
||||
|
||||
// SHA-512 4 way
|
||||
|
||||
@@ -91,9 +109,10 @@ typedef struct {
|
||||
void sha512_4way_init( sha512_4way_context *sc);
|
||||
void sha512_4way_update( sha512_4way_context *sc, const void *data,
|
||||
size_t len );
|
||||
#define sha512_4way sha512_4way_update
|
||||
void sha512_4way_close( sha512_4way_context *sc, void *dst );
|
||||
|
||||
#endif // AVX2
|
||||
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
|
||||
// SHA-512 8 way
|
||||
@@ -110,8 +129,6 @@ void sha512_8way_update( sha512_8way_context *sc, const void *data,
|
||||
size_t len );
|
||||
void sha512_8way_close( sha512_8way_context *sc, void *dst );
|
||||
|
||||
|
||||
#endif // AVX512
|
||||
#endif // __AVX2__
|
||||
#endif // __SSE2__
|
||||
|
||||
#endif // SHA256_4WAY_H__
|
||||
|
@@ -39,47 +39,31 @@
|
||||
// SHA-256 32 bit
|
||||
|
||||
/*
|
||||
static const sph_u32 H256[8] = {
|
||||
SPH_C32(0x6A09E667), SPH_C32(0xBB67AE85),
|
||||
SPH_C32(0x3C6EF372), SPH_C32(0xA54FF53A),
|
||||
SPH_C32(0x510E527F), SPH_C32(0x9B05688C),
|
||||
SPH_C32(0x1F83D9AB), SPH_C32(0x5BE0CD19)
|
||||
static const uint32_t H256[8] =
|
||||
{
|
||||
0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A,
|
||||
0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19
|
||||
};
|
||||
*/
|
||||
|
||||
static const sph_u32 K256[64] = {
|
||||
SPH_C32(0x428A2F98), SPH_C32(0x71374491),
|
||||
SPH_C32(0xB5C0FBCF), SPH_C32(0xE9B5DBA5),
|
||||
SPH_C32(0x3956C25B), SPH_C32(0x59F111F1),
|
||||
SPH_C32(0x923F82A4), SPH_C32(0xAB1C5ED5),
|
||||
SPH_C32(0xD807AA98), SPH_C32(0x12835B01),
|
||||
SPH_C32(0x243185BE), SPH_C32(0x550C7DC3),
|
||||
SPH_C32(0x72BE5D74), SPH_C32(0x80DEB1FE),
|
||||
SPH_C32(0x9BDC06A7), SPH_C32(0xC19BF174),
|
||||
SPH_C32(0xE49B69C1), SPH_C32(0xEFBE4786),
|
||||
SPH_C32(0x0FC19DC6), SPH_C32(0x240CA1CC),
|
||||
SPH_C32(0x2DE92C6F), SPH_C32(0x4A7484AA),
|
||||
SPH_C32(0x5CB0A9DC), SPH_C32(0x76F988DA),
|
||||
SPH_C32(0x983E5152), SPH_C32(0xA831C66D),
|
||||
SPH_C32(0xB00327C8), SPH_C32(0xBF597FC7),
|
||||
SPH_C32(0xC6E00BF3), SPH_C32(0xD5A79147),
|
||||
SPH_C32(0x06CA6351), SPH_C32(0x14292967),
|
||||
SPH_C32(0x27B70A85), SPH_C32(0x2E1B2138),
|
||||
SPH_C32(0x4D2C6DFC), SPH_C32(0x53380D13),
|
||||
SPH_C32(0x650A7354), SPH_C32(0x766A0ABB),
|
||||
SPH_C32(0x81C2C92E), SPH_C32(0x92722C85),
|
||||
SPH_C32(0xA2BFE8A1), SPH_C32(0xA81A664B),
|
||||
SPH_C32(0xC24B8B70), SPH_C32(0xC76C51A3),
|
||||
SPH_C32(0xD192E819), SPH_C32(0xD6990624),
|
||||
SPH_C32(0xF40E3585), SPH_C32(0x106AA070),
|
||||
SPH_C32(0x19A4C116), SPH_C32(0x1E376C08),
|
||||
SPH_C32(0x2748774C), SPH_C32(0x34B0BCB5),
|
||||
SPH_C32(0x391C0CB3), SPH_C32(0x4ED8AA4A),
|
||||
SPH_C32(0x5B9CCA4F), SPH_C32(0x682E6FF3),
|
||||
SPH_C32(0x748F82EE), SPH_C32(0x78A5636F),
|
||||
SPH_C32(0x84C87814), SPH_C32(0x8CC70208),
|
||||
SPH_C32(0x90BEFFFA), SPH_C32(0xA4506CEB),
|
||||
SPH_C32(0xBEF9A3F7), SPH_C32(0xC67178F2)
|
||||
static const uint32_t K256[64] =
|
||||
{
|
||||
0x428A2F98, 0x71374491, 0xB5C0FBCF, 0xE9B5DBA5,
|
||||
0x3956C25B, 0x59F111F1, 0x923F82A4, 0xAB1C5ED5,
|
||||
0xD807AA98, 0x12835B01, 0x243185BE, 0x550C7DC3,
|
||||
0x72BE5D74, 0x80DEB1FE, 0x9BDC06A7, 0xC19BF174,
|
||||
0xE49B69C1, 0xEFBE4786, 0x0FC19DC6, 0x240CA1CC,
|
||||
0x2DE92C6F, 0x4A7484AA, 0x5CB0A9DC, 0x76F988DA,
|
||||
0x983E5152, 0xA831C66D, 0xB00327C8, 0xBF597FC7,
|
||||
0xC6E00BF3, 0xD5A79147, 0x06CA6351, 0x14292967,
|
||||
0x27B70A85, 0x2E1B2138, 0x4D2C6DFC, 0x53380D13,
|
||||
0x650A7354, 0x766A0ABB, 0x81C2C92E, 0x92722C85,
|
||||
0xA2BFE8A1, 0xA81A664B, 0xC24B8B70, 0xC76C51A3,
|
||||
0xD192E819, 0xD6990624, 0xF40E3585, 0x106AA070,
|
||||
0x19A4C116, 0x1E376C08, 0x2748774C, 0x34B0BCB5,
|
||||
0x391C0CB3, 0x4ED8AA4A, 0x5B9CCA4F, 0x682E6FF3,
|
||||
0x748F82EE, 0x78A5636F, 0x84C87814, 0x8CC70208,
|
||||
0x90BEFFFA, 0xA4506CEB, 0xBEF9A3F7, 0xC67178F2
|
||||
};
|
||||
|
||||
// SHA-256 4 way
|
||||
@@ -248,7 +232,7 @@ void sha256_4way_init( sha256_4way_context *sc )
|
||||
*/
|
||||
}
|
||||
|
||||
void sha256_4way( sha256_4way_context *sc, const void *data, size_t len )
|
||||
void sha256_4way_update( sha256_4way_context *sc, const void *data, size_t len )
|
||||
{
|
||||
__m128i *vdata = (__m128i*)data;
|
||||
size_t ptr;
|
||||
@@ -273,7 +257,7 @@ void sha256_4way( sha256_4way_context *sc, const void *data, size_t len )
|
||||
ptr = 0;
|
||||
}
|
||||
clow = sc->count_low;
|
||||
clow2 = SPH_T32( clow + clen );
|
||||
clow2 = clow + clen;
|
||||
sc->count_low = clow2;
|
||||
if ( clow2 < clow )
|
||||
sc->count_high++;
|
||||
@@ -306,10 +290,8 @@ void sha256_4way_close( sha256_4way_context *sc, void *dst )
|
||||
|
||||
sc->buf[ pad >> 2 ] =
|
||||
mm128_bswap_32( m128_const1_32( high ) );
|
||||
// mm128_bswap_32( _mm_set1_epi32( high ) );
|
||||
sc->buf[ ( pad+4 ) >> 2 ] =
|
||||
mm128_bswap_32( m128_const1_32( low ) );
|
||||
// mm128_bswap_32( _mm_set1_epi32( low ) );
|
||||
sha256_4way_round( sc, sc->buf, sc->val );
|
||||
|
||||
mm128_block_bswap_32( dst, sc->val );
|
||||
@@ -483,7 +465,7 @@ void sha256_8way_init( sha256_8way_context *sc )
|
||||
*/
|
||||
}
|
||||
|
||||
void sha256_8way( sha256_8way_context *sc, const void *data, size_t len )
|
||||
void sha256_8way_update( sha256_8way_context *sc, const void *data, size_t len )
|
||||
{
|
||||
__m256i *vdata = (__m256i*)data;
|
||||
size_t ptr;
|
||||
@@ -508,7 +490,7 @@ void sha256_8way( sha256_8way_context *sc, const void *data, size_t len )
|
||||
ptr = 0;
|
||||
}
|
||||
clow = sc->count_low;
|
||||
clow2 = SPH_T32( clow + clen );
|
||||
clow2 = clow + clen;
|
||||
sc->count_low = clow2;
|
||||
if ( clow2 < clow )
|
||||
sc->count_high++;
|
||||
@@ -549,5 +531,233 @@ void sha256_8way_close( sha256_8way_context *sc, void *dst )
|
||||
mm256_block_bswap_32( dst, sc->val );
|
||||
}
|
||||
|
||||
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
|
||||
// SHA-256 16 way
|
||||
|
||||
#define CHx16(X, Y, Z) \
|
||||
_mm512_xor_si512( _mm512_and_si512( _mm512_xor_si512( Y, Z ), X ), Z )
|
||||
|
||||
#define MAJx16(X, Y, Z) \
|
||||
_mm512_or_si512( _mm512_and_si512( X, Y ), \
|
||||
_mm512_and_si512( _mm512_or_si512( X, Y ), Z ) )
|
||||
|
||||
#define BSG2_0x16(x) \
|
||||
_mm512_xor_si512( _mm512_xor_si512( \
|
||||
mm512_ror_32(x, 2), mm512_ror_32(x, 13) ), mm512_ror_32( x, 22) )
|
||||
|
||||
#define BSG2_1x16(x) \
|
||||
_mm512_xor_si512( _mm512_xor_si512( \
|
||||
mm512_ror_32(x, 6), mm512_ror_32(x, 11) ), mm512_ror_32( x, 25) )
|
||||
|
||||
#define SSG2_0x16(x) \
|
||||
_mm512_xor_si512( _mm512_xor_si512( \
|
||||
mm512_ror_32(x, 7), mm512_ror_32(x, 18) ), _mm512_srli_epi32(x, 3) )
|
||||
|
||||
#define SSG2_1x16(x) \
|
||||
_mm512_xor_si512( _mm512_xor_si512( \
|
||||
mm512_ror_32(x, 17), mm512_ror_32(x, 19) ), _mm512_srli_epi32(x, 10) )
|
||||
|
||||
#define SHA2x16_MEXP( a, b, c, d ) \
|
||||
mm512_add4_32( SSG2_1x16( W[a] ), W[b], SSG2_0x16( W[c] ), W[d] );
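// Reference: SHA2x16_MEXP implements the standard SHA-256 message-schedule
// recurrence in-place over a 16-word ring buffer,
//    W[t & 15] = ssg1( W[(t-2) & 15] ) + W[(t-7) & 15]
//              + ssg0( W[(t-15) & 15] ) + W[(t-16) & 15]
// e.g. W[0] = SHA2x16_MEXP( 14, 9, 1, 0 ) covers t = 16, 32 and 48, applied
// to all sixteen 32-bit lanes of each __m512i at once.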
|
||||
|
||||
#define SHA2s_16WAY_STEP(A, B, C, D, E, F, G, H, i, j) \
|
||||
do { \
|
||||
__m512i T1, T2; \
|
||||
__m512i K = _mm512_set1_epi32( K256[( (j)+(i) )] ); \
|
||||
T1 = _mm512_add_epi32( H, mm512_add4_32( BSG2_1x16(E), CHx16(E, F, G), \
|
||||
K, W[i] ) ); \
|
||||
T2 = _mm512_add_epi32( BSG2_0x16(A), MAJx16(A, B, C) ); \
|
||||
D = _mm512_add_epi32( D, T1 ); \
|
||||
H = _mm512_add_epi32( T1, T2 ); \
|
||||
} while (0)
|
||||
|
||||
static void
|
||||
sha256_16way_round( sha256_16way_context *ctx, __m512i *in, __m512i r[8] )
|
||||
{
|
||||
register __m512i A, B, C, D, E, F, G, H;
|
||||
__m512i W[16];
|
||||
|
||||
mm512_block_bswap_32( W , in );
|
||||
mm512_block_bswap_32( W+8, in+8 );
|
||||
|
||||
if ( ctx->initialized )
|
||||
{
|
||||
A = r[0];
|
||||
B = r[1];
|
||||
C = r[2];
|
||||
D = r[3];
|
||||
E = r[4];
|
||||
F = r[5];
|
||||
G = r[6];
|
||||
H = r[7];
|
||||
}
|
||||
else
|
||||
{
|
||||
A = m512_const1_64( 0x6A09E6676A09E667 );
|
||||
B = m512_const1_64( 0xBB67AE85BB67AE85 );
|
||||
C = m512_const1_64( 0x3C6EF3723C6EF372 );
|
||||
D = m512_const1_64( 0xA54FF53AA54FF53A );
|
||||
E = m512_const1_64( 0x510E527F510E527F );
|
||||
F = m512_const1_64( 0x9B05688C9B05688C );
|
||||
G = m512_const1_64( 0x1F83D9AB1F83D9AB );
|
||||
H = m512_const1_64( 0x5BE0CD195BE0CD19 );
|
||||
}
|
||||
|
||||
SHA2s_16WAY_STEP( A, B, C, D, E, F, G, H, 0, 0 );
|
||||
SHA2s_16WAY_STEP( H, A, B, C, D, E, F, G, 1, 0 );
|
||||
SHA2s_16WAY_STEP( G, H, A, B, C, D, E, F, 2, 0 );
|
||||
SHA2s_16WAY_STEP( F, G, H, A, B, C, D, E, 3, 0 );
|
||||
SHA2s_16WAY_STEP( E, F, G, H, A, B, C, D, 4, 0 );
|
||||
SHA2s_16WAY_STEP( D, E, F, G, H, A, B, C, 5, 0 );
|
||||
SHA2s_16WAY_STEP( C, D, E, F, G, H, A, B, 6, 0 );
|
||||
SHA2s_16WAY_STEP( B, C, D, E, F, G, H, A, 7, 0 );
|
||||
SHA2s_16WAY_STEP( A, B, C, D, E, F, G, H, 8, 0 );
|
||||
SHA2s_16WAY_STEP( H, A, B, C, D, E, F, G, 9, 0 );
|
||||
SHA2s_16WAY_STEP( G, H, A, B, C, D, E, F, 10, 0 );
|
||||
SHA2s_16WAY_STEP( F, G, H, A, B, C, D, E, 11, 0 );
|
||||
SHA2s_16WAY_STEP( E, F, G, H, A, B, C, D, 12, 0 );
|
||||
SHA2s_16WAY_STEP( D, E, F, G, H, A, B, C, 13, 0 );
|
||||
SHA2s_16WAY_STEP( C, D, E, F, G, H, A, B, 14, 0 );
|
||||
SHA2s_16WAY_STEP( B, C, D, E, F, G, H, A, 15, 0 );
|
||||
|
||||
for ( int j = 16; j < 64; j += 16 )
|
||||
{
|
||||
W[ 0] = SHA2x16_MEXP( 14, 9, 1, 0 );
|
||||
W[ 1] = SHA2x16_MEXP( 15, 10, 2, 1 );
|
||||
W[ 2] = SHA2x16_MEXP( 0, 11, 3, 2 );
|
||||
W[ 3] = SHA2x16_MEXP( 1, 12, 4, 3 );
|
||||
W[ 4] = SHA2x16_MEXP( 2, 13, 5, 4 );
|
||||
W[ 5] = SHA2x16_MEXP( 3, 14, 6, 5 );
|
||||
W[ 6] = SHA2x16_MEXP( 4, 15, 7, 6 );
|
||||
W[ 7] = SHA2x16_MEXP( 5, 0, 8, 7 );
|
||||
W[ 8] = SHA2x16_MEXP( 6, 1, 9, 8 );
|
||||
W[ 9] = SHA2x16_MEXP( 7, 2, 10, 9 );
|
||||
W[10] = SHA2x16_MEXP( 8, 3, 11, 10 );
|
||||
W[11] = SHA2x16_MEXP( 9, 4, 12, 11 );
|
||||
W[12] = SHA2x16_MEXP( 10, 5, 13, 12 );
|
||||
W[13] = SHA2x16_MEXP( 11, 6, 14, 13 );
|
||||
W[14] = SHA2x16_MEXP( 12, 7, 15, 14 );
|
||||
W[15] = SHA2x16_MEXP( 13, 8, 0, 15 );
|
||||
|
||||
SHA2s_16WAY_STEP( A, B, C, D, E, F, G, H, 0, j );
|
||||
SHA2s_16WAY_STEP( H, A, B, C, D, E, F, G, 1, j );
|
||||
SHA2s_16WAY_STEP( G, H, A, B, C, D, E, F, 2, j );
|
||||
SHA2s_16WAY_STEP( F, G, H, A, B, C, D, E, 3, j );
|
||||
SHA2s_16WAY_STEP( E, F, G, H, A, B, C, D, 4, j );
|
||||
SHA2s_16WAY_STEP( D, E, F, G, H, A, B, C, 5, j );
|
||||
SHA2s_16WAY_STEP( C, D, E, F, G, H, A, B, 6, j );
|
||||
SHA2s_16WAY_STEP( B, C, D, E, F, G, H, A, 7, j );
|
||||
SHA2s_16WAY_STEP( A, B, C, D, E, F, G, H, 8, j );
|
||||
SHA2s_16WAY_STEP( H, A, B, C, D, E, F, G, 9, j );
|
||||
SHA2s_16WAY_STEP( G, H, A, B, C, D, E, F, 10, j );
|
||||
SHA2s_16WAY_STEP( F, G, H, A, B, C, D, E, 11, j );
|
||||
SHA2s_16WAY_STEP( E, F, G, H, A, B, C, D, 12, j );
|
||||
SHA2s_16WAY_STEP( D, E, F, G, H, A, B, C, 13, j );
|
||||
SHA2s_16WAY_STEP( C, D, E, F, G, H, A, B, 14, j );
|
||||
SHA2s_16WAY_STEP( B, C, D, E, F, G, H, A, 15, j );
|
||||
}
|
||||
|
||||
if ( ctx->initialized )
|
||||
{
|
||||
r[0] = _mm512_add_epi32( r[0], A );
|
||||
r[1] = _mm512_add_epi32( r[1], B );
|
||||
r[2] = _mm512_add_epi32( r[2], C );
|
||||
r[3] = _mm512_add_epi32( r[3], D );
|
||||
r[4] = _mm512_add_epi32( r[4], E );
|
||||
r[5] = _mm512_add_epi32( r[5], F );
|
||||
r[6] = _mm512_add_epi32( r[6], G );
|
||||
r[7] = _mm512_add_epi32( r[7], H );
|
||||
}
|
||||
else
|
||||
{
|
||||
ctx->initialized = true;
|
||||
r[0] = _mm512_add_epi32( A, m512_const1_64( 0x6A09E6676A09E667 ) );
|
||||
r[1] = _mm512_add_epi32( B, m512_const1_64( 0xBB67AE85BB67AE85 ) );
|
||||
r[2] = _mm512_add_epi32( C, m512_const1_64( 0x3C6EF3723C6EF372 ) );
|
||||
r[3] = _mm512_add_epi32( D, m512_const1_64( 0xA54FF53AA54FF53A ) );
|
||||
r[4] = _mm512_add_epi32( E, m512_const1_64( 0x510E527F510E527F ) );
|
||||
r[5] = _mm512_add_epi32( F, m512_const1_64( 0x9B05688C9B05688C ) );
|
||||
r[6] = _mm512_add_epi32( G, m512_const1_64( 0x1F83D9AB1F83D9AB ) );
|
||||
r[7] = _mm512_add_epi32( H, m512_const1_64( 0x5BE0CD195BE0CD19 ) );
|
||||
}
|
||||
}
|
||||
|
||||
void sha256_16way_init( sha256_16way_context *sc )
|
||||
{
|
||||
sc->initialized = false;
|
||||
sc->count_high = sc->count_low = 0;
|
||||
}
|
||||
|
||||
|
||||
void sha256_16way_update( sha256_16way_context *sc, const void *data,
|
||||
size_t len )
|
||||
{
|
||||
__m512i *vdata = (__m512i*)data;
|
||||
size_t ptr;
|
||||
const int buf_size = 64;
|
||||
|
||||
ptr = (unsigned)sc->count_low & (buf_size - 1U);
|
||||
while ( len > 0 )
|
||||
{
|
||||
size_t clen;
|
||||
uint32_t clow, clow2;
|
||||
|
||||
clen = buf_size - ptr;
|
||||
if ( clen > len )
|
||||
clen = len;
|
||||
memcpy_512( sc->buf + (ptr>>2), vdata, clen>>2 );
|
||||
vdata = vdata + (clen>>2);
|
||||
ptr += clen;
|
||||
len -= clen;
|
||||
if ( ptr == buf_size )
|
||||
{
|
||||
sha256_16way_round( sc, sc->buf, sc->val );
|
||||
ptr = 0;
|
||||
}
|
||||
clow = sc->count_low;
|
||||
clow2 = clow + clen;
|
||||
sc->count_low = clow2;
|
||||
if ( clow2 < clow )
|
||||
sc->count_high++;
|
||||
}
|
||||
}
|
||||
|
||||
void sha256_16way_close( sha256_16way_context *sc, void *dst )
|
||||
{
|
||||
unsigned ptr;
|
||||
uint32_t low, high;
|
||||
const int buf_size = 64;
|
||||
const int pad = buf_size - 8;
|
||||
|
||||
ptr = (unsigned)sc->count_low & (buf_size - 1U);
|
||||
sc->buf[ ptr>>2 ] = m512_const1_64( 0x0000008000000080 );
|
||||
ptr += 4;
|
||||
|
||||
if ( ptr > pad )
|
||||
{
|
||||
memset_zero_512( sc->buf + (ptr>>2), (buf_size - ptr) >> 2 );
|
||||
sha256_16way_round( sc, sc->buf, sc->val );
|
||||
memset_zero_512( sc->buf, pad >> 2 );
|
||||
}
|
||||
else
|
||||
memset_zero_512( sc->buf + (ptr>>2), (pad - ptr) >> 2 );
|
||||
|
||||
low = sc->count_low;
|
||||
high = (sc->count_high << 3) | (low >> 29);
|
||||
low = low << 3;
|
||||
|
||||
sc->buf[ pad >> 2 ] =
|
||||
mm512_bswap_32( m512_const1_32( high ) );
|
||||
sc->buf[ ( pad+4 ) >> 2 ] =
|
||||
mm512_bswap_32( m512_const1_32( low ) );
|
||||
|
||||
sha256_16way_round( sc, sc->buf, sc->val );
|
||||
|
||||
mm512_block_bswap_32( dst, sc->val );
|
||||
}
|
||||
|
||||
#endif // AVX512
|
||||
#endif // __AVX2__
|
||||
#endif // __SSE2__
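
The sha256q and sha256t scanhash hunks that follow keep the usual midstate optimization with the renamed update function: the constant first 64 bytes of the 80-byte block header are hashed once outside the nonce loop, and each attempt copies that saved context and hashes only the remaining 16 bytes. A minimal sketch of the pattern, using the 8-way names from this patch; the helper names and the interleaved-input layout are illustrative assumptions, not code from the diff.

#include <string.h>
#include "sha-hash-4way.h"             // header included elsewhere in this patch

static sha256_8way_context sha256_mid; // midstate shared by all attempts

// Hash the constant first block once per work unit.
static void sha256_8way_prehash( const void *vdata )
{
   sha256_8way_init( &sha256_mid );
   sha256_8way_update( &sha256_mid, vdata, 64 );
}

// Per nonce: restore the midstate and finish with the last 16 bytes,
// which sit at byte offset 64*8 in the 8-way interleaved buffer.
static void sha256_8way_final_16( void *vhash, const void *vdata )
{
   sha256_8way_context ctx;
   memcpy( &ctx, &sha256_mid, sizeof ctx );
   sha256_8way_update( &ctx, (const char*)vdata + (64 << 3), 16 );
   sha256_8way_close( &ctx, vhash );
}
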
|
||||
|
@@ -15,19 +15,19 @@ void sha256q_8way_hash( void* output, const void* input )
|
||||
sha256_8way_context ctx;
|
||||
memcpy( &ctx, &sha256_ctx8, sizeof ctx );
|
||||
|
||||
sha256_8way( &ctx, input + (64<<3), 16 );
|
||||
sha256_8way_update( &ctx, input + (64<<3), 16 );
|
||||
sha256_8way_close( &ctx, vhash );
|
||||
|
||||
sha256_8way_init( &ctx );
|
||||
sha256_8way( &ctx, vhash, 32 );
|
||||
sha256_8way_update( &ctx, vhash, 32 );
|
||||
sha256_8way_close( &ctx, vhash );
|
||||
|
||||
sha256_8way_init( &ctx );
|
||||
sha256_8way( &ctx, vhash, 32 );
|
||||
sha256_8way_update( &ctx, vhash, 32 );
|
||||
sha256_8way_close( &ctx, vhash );
|
||||
|
||||
sha256_8way_init( &ctx );
|
||||
sha256_8way( &ctx, vhash, 32 );
|
||||
sha256_8way_update( &ctx, vhash, 32 );
|
||||
sha256_8way_close( &ctx, output );
|
||||
}
|
||||
|
||||
@@ -61,7 +61,7 @@ int scanhash_sha256q_8way( struct work *work, uint32_t max_nonce,
|
||||
// Need big endian data
|
||||
mm256_bswap32_intrlv80_8x32( vdata, pdata );
|
||||
sha256_8way_init( &sha256_ctx8 );
|
||||
sha256_8way( &sha256_ctx8, vdata, 64 );
|
||||
sha256_8way_update( &sha256_ctx8, vdata, 64 );
|
||||
|
||||
for ( int m = 0; m < 6; m++ ) if ( Htarg <= htmax[m] )
|
||||
{
|
||||
@@ -108,19 +108,19 @@ void sha256q_4way_hash( void* output, const void* input )
|
||||
sha256_4way_context ctx;
|
||||
memcpy( &ctx, &sha256_ctx4, sizeof ctx );
|
||||
|
||||
sha256_4way( &ctx, input + (64<<2), 16 );
|
||||
sha256_4way_update( &ctx, input + (64<<2), 16 );
|
||||
sha256_4way_close( &ctx, vhash );
|
||||
|
||||
sha256_4way_init( &ctx );
|
||||
sha256_4way( &ctx, vhash, 32 );
|
||||
sha256_4way_update( &ctx, vhash, 32 );
|
||||
sha256_4way_close( &ctx, vhash );
|
||||
|
||||
sha256_4way_init( &ctx );
|
||||
sha256_4way( &ctx, vhash, 32 );
|
||||
sha256_4way_update( &ctx, vhash, 32 );
|
||||
sha256_4way_close( &ctx, vhash );
|
||||
|
||||
sha256_4way_init( &ctx );
|
||||
sha256_4way( &ctx, vhash, 32 );
|
||||
sha256_4way_update( &ctx, vhash, 32 );
|
||||
sha256_4way_close( &ctx, output );
|
||||
}
|
||||
|
||||
@@ -154,7 +154,7 @@ int scanhash_sha256q_4way( struct work *work, uint32_t max_nonce,
|
||||
|
||||
mm128_bswap32_intrlv80_4x32( vdata, pdata );
|
||||
sha256_4way_init( &sha256_ctx4 );
|
||||
sha256_4way( &sha256_ctx4, vdata, 64 );
|
||||
sha256_4way_update( &sha256_ctx4, vdata, 64 );
|
||||
|
||||
for ( int m = 0; m < 6; m++ ) if ( Htarg <= htmax[m] )
|
||||
{
|
||||
|
@@ -15,15 +15,15 @@ void sha256t_8way_hash( void* output, const void* input )
|
||||
sha256_8way_context ctx;
|
||||
memcpy( &ctx, &sha256_ctx8, sizeof ctx );
|
||||
|
||||
sha256_8way( &ctx, input + (64<<3), 16 );
|
||||
sha256_8way_update( &ctx, input + (64<<3), 16 );
|
||||
sha256_8way_close( &ctx, vhash );
|
||||
|
||||
sha256_8way_init( &ctx );
|
||||
sha256_8way( &ctx, vhash, 32 );
|
||||
sha256_8way_update( &ctx, vhash, 32 );
|
||||
sha256_8way_close( &ctx, vhash );
|
||||
|
||||
sha256_8way_init( &ctx );
|
||||
sha256_8way( &ctx, vhash, 32 );
|
||||
sha256_8way_update( &ctx, vhash, 32 );
|
||||
sha256_8way_close( &ctx, output );
|
||||
}
|
||||
|
||||
@@ -59,7 +59,7 @@ int scanhash_sha256t_8way( struct work *work, const uint32_t max_nonce,
|
||||
// Need big endian data
|
||||
mm256_bswap32_intrlv80_8x32( vdata, pdata );
|
||||
sha256_8way_init( &sha256_ctx8 );
|
||||
sha256_8way( &sha256_ctx8, vdata, 64 );
|
||||
sha256_8way_update( &sha256_ctx8, vdata, 64 );
|
||||
|
||||
for ( int m = 0; m < 6; m++ ) if ( Htarg <= htmax[m] )
|
||||
{
|
||||
@@ -101,15 +101,15 @@ void sha256t_4way_hash( void* output, const void* input )
|
||||
sha256_4way_context ctx;
|
||||
memcpy( &ctx, &sha256_ctx4, sizeof ctx );
|
||||
|
||||
sha256_4way( &ctx, input + (64<<2), 16 );
|
||||
sha256_4way_update( &ctx, input + (64<<2), 16 );
|
||||
sha256_4way_close( &ctx, vhash );
|
||||
|
||||
sha256_4way_init( &ctx );
|
||||
sha256_4way( &ctx, vhash, 32 );
|
||||
sha256_4way_update( &ctx, vhash, 32 );
|
||||
sha256_4way_close( &ctx, vhash );
|
||||
|
||||
sha256_4way_init( &ctx );
|
||||
sha256_4way( &ctx, vhash, 32 );
|
||||
sha256_4way_update( &ctx, vhash, 32 );
|
||||
sha256_4way_close( &ctx, output );
|
||||
}
|
||||
|
||||
@@ -143,7 +143,7 @@ int scanhash_sha256t_4way( struct work *work, const uint32_t max_nonce,
|
||||
|
||||
mm128_bswap32_intrlv80_4x32( vdata, pdata );
|
||||
sha256_4way_init( &sha256_ctx4 );
|
||||
sha256_4way( &sha256_ctx4, vdata, 64 );
|
||||
sha256_4way_update( &sha256_ctx4, vdata, 64 );
|
||||
|
||||
for ( int m = 0; m < 6; m++ ) if ( Htarg <= htmax[m] )
|
||||
{
|
||||
|
@@ -37,55 +37,57 @@
#include "sha-hash-4way.h"

/*
static const sph_u64 H512[8] = {
SPH_C64(0x6A09E667F3BCC908), SPH_C64(0xBB67AE8584CAA73B),
SPH_C64(0x3C6EF372FE94F82B), SPH_C64(0xA54FF53A5F1D36F1),
SPH_C64(0x510E527FADE682D1), SPH_C64(0x9B05688C2B3E6C1F),
SPH_C64(0x1F83D9ABFB41BD6B), SPH_C64(0x5BE0CD19137E2179)
static const uint64_t H512[8] =
{
0x6A09E667F3BCC908, 0xBB67AE8584CAA73B,
0x3C6EF372FE94F82B, 0xA54FF53A5F1D36F1,
0x510E527FADE682D1, 0x9B05688C2B3E6C1F,
0x1F83D9ABFB41BD6B, 0x5BE0CD19137E2179
};
*/

static const sph_u64 K512[80] = {
|
||||
SPH_C64(0x428A2F98D728AE22), SPH_C64(0x7137449123EF65CD),
|
||||
SPH_C64(0xB5C0FBCFEC4D3B2F), SPH_C64(0xE9B5DBA58189DBBC),
|
||||
SPH_C64(0x3956C25BF348B538), SPH_C64(0x59F111F1B605D019),
|
||||
SPH_C64(0x923F82A4AF194F9B), SPH_C64(0xAB1C5ED5DA6D8118),
|
||||
SPH_C64(0xD807AA98A3030242), SPH_C64(0x12835B0145706FBE),
|
||||
SPH_C64(0x243185BE4EE4B28C), SPH_C64(0x550C7DC3D5FFB4E2),
|
||||
SPH_C64(0x72BE5D74F27B896F), SPH_C64(0x80DEB1FE3B1696B1),
|
||||
SPH_C64(0x9BDC06A725C71235), SPH_C64(0xC19BF174CF692694),
|
||||
SPH_C64(0xE49B69C19EF14AD2), SPH_C64(0xEFBE4786384F25E3),
|
||||
SPH_C64(0x0FC19DC68B8CD5B5), SPH_C64(0x240CA1CC77AC9C65),
|
||||
SPH_C64(0x2DE92C6F592B0275), SPH_C64(0x4A7484AA6EA6E483),
|
||||
SPH_C64(0x5CB0A9DCBD41FBD4), SPH_C64(0x76F988DA831153B5),
|
||||
SPH_C64(0x983E5152EE66DFAB), SPH_C64(0xA831C66D2DB43210),
|
||||
SPH_C64(0xB00327C898FB213F), SPH_C64(0xBF597FC7BEEF0EE4),
|
||||
SPH_C64(0xC6E00BF33DA88FC2), SPH_C64(0xD5A79147930AA725),
|
||||
SPH_C64(0x06CA6351E003826F), SPH_C64(0x142929670A0E6E70),
|
||||
SPH_C64(0x27B70A8546D22FFC), SPH_C64(0x2E1B21385C26C926),
|
||||
SPH_C64(0x4D2C6DFC5AC42AED), SPH_C64(0x53380D139D95B3DF),
|
||||
SPH_C64(0x650A73548BAF63DE), SPH_C64(0x766A0ABB3C77B2A8),
|
||||
SPH_C64(0x81C2C92E47EDAEE6), SPH_C64(0x92722C851482353B),
|
||||
SPH_C64(0xA2BFE8A14CF10364), SPH_C64(0xA81A664BBC423001),
|
||||
SPH_C64(0xC24B8B70D0F89791), SPH_C64(0xC76C51A30654BE30),
|
||||
SPH_C64(0xD192E819D6EF5218), SPH_C64(0xD69906245565A910),
|
||||
SPH_C64(0xF40E35855771202A), SPH_C64(0x106AA07032BBD1B8),
|
||||
SPH_C64(0x19A4C116B8D2D0C8), SPH_C64(0x1E376C085141AB53),
|
||||
SPH_C64(0x2748774CDF8EEB99), SPH_C64(0x34B0BCB5E19B48A8),
|
||||
SPH_C64(0x391C0CB3C5C95A63), SPH_C64(0x4ED8AA4AE3418ACB),
|
||||
SPH_C64(0x5B9CCA4F7763E373), SPH_C64(0x682E6FF3D6B2B8A3),
|
||||
SPH_C64(0x748F82EE5DEFB2FC), SPH_C64(0x78A5636F43172F60),
|
||||
SPH_C64(0x84C87814A1F0AB72), SPH_C64(0x8CC702081A6439EC),
|
||||
SPH_C64(0x90BEFFFA23631E28), SPH_C64(0xA4506CEBDE82BDE9),
|
||||
SPH_C64(0xBEF9A3F7B2C67915), SPH_C64(0xC67178F2E372532B),
|
||||
SPH_C64(0xCA273ECEEA26619C), SPH_C64(0xD186B8C721C0C207),
|
||||
SPH_C64(0xEADA7DD6CDE0EB1E), SPH_C64(0xF57D4F7FEE6ED178),
|
||||
SPH_C64(0x06F067AA72176FBA), SPH_C64(0x0A637DC5A2C898A6),
|
||||
SPH_C64(0x113F9804BEF90DAE), SPH_C64(0x1B710B35131C471B),
|
||||
SPH_C64(0x28DB77F523047D84), SPH_C64(0x32CAAB7B40C72493),
|
||||
SPH_C64(0x3C9EBE0A15C9BEBC), SPH_C64(0x431D67C49C100D4C),
|
||||
SPH_C64(0x4CC5D4BECB3E42B6), SPH_C64(0x597F299CFC657E2A),
|
||||
SPH_C64(0x5FCB6FAB3AD6FAEC), SPH_C64(0x6C44198C4A475817)
|
||||
static const uint64_t K512[80] =
|
||||
{
|
||||
0x428A2F98D728AE22, 0x7137449123EF65CD,
|
||||
0xB5C0FBCFEC4D3B2F, 0xE9B5DBA58189DBBC,
|
||||
0x3956C25BF348B538, 0x59F111F1B605D019,
|
||||
0x923F82A4AF194F9B, 0xAB1C5ED5DA6D8118,
|
||||
0xD807AA98A3030242, 0x12835B0145706FBE,
|
||||
0x243185BE4EE4B28C, 0x550C7DC3D5FFB4E2,
|
||||
0x72BE5D74F27B896F, 0x80DEB1FE3B1696B1,
|
||||
0x9BDC06A725C71235, 0xC19BF174CF692694,
|
||||
0xE49B69C19EF14AD2, 0xEFBE4786384F25E3,
|
||||
0x0FC19DC68B8CD5B5, 0x240CA1CC77AC9C65,
|
||||
0x2DE92C6F592B0275, 0x4A7484AA6EA6E483,
|
||||
0x5CB0A9DCBD41FBD4, 0x76F988DA831153B5,
|
||||
0x983E5152EE66DFAB, 0xA831C66D2DB43210,
|
||||
0xB00327C898FB213F, 0xBF597FC7BEEF0EE4,
|
||||
0xC6E00BF33DA88FC2, 0xD5A79147930AA725,
|
||||
0x06CA6351E003826F, 0x142929670A0E6E70,
|
||||
0x27B70A8546D22FFC, 0x2E1B21385C26C926,
|
||||
0x4D2C6DFC5AC42AED, 0x53380D139D95B3DF,
|
||||
0x650A73548BAF63DE, 0x766A0ABB3C77B2A8,
|
||||
0x81C2C92E47EDAEE6, 0x92722C851482353B,
|
||||
0xA2BFE8A14CF10364, 0xA81A664BBC423001,
|
||||
0xC24B8B70D0F89791, 0xC76C51A30654BE30,
|
||||
0xD192E819D6EF5218, 0xD69906245565A910,
|
||||
0xF40E35855771202A, 0x106AA07032BBD1B8,
|
||||
0x19A4C116B8D2D0C8, 0x1E376C085141AB53,
|
||||
0x2748774CDF8EEB99, 0x34B0BCB5E19B48A8,
|
||||
0x391C0CB3C5C95A63, 0x4ED8AA4AE3418ACB,
|
||||
0x5B9CCA4F7763E373, 0x682E6FF3D6B2B8A3,
|
||||
0x748F82EE5DEFB2FC, 0x78A5636F43172F60,
|
||||
0x84C87814A1F0AB72, 0x8CC702081A6439EC,
|
||||
0x90BEFFFA23631E28, 0xA4506CEBDE82BDE9,
|
||||
0xBEF9A3F7B2C67915, 0xC67178F2E372532B,
|
||||
0xCA273ECEEA26619C, 0xD186B8C721C0C207,
|
||||
0xEADA7DD6CDE0EB1E, 0xF57D4F7FEE6ED178,
|
||||
0x06F067AA72176FBA, 0x0A637DC5A2C898A6,
|
||||
0x113F9804BEF90DAE, 0x1B710B35131C471B,
|
||||
0x28DB77F523047D84, 0x32CAAB7B40C72493,
|
||||
0x3C9EBE0A15C9BEBC, 0x431D67C49C100D4C,
|
||||
0x4CC5D4BECB3E42B6, 0x597F299CFC657E2A,
|
||||
0x5FCB6FAB3AD6FAEC, 0x6C44198C4A475817
|
||||
};
|
||||
|
||||
|
||||
|
@@ -97,7 +97,7 @@ void shabal256_4way_addbits_and_close( void *cc, unsigned ub, unsigned n,

void shabal512_4way_init( void *cc );
void shabal512_4way_update( void *cc, const void *data, size_t len );
#define shabal512_4way shabal512_4way_update
//#define shabal512_4way shabal512_4way_update
void shabal512_4way_close( void *cc, void *dst );
void shabal512_4way_addbits_and_close( void *cc, unsigned ub, unsigned n,
void *dst );
@@ -100,9 +100,20 @@ c512( sph_shavite_big_context *sc, const void *msg )
|
||||
p3 = h[3];
|
||||
|
||||
// round
|
||||
|
||||
// working proof of concept
|
||||
/*
|
||||
__m512i K = m512_const1_128( m[0] );
|
||||
__m512i X = _mm512_xor_si512( m512_const1_128( p1 ), K );
|
||||
X = _mm512_aesenc_epi128( X, m512_zero );
|
||||
k00 = _mm512_castsi512_si128( K );
|
||||
x = _mm512_castsi512_si128( X );
|
||||
*/
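// The commented-out block above sketches a VAES path: _mm512_aesenc_epi128
// runs one AES round on each of the four 128-bit lanes of a ZMM register,
// so a future 4-way version of this function could process four states per
// instruction. It is only exercised here as a proof of concept; the
// scalar-AES sequence below is what the patch actually keeps.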
|
||||
|
||||
k00 = m[0];
|
||||
x = _mm_xor_si128( p1, k00 );
|
||||
x = _mm_aesenc_si128( x, zero );
|
||||
|
||||
k01 = m[1];
|
||||
x = _mm_xor_si128( x, k01 );
|
||||
x = _mm_aesenc_si128( x, zero );
|
||||
|
@@ -18,76 +18,18 @@ void skeinhash_8way( void *state, const void *input )
|
||||
uint64_t vhash64[8*8] __attribute__ ((aligned (128)));
|
||||
skein512_8way_context ctx_skein;
|
||||
|
||||
//#if defined(__SHA__)
|
||||
// uint32_t hash0[16] __attribute__ ((aligned (64)));
|
||||
// uint32_t hash1[16] __attribute__ ((aligned (64)));
|
||||
// uint32_t hash2[16] __attribute__ ((aligned (64)));
|
||||
// uint32_t hash3[16] __attribute__ ((aligned (64)));
|
||||
// uint32_t hash4[16] __attribute__ ((aligned (64)));
|
||||
// uint32_t hash5[16] __attribute__ ((aligned (64)));
|
||||
// uint32_t hash6[16] __attribute__ ((aligned (64)));
|
||||
// uint32_t hash7[16] __attribute__ ((aligned (64)));
|
||||
// SHA256_CTX ctx_sha256;
|
||||
//#else
|
||||
uint32_t vhash32[16*8] __attribute__ ((aligned (128)));
|
||||
sha256_8way_context ctx_sha256;
|
||||
//#endif
|
||||
|
||||
skein512_8way_init( &ctx_skein );
|
||||
skein512_8way_update( &ctx_skein, input, 80 );
|
||||
skein512_8way_close( &ctx_skein, vhash64 );
|
||||
/*
|
||||
#if defined(__SHA__)
|
||||
dintrlv_8x64( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
|
||||
vhash64, 512 );
|
||||
|
||||
SHA256_Init( &ctx_sha256 );
|
||||
SHA256_Update( &ctx_sha256, (unsigned char*)hash0, 64 );
|
||||
SHA256_Final( (unsigned char*)hash0, &ctx_sha256 );
|
||||
|
||||
SHA256_Init( &ctx_sha256 );
|
||||
SHA256_Update( &ctx_sha256, (unsigned char*)hash1, 64 );
|
||||
SHA256_Final( (unsigned char*)hash1, &ctx_sha256 );
|
||||
|
||||
SHA256_Init( &ctx_sha256 );
|
||||
SHA256_Update( &ctx_sha256, (unsigned char*)hash2, 64 );
|
||||
SHA256_Final( (unsigned char*)hash2, &ctx_sha256 );
|
||||
|
||||
SHA256_Init( &ctx_sha256 );
|
||||
SHA256_Update( &ctx_sha256, (unsigned char*)hash3, 64 );
|
||||
SHA256_Final( (unsigned char*)hash3, &ctx_sha256 );
|
||||
|
||||
SHA256_Init( &ctx_sha256 );
|
||||
SHA256_Update( &ctx_sha256, (unsigned char*)hash4, 64 );
|
||||
SHA256_Final( (unsigned char*)hash4, &ctx_sha256 );
|
||||
|
||||
SHA256_Init( &ctx_sha256 );
|
||||
SHA256_Update( &ctx_sha256, (unsigned char*)hash5, 64 );
|
||||
SHA256_Final( (unsigned char*)hash5, &ctx_sha256 );
|
||||
|
||||
SHA256_Init( &ctx_sha256 );
|
||||
SHA256_Update( &ctx_sha256, (unsigned char*)hash6, 64 );
|
||||
SHA256_Final( (unsigned char*)hash6, &ctx_sha256 );
|
||||
|
||||
SHA256_Init( &ctx_sha256 );
|
||||
SHA256_Update( &ctx_sha256, (unsigned char*)hash7, 64 );
|
||||
SHA256_Final( (unsigned char*)hash7, &ctx_sha256 );
|
||||
|
||||
intrlv_8x32( state, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7, 256 );
|
||||
#else
|
||||
*/
|
||||
|
||||
rintrlv_8x64_8x32( vhash32, vhash64, 512 );
|
||||
// dintrlv_8x64( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
|
||||
// vhash64, 512 );
|
||||
// intrlv_8x32( vhash32, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
// hash7, 512 );
|
||||
|
||||
sha256_8way_init( &ctx_sha256 );
|
||||
sha256_8way( &ctx_sha256, vhash32, 64 );
|
||||
sha256_8way_update( &ctx_sha256, vhash32, 64 );
|
||||
sha256_8way_close( &ctx_sha256, state );
|
||||
//#endif
|
||||
}
|
||||
|
||||
int scanhash_skein_8way( struct work *work, uint32_t max_nonce,
|
||||
@@ -176,7 +118,7 @@ void skeinhash_4way( void *state, const void *input )
|
||||
rintrlv_4x64_4x32( vhash32, vhash64, 512 );
|
||||
|
||||
sha256_4way_init( &ctx_sha256 );
|
||||
sha256_4way( &ctx_sha256, vhash32, 64 );
|
||||
sha256_4way_update( &ctx_sha256, vhash32, 64 );
|
||||
sha256_4way_close( &ctx_sha256, state );
|
||||
#endif
|
||||
}
|
||||
|
@@ -93,12 +93,12 @@ typedef sph_skein_4way_big_context skein256_4way_context;
void skein512_4way_init( skein512_4way_context *sc );
void skein512_4way_update( void *cc, const void *data, size_t len );
void skein512_4way_close( void *cc, void *dst );
#define skein512_4way skein512_4way_update
//#define skein512_4way skein512_4way_update

void skein256_4way_init( skein256_4way_context *sc );
void skein256_4way_update( void *cc, const void *data, size_t len );
void skein256_4way_close( void *cc, void *dst );
#define skein256_4way skein256_4way_update
//#define skein256_4way skein256_4way_update

#ifdef __cplusplus
}
@@ -68,11 +68,11 @@ void skein2hash_4way( void *output, const void *input )
uint64_t hash[16*4] __attribute__ ((aligned (64)));

skein512_4way_init( &ctx );
skein512_4way( &ctx, input, 80 );
skein512_4way_update( &ctx, input, 80 );
skein512_4way_close( &ctx, hash );

skein512_4way_init( &ctx );
skein512_4way( &ctx, hash, 64 );
skein512_4way_update( &ctx, hash, 64 );
skein512_4way_close( &ctx, output );
}

@@ -50,41 +50,138 @@
|
||||
#include <string.h>
|
||||
#include "sm3-hash-4way.h"
|
||||
|
||||
#ifdef __SSE4_2__
|
||||
#ifdef __AVX2__
|
||||
|
||||
void sm3_4way_init( sm3_4way_ctx_t *ctx )
|
||||
#define P0_8W(x) \
|
||||
_mm256_xor_si256( x, _mm256_xor_si256( mm256_rol_32( x, 9 ), \
|
||||
mm256_rol_32( x, 17 ) ) )
|
||||
|
||||
#define P1_8W(x) \
|
||||
_mm256_xor_si256( x, _mm256_xor_si256( mm256_rol_32( x, 15 ), \
|
||||
mm256_rol_32( x, 23 ) ) )
|
||||
|
||||
#define FF0_8W(x,y,z) \
|
||||
_mm256_xor_si256( x, _mm256_xor_si256( y, z ) )
|
||||
|
||||
#define FF1_8W(x,y,z) \
|
||||
_mm256_or_si256( _mm256_or_si256( _mm256_and_si256( x, y ), \
|
||||
_mm256_and_si256( x, z ) ), \
|
||||
_mm256_and_si256( y, z ) )
|
||||
|
||||
#define GG0_8W(x,y,z) FF0_8W(x,y,z)
|
||||
|
||||
#define GG1_8W(x,y,z) \
|
||||
_mm256_or_si256( _mm256_and_si256( x, y ), \
|
||||
_mm256_andnot_si256( x, z ) )
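// FF0_8W/GG0_8W are plain XOR (used for rounds 0-15); FF1_8W is the bitwise
// majority function maj(x,y,z) and GG1_8W is the choose function
// (x & y) | (~x & z), matching the FF_j / GG_j definitions of the SM3 spec.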
|
||||
|
||||
void sm3_8way_compress( __m256i *digest, __m256i *block )
|
||||
{
|
||||
ctx->digest[0] = _mm_set1_epi32( 0x7380166F );
|
||||
ctx->digest[1] = _mm_set1_epi32( 0x4914B2B9 );
|
||||
ctx->digest[2] = _mm_set1_epi32( 0x172442D7 );
|
||||
ctx->digest[3] = _mm_set1_epi32( 0xDA8A0600 );
|
||||
ctx->digest[4] = _mm_set1_epi32( 0xA96F30BC );
|
||||
ctx->digest[5] = _mm_set1_epi32( 0x163138AA );
|
||||
ctx->digest[6] = _mm_set1_epi32( 0xE38DEE4D );
|
||||
ctx->digest[7] = _mm_set1_epi32( 0xB0FB0E4E );
|
||||
ctx->nblocks = 0;
|
||||
ctx->num = 0;
|
||||
__m256i W[68], W1[64];
|
||||
__m256i A = digest[ 0 ];
|
||||
__m256i B = digest[ 1 ];
|
||||
__m256i C = digest[ 2 ];
|
||||
__m256i D = digest[ 3 ];
|
||||
__m256i E = digest[ 4 ];
|
||||
__m256i F = digest[ 5 ];
|
||||
__m256i G = digest[ 6 ];
|
||||
__m256i H = digest[ 7 ];
|
||||
__m256i SS1, SS2, TT1, TT2, T;
|
||||
int j;
|
||||
|
||||
for ( j = 0; j < 16; j++ )
|
||||
W[j] = mm256_bswap_32( block[j] );
|
||||
|
||||
for ( j = 16; j < 68; j++ )
|
||||
W[j] = _mm256_xor_si256( P1_8W( _mm256_xor_si256(
|
||||
_mm256_xor_si256( W[ j-16 ], W[ j-9 ] ),
|
||||
mm256_rol_32( W[ j-3 ], 15 ) ) ),
|
||||
_mm256_xor_si256( mm256_rol_32( W[ j-13 ], 7 ), W[ j-6 ] ) );
|
||||
|
||||
for( j = 0; j < 64; j++ )
|
||||
W1[j] = _mm256_xor_si256( W[j], W[j+4] );
|
||||
|
||||
T = _mm256_set1_epi32( 0x79CC4519UL );
|
||||
for( j =0; j < 16; j++ )
|
||||
{
|
||||
SS1 = mm256_rol_32( _mm256_add_epi32( E, _mm256_add_epi32(
|
||||
mm256_rol_32( A, 12 ), mm256_rol_var_32( T, j ) ) ), 7 );
|
||||
SS2 = _mm256_xor_si256( SS1, mm256_rol_32( A, 12 ) );
|
||||
TT1 = _mm256_add_epi32( _mm256_add_epi32( _mm256_add_epi32(
|
||||
FF0_8W( A, B, C ), D ), SS2 ), W1[j] );
|
||||
TT2 = _mm256_add_epi32( _mm256_add_epi32( _mm256_add_epi32(
|
||||
GG0_8W( E, F, G ), H ), SS1 ), W[j] );
|
||||
D = C;
|
||||
C = mm256_rol_32( B, 9 );
|
||||
B = A;
|
||||
A = TT1;
|
||||
H = G;
|
||||
G = mm256_rol_32( F, 19 );
|
||||
F = E;
|
||||
E = P0_8W( TT2 );
|
||||
}
|
||||
|
||||
T = _mm256_set1_epi32( 0x7A879D8AUL );
|
||||
for( j =16; j < 64; j++ )
|
||||
{
|
||||
SS1 = mm256_rol_32( _mm256_add_epi32( _mm256_add_epi32(
|
||||
mm256_rol_32(A,12), E ), mm256_rol_var_32( T, j&31 ) ), 7 );
|
||||
SS2 = _mm256_xor_si256( SS1, mm256_rol_32( A, 12 ) );
|
||||
TT1 = _mm256_add_epi32( _mm256_add_epi32( _mm256_add_epi32(
|
||||
FF1_8W( A, B, C ), D ), SS2 ), W1[j] );
|
||||
TT2 = _mm256_add_epi32( _mm256_add_epi32( _mm256_add_epi32(
|
||||
GG1_8W( E, F, G ), H ), SS1 ), W[j] );
|
||||
D = C;
|
||||
C = mm256_rol_32( B, 9 );
|
||||
B = A;
|
||||
A = TT1;
|
||||
H = G;
|
||||
G = mm256_rol_32( F, 19 );
|
||||
F = E;
|
||||
E = P0_8W( TT2 );
|
||||
}
|
||||
|
||||
digest[0] = _mm256_xor_si256( digest[0], A );
|
||||
digest[1] = _mm256_xor_si256( digest[1], B );
|
||||
digest[2] = _mm256_xor_si256( digest[2], C );
|
||||
digest[3] = _mm256_xor_si256( digest[3], D );
|
||||
digest[4] = _mm256_xor_si256( digest[4], E );
|
||||
digest[5] = _mm256_xor_si256( digest[5], F );
|
||||
digest[6] = _mm256_xor_si256( digest[6], G );
|
||||
digest[7] = _mm256_xor_si256( digest[7], H );
|
||||
}
|
||||
|
||||
void sm3_4way( void *cc, const void *data, size_t len )
|
||||
void sm3_8way_init( sm3_8way_ctx_t *ctx )
|
||||
{
|
||||
sm3_4way_ctx_t *ctx = (sm3_4way_ctx_t*)cc;
|
||||
__m128i *block = (__m128i*)ctx->block;
|
||||
__m128i *vdata = (__m128i*)data;
|
||||
ctx->digest[0] = _mm256_set1_epi32( 0x7380166F );
|
||||
ctx->digest[1] = _mm256_set1_epi32( 0x4914B2B9 );
|
||||
ctx->digest[2] = _mm256_set1_epi32( 0x172442D7 );
|
||||
ctx->digest[3] = _mm256_set1_epi32( 0xDA8A0600 );
|
||||
ctx->digest[4] = _mm256_set1_epi32( 0xA96F30BC );
|
||||
ctx->digest[5] = _mm256_set1_epi32( 0x163138AA );
|
||||
ctx->digest[6] = _mm256_set1_epi32( 0xE38DEE4D );
|
||||
ctx->digest[7] = _mm256_set1_epi32( 0xB0FB0E4E );
|
||||
ctx->nblocks = 0;
|
||||
ctx->num = 0;
|
||||
}
|
||||
|
||||
void sm3_8way_update( void *cc, const void *data, size_t len )
|
||||
{
|
||||
sm3_8way_ctx_t *ctx = (sm3_8way_ctx_t*)cc;
|
||||
__m256i *block = (__m256i*)ctx->block;
|
||||
__m256i *vdata = (__m256i*)data;
|
||||
if ( ctx->num )
|
||||
{
|
||||
unsigned int left = SM3_BLOCK_SIZE - ctx->num;
|
||||
if ( len < left )
|
||||
{
|
||||
memcpy_128( block + (ctx->num >> 2), vdata , len>>2 );
|
||||
memcpy_256( block + (ctx->num >> 2), vdata , len>>2 );
|
||||
ctx->num += len;
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
memcpy_128( block + (ctx->num >> 2), vdata , left>>2 );
|
||||
sm3_4way_compress( ctx->digest, block );
|
||||
memcpy_256( block + (ctx->num >> 2), vdata , left>>2 );
|
||||
sm3_8way_compress( ctx->digest, block );
|
||||
ctx->nblocks++;
|
||||
vdata += left>>2;
|
||||
len -= left;
|
||||
@@ -92,49 +189,53 @@ void sm3_4way( void *cc, const void *data, size_t len )
|
||||
}
|
||||
while ( len >= SM3_BLOCK_SIZE )
|
||||
{
|
||||
sm3_4way_compress( ctx->digest, vdata );
|
||||
sm3_8way_compress( ctx->digest, vdata );
|
||||
ctx->nblocks++;
|
||||
vdata += SM3_BLOCK_SIZE>>2;
|
||||
len -= SM3_BLOCK_SIZE;
|
||||
}
|
||||
ctx->num = len;
|
||||
if ( len )
|
||||
memcpy_128( block, vdata, len>>2 );
|
||||
memcpy_256( block, vdata, len>>2 );
|
||||
}
|
||||
|
||||
void sm3_4way_close( void *cc, void *dst )
|
||||
void sm3_8way_close( void *cc, void *dst )
|
||||
{
|
||||
sm3_4way_ctx_t *ctx = (sm3_4way_ctx_t*)cc;
|
||||
__m128i *hash = (__m128i*)dst;
|
||||
__m128i *count = (__m128i*)(ctx->block + ( (SM3_BLOCK_SIZE - 8) >> 2 ) );
|
||||
__m128i *block = (__m128i*)ctx->block;
|
||||
sm3_8way_ctx_t *ctx = (sm3_8way_ctx_t*)cc;
|
||||
__m256i *hash = (__m256i*)dst;
|
||||
__m256i *count = (__m256i*)(ctx->block + ( (SM3_BLOCK_SIZE - 8) >> 2 ) );
|
||||
__m256i *block = (__m256i*)ctx->block;
|
||||
int i;
|
||||
|
||||
block[ctx->num] = _mm_set1_epi32( 0x80 );
|
||||
block[ctx->num] = _mm256_set1_epi32( 0x80 );
|
||||
|
||||
if ( ctx->num + 8 <= SM3_BLOCK_SIZE )
|
||||
{
|
||||
memset_zero_128( block + (ctx->num >> 2) + 1,
|
||||
( SM3_BLOCK_SIZE - ctx->num - 8 ) >> 2 );
|
||||
memset_zero_256( block + (ctx->num >> 2) + 1,
|
||||
( SM3_BLOCK_SIZE - ctx->num - 8 ) >> 2 );
|
||||
}
|
||||
else
|
||||
{
|
||||
memset_zero_128( block + (ctx->num >> 2) + 1,
|
||||
memset_zero_256( block + (ctx->num >> 2) + 1,
|
||||
( SM3_BLOCK_SIZE - (ctx->num >> 2) - 1 ) );
|
||||
sm3_4way_compress( ctx->digest, block );
|
||||
memset_zero_128( block, ( SM3_BLOCK_SIZE - 8 ) >> 2 );
|
||||
sm3_8way_compress( ctx->digest, block );
|
||||
memset_zero_256( block, ( SM3_BLOCK_SIZE - 8 ) >> 2 );
|
||||
}
|
||||
|
||||
count[0] = mm128_bswap_32(
|
||||
_mm_set1_epi32( ctx->nblocks >> 23 ) );
|
||||
count[1] = mm128_bswap_32( _mm_set1_epi32( ( ctx->nblocks << 9 ) +
|
||||
count[0] = mm256_bswap_32(
|
||||
_mm256_set1_epi32( ctx->nblocks >> 23 ) );
|
||||
count[1] = mm256_bswap_32( _mm256_set1_epi32( ( ctx->nblocks << 9 ) +
|
||||
( ctx->num << 3 ) ) );
|
||||
sm3_4way_compress( ctx->digest, block );
|
||||
sm3_8way_compress( ctx->digest, block );
|
||||
|
||||
for ( i = 0; i < 8 ; i++ )
|
||||
hash[i] = mm128_bswap_32( ctx->digest[i] );
|
||||
hash[i] = mm256_bswap_32( ctx->digest[i] );
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(__SSE2__)
|
||||
|
||||
#define P0(x) _mm_xor_si128( x, _mm_xor_si128( mm128_rol_32( x, 9 ), \
|
||||
mm128_rol_32( x, 17 ) ) )
|
||||
#define P1(x) _mm_xor_si128( x, _mm_xor_si128( mm128_rol_32( x, 15 ), \
|
||||
@@ -227,5 +328,88 @@ void sm3_4way_compress( __m128i *digest, __m128i *block )
|
||||
digest[7] = _mm_xor_si128( digest[7], H );
|
||||
}
|
||||
|
||||
void sm3_4way_init( sm3_4way_ctx_t *ctx )
|
||||
{
|
||||
ctx->digest[0] = _mm_set1_epi32( 0x7380166F );
|
||||
ctx->digest[1] = _mm_set1_epi32( 0x4914B2B9 );
|
||||
ctx->digest[2] = _mm_set1_epi32( 0x172442D7 );
|
||||
ctx->digest[3] = _mm_set1_epi32( 0xDA8A0600 );
|
||||
ctx->digest[4] = _mm_set1_epi32( 0xA96F30BC );
|
||||
ctx->digest[5] = _mm_set1_epi32( 0x163138AA );
|
||||
ctx->digest[6] = _mm_set1_epi32( 0xE38DEE4D );
|
||||
ctx->digest[7] = _mm_set1_epi32( 0xB0FB0E4E );
|
||||
ctx->nblocks = 0;
|
||||
ctx->num = 0;
|
||||
}
|
||||
|
||||
void sm3_4way_update( void *cc, const void *data, size_t len )
|
||||
{
|
||||
sm3_4way_ctx_t *ctx = (sm3_4way_ctx_t*)cc;
|
||||
__m128i *block = (__m128i*)ctx->block;
|
||||
__m128i *vdata = (__m128i*)data;
|
||||
|
||||
if ( ctx->num )
|
||||
{
|
||||
unsigned int left = SM3_BLOCK_SIZE - ctx->num;
|
||||
if ( len < left )
|
||||
{
|
||||
memcpy_128( block + (ctx->num >> 2), vdata , len>>2 );
|
||||
ctx->num += len;
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
memcpy_128( block + (ctx->num >> 2), vdata , left>>2 );
|
||||
sm3_4way_compress( ctx->digest, block );
|
||||
ctx->nblocks++;
|
||||
vdata += left>>2;
|
||||
len -= left;
|
||||
}
|
||||
}
|
||||
while ( len >= SM3_BLOCK_SIZE )
|
||||
{
|
||||
sm3_4way_compress( ctx->digest, vdata );
|
||||
ctx->nblocks++;
|
||||
vdata += SM3_BLOCK_SIZE>>2;
|
||||
len -= SM3_BLOCK_SIZE;
|
||||
}
|
||||
ctx->num = len;
|
||||
if ( len )
|
||||
memcpy_128( block, vdata, len>>2 );
|
||||
}
|
||||
|
||||
void sm3_4way_close( void *cc, void *dst )
|
||||
{
|
||||
sm3_4way_ctx_t *ctx = (sm3_4way_ctx_t*)cc;
|
||||
__m128i *hash = (__m128i*)dst;
|
||||
__m128i *count = (__m128i*)(ctx->block + ( (SM3_BLOCK_SIZE - 8) >> 2 ) );
|
||||
__m128i *block = (__m128i*)ctx->block;
|
||||
int i;
|
||||
|
||||
block[ctx->num] = _mm_set1_epi32( 0x80 );
|
||||
|
||||
if ( ctx->num + 8 <= SM3_BLOCK_SIZE )
|
||||
{
|
||||
memset_zero_128( block + (ctx->num >> 2) + 1,
|
||||
( SM3_BLOCK_SIZE - ctx->num - 8 ) >> 2 );
|
||||
}
|
||||
else
|
||||
{
|
||||
memset_zero_128( block + (ctx->num >> 2) + 1,
|
||||
( SM3_BLOCK_SIZE - (ctx->num >> 2) - 1 ) );
|
||||
sm3_4way_compress( ctx->digest, block );
|
||||
memset_zero_128( block, ( SM3_BLOCK_SIZE - 8 ) >> 2 );
|
||||
}
|
||||
|
||||
count[0] = mm128_bswap_32(
|
||||
_mm_set1_epi32( ctx->nblocks >> 23 ) );
|
||||
count[1] = mm128_bswap_32( _mm_set1_epi32( ( ctx->nblocks << 9 ) +
|
||||
( ctx->num << 3 ) ) );
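// The two count words store the total message length in bits as a 64-bit
// big-endian value: nblocks full 64-byte blocks contribute nblocks*512 bits
// (high word nblocks >> 23, low word nblocks << 9) and the num buffered
// bytes add num*8 bits to the low word.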
|
||||
sm3_4way_compress( ctx->digest, block );
|
||||
|
||||
for ( i = 0; i < 8 ; i++ )
|
||||
hash[i] = mm128_bswap_32( ctx->digest[i] );
|
||||
}

#endif

@@ -48,14 +48,13 @@
 */

#ifndef SPH_SM3_HASH_4WAY_H
#define SPH_SM3_HASH_4WAY_H
#define SPH_SM3_HASH_4WAY_H 1

#define SM3_DIGEST_LENGTH   32
#define SM3_BLOCK_SIZE      64
#define SM3_CBLOCK          (SM3_BLOCK_SIZE)
#define SM3_HMAC_SIZE       (SM3_DIGEST_LENGTH)


#include <sys/types.h>
#include <stdint.h>
#include <string.h>
@@ -65,7 +64,6 @@
extern "C" {
#endif


typedef struct {
   __m128i block[16] __attribute__ ((aligned (64)));
   __m128i digest[8];
@@ -74,15 +72,24 @@ typedef struct {
} sm3_4way_ctx_t;

void sm3_4way_init( sm3_4way_ctx_t *ctx );
//void sm3_4way_update( sm3_4way_ctx_t *ctx, const unsigned char* data,
//                      size_t data_len );
//void sm3_4way_final( sm3_4way_ctx_t *ctx,
//                     unsigned char digest[SM3_DIGEST_LENGTH] );
void sm3_4way_compress( __m128i *digest, __m128i *block );

void sm3_4way(void *cc, const void *data, size_t len);
void sm3_4way_update(void *cc, const void *data, size_t len);
void sm3_4way_close(void *cc, void *dst);

#if defined(__AVX2__)

typedef struct {
   __m256i block[16] __attribute__ ((aligned (64)));
   __m256i digest[8];
   uint32_t nblocks;
   uint32_t num;
} sm3_8way_ctx_t;

void sm3_8way_init( sm3_8way_ctx_t *ctx );
void sm3_8way_update(void *cc, const void *data, size_t len);
void sm3_8way_close(void *cc, void *dst);

#endif

#ifdef __cplusplus
}
#endif
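
/* Usage sketch only, not from this patch (sm3_4way_one_block is a
   hypothetical helper): hashing one 64-byte message per lane with the
   4-way API declared above.  Assumes the four inputs are already
   interleaved 4x32 in vdata and leaves the four interleaved 256-bit
   digests in vhash. */
static inline void sm3_4way_one_block( const void *vdata, void *vhash )
{
   sm3_4way_ctx_t ctx;
   sm3_4way_init( &ctx );
   sm3_4way_update( &ctx, vdata, 64 );   // length is bytes per lane
   sm3_4way_close( &ctx, vhash );
}
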
@@ -282,11 +282,11 @@ void c11_4way_hash( void *state, const void *input )
|
||||
memcpy( &ctx, &c11_4way_ctx, sizeof(c11_4way_ctx) );
|
||||
|
||||
// 1 Blake 4way
|
||||
blake512_4way( &ctx.blake, input, 80 );
|
||||
blake512_4way_update( &ctx.blake, input, 80 );
|
||||
blake512_4way_close( &ctx.blake, vhash );
|
||||
|
||||
// 2 Bmw
|
||||
bmw512_4way( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_update( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
|
||||
// Serial
|
||||
@@ -305,15 +305,15 @@ void c11_4way_hash( void *state, const void *input )
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
|
||||
// 4 JH
|
||||
jh512_4way( &ctx.jh, vhash, 64 );
|
||||
jh512_4way_update( &ctx.jh, vhash, 64 );
|
||||
jh512_4way_close( &ctx.jh, vhash );
|
||||
|
||||
// 5 Keccak
|
||||
keccak512_4way( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_close( &ctx.keccak, vhash );
|
||||
|
||||
// 6 Skein
|
||||
skein512_4way( &ctx.skein, vhash, 64 );
|
||||
skein512_4way_update( &ctx.skein, vhash, 64 );
|
||||
skein512_4way_close( &ctx.skein, vhash );
|
||||
|
||||
// Serial
|
||||
|
@@ -84,13 +84,13 @@ void timetravel_4way_hash(void *output, const void *input)
|
||||
switch ( permutation[i] )
|
||||
{
|
||||
case 0:
|
||||
blake512_4way( &ctx.blake, vhashA, dataLen );
|
||||
blake512_4way_update( &ctx.blake, vhashA, dataLen );
|
||||
blake512_4way_close( &ctx.blake, vhashB );
|
||||
if ( i == 7 )
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhashB, dataLen<<3 );
|
||||
break;
|
||||
case 1:
|
||||
bmw512_4way( &ctx.bmw, vhashA, dataLen );
|
||||
bmw512_4way_update( &ctx.bmw, vhashA, dataLen );
|
||||
bmw512_4way_close( &ctx.bmw, vhashB );
|
||||
if ( i == 7 )
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhashB, dataLen<<3 );
|
||||
@@ -112,19 +112,19 @@ void timetravel_4way_hash(void *output, const void *input)
|
||||
intrlv_4x64( vhashB, hash0, hash1, hash2, hash3, dataLen<<3 );
|
||||
break;
|
||||
case 3:
|
||||
skein512_4way( &ctx.skein, vhashA, dataLen );
|
||||
skein512_4way_update( &ctx.skein, vhashA, dataLen );
|
||||
skein512_4way_close( &ctx.skein, vhashB );
|
||||
if ( i == 7 )
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhashB, dataLen<<3 );
|
||||
break;
|
||||
case 4:
|
||||
jh512_4way( &ctx.jh, vhashA, dataLen );
|
||||
jh512_4way_update( &ctx.jh, vhashA, dataLen );
|
||||
jh512_4way_close( &ctx.jh, vhashB );
|
||||
if ( i == 7 )
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhashB, dataLen<<3 );
|
||||
break;
|
||||
case 5:
|
||||
keccak512_4way( &ctx.keccak, vhashA, dataLen );
|
||||
keccak512_4way_update( &ctx.keccak, vhashA, dataLen );
|
||||
keccak512_4way_close( &ctx.keccak, vhashB );
|
||||
if ( i == 7 )
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhashB, dataLen<<3 );
|
||||
|
@@ -90,13 +90,13 @@ void timetravel10_4way_hash(void *output, const void *input)
|
||||
switch ( permutation[i] )
|
||||
{
|
||||
case 0:
|
||||
blake512_4way( &ctx.blake, vhashA, dataLen );
|
||||
blake512_4way_update( &ctx.blake, vhashA, dataLen );
|
||||
blake512_4way_close( &ctx.blake, vhashB );
|
||||
if ( i == 9 )
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhashB, dataLen<<3 );
|
||||
break;
|
||||
case 1:
|
||||
bmw512_4way( &ctx.bmw, vhashA, dataLen );
|
||||
bmw512_4way_update( &ctx.bmw, vhashA, dataLen );
|
||||
bmw512_4way_close( &ctx.bmw, vhashB );
|
||||
if ( i == 9 )
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhashB, dataLen<<3 );
|
||||
@@ -118,19 +118,19 @@ void timetravel10_4way_hash(void *output, const void *input)
|
||||
intrlv_4x64( vhashB, hash0, hash1, hash2, hash3, dataLen<<3 );
|
||||
break;
|
||||
case 3:
|
||||
skein512_4way( &ctx.skein, vhashA, dataLen );
|
||||
skein512_4way_update( &ctx.skein, vhashA, dataLen );
|
||||
skein512_4way_close( &ctx.skein, vhashB );
|
||||
if ( i == 9 )
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhashB, dataLen<<3 );
|
||||
break;
|
||||
case 4:
|
||||
jh512_4way( &ctx.jh, vhashA, dataLen );
|
||||
jh512_4way_update( &ctx.jh, vhashA, dataLen );
|
||||
jh512_4way_close( &ctx.jh, vhashB );
|
||||
if ( i == 9 )
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhashB, dataLen<<3 );
|
||||
break;
|
||||
case 5:
|
||||
keccak512_4way( &ctx.keccak, vhashA, dataLen );
|
||||
keccak512_4way_update( &ctx.keccak, vhashA, dataLen );
|
||||
keccak512_4way_close( &ctx.keccak, vhashB );
|
||||
if ( i == 9 )
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhashB, dataLen<<3 );
|
||||
|
@@ -282,11 +282,11 @@ void x11_4way_hash( void *state, const void *input )
|
||||
memcpy( &ctx, &x11_4way_ctx, sizeof(x11_4way_ctx) );
|
||||
|
||||
// 1 Blake 4way
|
||||
blake512_4way( &ctx.blake, input, 80 );
|
||||
blake512_4way_update( &ctx.blake, input, 80 );
|
||||
blake512_4way_close( &ctx.blake, vhash );
|
||||
|
||||
// 2 Bmw
|
||||
bmw512_4way( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_update( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
|
||||
// Serial
|
||||
@@ -305,15 +305,15 @@ void x11_4way_hash( void *state, const void *input )
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
|
||||
// 4 Skein
|
||||
skein512_4way( &ctx.skein, vhash, 64 );
|
||||
skein512_4way_update( &ctx.skein, vhash, 64 );
|
||||
skein512_4way_close( &ctx.skein, vhash );
|
||||
|
||||
// 5 JH
|
||||
jh512_4way( &ctx.jh, vhash, 64 );
|
||||
jh512_4way_update( &ctx.jh, vhash, 64 );
|
||||
jh512_4way_close( &ctx.jh, vhash );
|
||||
|
||||
// 6 Keccak
|
||||
keccak512_4way( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_close( &ctx.keccak, vhash );
|
||||
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
@@ -85,12 +85,12 @@ void x11evo_4way_hash( void *state, const void *input )
|
||||
switch ( idx )
|
||||
{
|
||||
case 0:
|
||||
blake512_4way( &ctx.blake, input, 80 );
|
||||
blake512_4way_update( &ctx.blake, input, 80 );
|
||||
blake512_4way_close( &ctx.blake, vhash );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 64<<3 );
|
||||
break;
|
||||
case 1:
|
||||
bmw512_4way( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_update( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
if ( i >= len-1 )
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 64<<3 );
|
||||
@@ -112,19 +112,19 @@ void x11evo_4way_hash( void *state, const void *input )
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 64<<3 );
|
||||
break;
|
||||
case 3:
|
||||
skein512_4way( &ctx.skein, vhash, 64 );
|
||||
skein512_4way_update( &ctx.skein, vhash, 64 );
|
||||
skein512_4way_close( &ctx.skein, vhash );
|
||||
if ( i >= len-1 )
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 64<<3 );
|
||||
break;
|
||||
case 4:
|
||||
jh512_4way( &ctx.jh, vhash, 64 );
|
||||
jh512_4way_update( &ctx.jh, vhash, 64 );
|
||||
jh512_4way_close( &ctx.jh, vhash );
|
||||
if ( i >= len-1 )
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 64<<3 );
|
||||
break;
|
||||
case 5:
|
||||
keccak512_4way( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_close( &ctx.keccak, vhash );
|
||||
if ( i >= len-1 )
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 64<<3 );
|
||||
|
@@ -310,10 +310,10 @@ void x11gost_4way_hash( void *state, const void *input )
|
||||
x11gost_4way_ctx_holder ctx;
|
||||
memcpy( &ctx, &x11gost_4way_ctx, sizeof(x11gost_4way_ctx) );
|
||||
|
||||
blake512_4way( &ctx.blake, input, 80 );
|
||||
blake512_4way_update( &ctx.blake, input, 80 );
|
||||
blake512_4way_close( &ctx.blake, vhash );
|
||||
|
||||
bmw512_4way( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_update( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
|
||||
// Serial
|
||||
@@ -333,13 +333,13 @@ void x11gost_4way_hash( void *state, const void *input )
|
||||
// 4way
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
|
||||
skein512_4way( &ctx.skein, vhash, 64 );
|
||||
skein512_4way_update( &ctx.skein, vhash, 64 );
|
||||
skein512_4way_close( &ctx.skein, vhash );
|
||||
|
||||
jh512_4way( &ctx.jh, vhash, 64 );
|
||||
jh512_4way_update( &ctx.jh, vhash, 64 );
|
||||
jh512_4way_close( &ctx.jh, vhash );
|
||||
|
||||
keccak512_4way( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_close( &ctx.keccak, vhash );
|
||||
|
||||
// Serial
|
||||
|
@@ -272,10 +272,10 @@ void x12_4way_hash( void *state, const void *input )
|
||||
x12_4way_ctx_holder ctx;
|
||||
memcpy( &ctx, &x12_4way_ctx, sizeof(x12_4way_ctx) );
|
||||
|
||||
blake512_4way( &ctx.blake, input, 80 );
|
||||
blake512_4way_update( &ctx.blake, input, 80 );
|
||||
blake512_4way_close( &ctx.blake, vhash );
|
||||
|
||||
bmw512_4way( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_update( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
@@ -328,16 +328,16 @@ void x12_4way_hash( void *state, const void *input )
|
||||
|
||||
// Parallel 4way 64 bit
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
skein512_4way( &ctx.skein, vhash, 64 );
|
||||
skein512_4way_update( &ctx.skein, vhash, 64 );
|
||||
skein512_4way_close( &ctx.skein, vhash );
|
||||
|
||||
jh512_4way( &ctx.jh, vhash, 64 );
|
||||
jh512_4way_update( &ctx.jh, vhash, 64 );
|
||||
jh512_4way_close( &ctx.jh, vhash );
|
||||
|
||||
keccak512_4way( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_close( &ctx.keccak, vhash );
|
||||
|
||||
hamsi512_4way( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_4way_update( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_4way_close( &ctx.hamsi, vhash );
|
||||
|
||||
dintrlv_4x64( state, state+32, state+64, state+96, vhash, 256 );
|
||||
|
@@ -225,11 +225,11 @@ void phi1612_4way_hash( void *state, const void *input )
|
||||
memcpy( &ctx, &phi1612_4way_ctx, sizeof(phi1612_4way_ctx) );
|
||||
|
||||
// Skein parallel 4way
|
||||
skein512_4way( &ctx.skein, input, 80 );
|
||||
skein512_4way_update( &ctx.skein, input, 80 );
|
||||
skein512_4way_close( &ctx.skein, vhash );
|
||||
|
||||
// JH
|
||||
jh512_4way( &ctx.jh, vhash, 64 );
|
||||
jh512_4way_update( &ctx.jh, vhash, 64 );
|
||||
jh512_4way_close( &ctx.jh, vhash );
|
||||
|
||||
// Serial to the end
|
||||
|
@@ -168,7 +168,7 @@ void skunk_4way_hash( void *output, const void *input )
|
||||
skunk_4way_ctx_holder ctx __attribute__ ((aligned (64)));
|
||||
memcpy( &ctx, &skunk_4way_ctx, sizeof(skunk_4way_ctx) );
|
||||
|
||||
skein512_4way( &ctx.skein, input, 80 );
|
||||
skein512_4way_update( &ctx.skein, input, 80 );
|
||||
skein512_4way_close( &ctx.skein, vhash );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
|
@@ -321,11 +321,11 @@ void x13_4way_hash( void *state, const void *input )
|
||||
memcpy( &ctx, &x13_4way_ctx, sizeof(x13_4way_ctx) );
|
||||
|
||||
// 1 Blake
|
||||
blake512_4way( &ctx.blake, input, 80 );
|
||||
blake512_4way_update( &ctx.blake, input, 80 );
|
||||
blake512_4way_close( &ctx.blake, vhash );
|
||||
|
||||
// 2 Bmw
|
||||
bmw512_4way( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_update( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
|
||||
// Serial
|
||||
@@ -344,15 +344,15 @@ void x13_4way_hash( void *state, const void *input )
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
|
||||
// 4 Skein
|
||||
skein512_4way( &ctx.skein, vhash, 64 );
|
||||
skein512_4way_update( &ctx.skein, vhash, 64 );
|
||||
skein512_4way_close( &ctx.skein, vhash );
|
||||
|
||||
// 5 JH
|
||||
jh512_4way( &ctx.jh, vhash, 64 );
|
||||
jh512_4way_update( &ctx.jh, vhash, 64 );
|
||||
jh512_4way_close( &ctx.jh, vhash );
|
||||
|
||||
// 6 Keccak
|
||||
keccak512_4way( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_close( &ctx.keccak, vhash );
|
||||
|
||||
// Serial
|
||||
@@ -416,7 +416,7 @@ void x13_4way_hash( void *state, const void *input )
|
||||
|
||||
// 12 Hamsi parallel 4way 32 bit
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
hamsi512_4way( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_4way_update( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_4way_close( &ctx.hamsi, vhash );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
|
@@ -1,7 +1,4 @@
|
||||
#include "x13sm3-gate.h"
|
||||
|
||||
#if defined(X13SM3_4WAY)
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
@@ -20,6 +17,281 @@
|
||||
#include "algo/hamsi/hamsi-hash-4way.h"
|
||||
#include "algo/fugue/sph_fugue.h"
|
||||
|
||||
#if defined(X13BCD_8WAY)
|
||||
|
||||
typedef struct {
|
||||
blake512_8way_context blake;
|
||||
bmw512_8way_context bmw;
|
||||
hashState_groestl groestl;
|
||||
skein512_8way_context skein;
|
||||
jh512_8way_context jh;
|
||||
keccak512_8way_context keccak;
|
||||
cubehashParam cube;
|
||||
sph_shavite512_context shavite;
|
||||
simd_4way_context simd;
|
||||
hashState_echo echo;
|
||||
sm3_8way_ctx_t sm3;
|
||||
hamsi512_8way_context hamsi;
|
||||
sph_fugue512_context fugue;
|
||||
} x13bcd_8way_ctx_holder;
|
||||
|
||||
x13bcd_8way_ctx_holder x13bcd_8way_ctx __attribute__ ((aligned (64)));
|
||||
static __thread blake512_8way_context x13bcd_8way_ctx_mid;
|
||||
|
||||
void init_x13bcd_8way_ctx()
|
||||
{
|
||||
blake512_8way_init( &x13bcd_8way_ctx.blake );
|
||||
bmw512_8way_init( &x13bcd_8way_ctx.bmw );
|
||||
init_groestl( &x13bcd_8way_ctx.groestl, 64 );
|
||||
skein512_8way_init( &x13bcd_8way_ctx.skein );
|
||||
jh512_8way_init( &x13bcd_8way_ctx.jh );
|
||||
keccak512_8way_init( &x13bcd_8way_ctx.keccak );
|
||||
cubehashInit( &x13bcd_8way_ctx.cube, 512, 16, 32 );
|
||||
sph_shavite512_init( &x13bcd_8way_ctx.shavite );
|
||||
simd_4way_init( &x13bcd_8way_ctx.simd, 512 );
|
||||
init_echo( &x13bcd_8way_ctx.echo, 512 );
|
||||
sm3_8way_init( &x13bcd_8way_ctx.sm3 );
|
||||
hamsi512_8way_init( &x13bcd_8way_ctx.hamsi );
|
||||
sph_fugue512_init( &x13bcd_8way_ctx.fugue );
|
||||
};
|
||||
|
||||
void x13bcd_8way_hash( void *state, const void *input )
|
||||
{
|
||||
uint64_t vhash[8*8] __attribute__ ((aligned (128)));
|
||||
uint64_t vhashA[8*8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash0[8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash1[8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash2[8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash3[8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash4[8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash5[8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash6[8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash7[8] __attribute__ ((aligned (64)));
|
||||
x13bcd_8way_ctx_holder ctx;
|
||||
memcpy( &ctx, &x13bcd_8way_ctx, sizeof(x13bcd_8way_ctx) );
|
||||
|
||||
// Blake
|
||||
memcpy( &ctx.blake, &x13bcd_8way_ctx_mid, sizeof(x13bcd_8way_ctx_mid) );
|
||||
blake512_8way_update( &ctx.blake, input + (64<<3), 16 );
|
||||
blake512_8way_close( &ctx.blake, vhash );
|
||||
|
||||
// Bmw
|
||||
bmw512_8way_update( &ctx.bmw, vhash, 64 );
|
||||
bmw512_8way_close( &ctx.bmw, vhash );
|
||||
|
||||
// Serial
|
||||
dintrlv_8x64_512( hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7, vhash );
|
||||
|
||||
// Groestl
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash1, (char*)hash1, 512 );
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash2, (char*)hash2, 512 );
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash4, (char*)hash4, 512 );
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash5, (char*)hash5, 512 );
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash6, (char*)hash6, 512 );
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash7, (char*)hash7, 512 );
|
||||
|
||||
// Parallel 4way
|
||||
intrlv_8x64_512( vhash, hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7 );
|
||||
|
||||
// Skein
|
||||
skein512_8way_update( &ctx.skein, vhash, 64 );
|
||||
skein512_8way_close( &ctx.skein, vhash );
|
||||
|
||||
// JH
|
||||
jh512_8way_update( &ctx.jh, vhash, 64 );
|
||||
jh512_8way_close( &ctx.jh, vhash );
|
||||
|
||||
// Keccak
|
||||
keccak512_8way_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_8way_close( &ctx.keccak, vhash );
|
||||
|
||||
// SM3 parallel 32 bit
|
||||
rintrlv_8x64_8x32( vhashA, vhash, 512 );
|
||||
memset( vhash, 0, sizeof vhash );
|
||||
sm3_8way_update( &ctx.sm3, vhashA, 64 );
|
||||
sm3_8way_close( &ctx.sm3, vhash );
|
||||
dintrlv_8x32_512( hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7, vhash );
|
||||
|
||||
// Cubehash
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash0, (const byte*) hash0, 64 );
|
||||
memcpy( &ctx.cube, &x13bcd_8way_ctx.cube, sizeof(cubehashParam) );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash1, (const byte*) hash1, 64 );
|
||||
memcpy( &ctx.cube, &x13bcd_8way_ctx.cube, sizeof(cubehashParam) );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash2, (const byte*) hash2, 64 );
|
||||
memcpy( &ctx.cube, &x13bcd_8way_ctx.cube, sizeof(cubehashParam) );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash3, (const byte*) hash3, 64 );
|
||||
memcpy( &ctx.cube, &x13bcd_8way_ctx.cube, sizeof(cubehashParam) );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash4, (const byte*) hash4, 64 );
|
||||
memcpy( &ctx.cube, &x13bcd_8way_ctx.cube, sizeof(cubehashParam) );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash5, (const byte*) hash5, 64 );
|
||||
memcpy( &ctx.cube, &x13bcd_8way_ctx.cube, sizeof(cubehashParam) );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash6, (const byte*) hash6, 64 );
|
||||
memcpy( &ctx.cube, &x13bcd_8way_ctx.cube, sizeof(cubehashParam) );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash7, (const byte*) hash7, 64 );
|
||||
|
||||
// Shavite
|
||||
sph_shavite512( &ctx.shavite, hash0, 64 );
|
||||
sph_shavite512_close( &ctx.shavite, hash0 );
|
||||
memcpy( &ctx.shavite, &x13bcd_8way_ctx.shavite,
|
||||
sizeof(sph_shavite512_context) );
|
||||
sph_shavite512( &ctx.shavite, hash1, 64 );
|
||||
sph_shavite512_close( &ctx.shavite, hash1 );
|
||||
memcpy( &ctx.shavite, &x13bcd_8way_ctx.shavite,
|
||||
sizeof(sph_shavite512_context) );
|
||||
sph_shavite512( &ctx.shavite, hash2, 64 );
|
||||
sph_shavite512_close( &ctx.shavite, hash2 );
|
||||
memcpy( &ctx.shavite, &x13bcd_8way_ctx.shavite,
|
||||
sizeof(sph_shavite512_context) );
|
||||
sph_shavite512( &ctx.shavite, hash3, 64 );
|
||||
sph_shavite512_close( &ctx.shavite, hash3 );
|
||||
memcpy( &ctx.shavite, &x13bcd_8way_ctx.shavite,
|
||||
sizeof(sph_shavite512_context) );
|
||||
sph_shavite512( &ctx.shavite, hash4, 64 );
|
||||
sph_shavite512_close( &ctx.shavite, hash4 );
|
||||
memcpy( &ctx.shavite, &x13bcd_8way_ctx.shavite,
|
||||
sizeof(sph_shavite512_context) );
|
||||
sph_shavite512( &ctx.shavite, hash5, 64 );
|
||||
sph_shavite512_close( &ctx.shavite, hash5 );
|
||||
memcpy( &ctx.shavite, &x13bcd_8way_ctx.shavite,
|
||||
sizeof(sph_shavite512_context) );
|
||||
sph_shavite512( &ctx.shavite, hash6, 64 );
|
||||
sph_shavite512_close( &ctx.shavite, hash6 );
|
||||
memcpy( &ctx.shavite, &x13bcd_8way_ctx.shavite,
|
||||
sizeof(sph_shavite512_context) );
|
||||
sph_shavite512( &ctx.shavite, hash7, 64 );
|
||||
sph_shavite512_close( &ctx.shavite, hash7 );
|
||||
|
||||
// Simd
|
||||
intrlv_4x128( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
simd_4way_update_close( &ctx.simd, vhash, vhash, 512 );
|
||||
dintrlv_4x128( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
intrlv_4x128( vhash, hash4, hash5, hash6, hash7, 512 );
|
||||
simd_4way_init( &ctx.simd, 512 );
|
||||
simd_4way_update_close( &ctx.simd, vhash, vhash, 512 );
|
||||
dintrlv_4x128( hash4, hash5, hash6, hash7, vhash, 512 );
|
||||
|
||||
// Echo
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash0,
|
||||
(const BitSequence *) hash0, 512 );
|
||||
memcpy( &ctx.echo, &x13bcd_8way_ctx.echo, sizeof(hashState_echo) );
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash1,
|
||||
(const BitSequence *) hash1, 512 );
|
||||
memcpy( &ctx.echo, &x13bcd_8way_ctx.echo, sizeof(hashState_echo) );
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash2,
|
||||
(const BitSequence *) hash2, 512 );
|
||||
memcpy( &ctx.echo, &x13bcd_8way_ctx.echo, sizeof(hashState_echo) );
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash3,
|
||||
(const BitSequence *) hash3, 512 );
|
||||
memcpy( &ctx.echo, &x13bcd_8way_ctx.echo, sizeof(hashState_echo) );
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash4,
|
||||
(const BitSequence *) hash4, 512 );
|
||||
memcpy( &ctx.echo, &x13bcd_8way_ctx.echo, sizeof(hashState_echo) );
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash5,
|
||||
(const BitSequence *) hash5, 512 );
|
||||
memcpy( &ctx.echo, &x13bcd_8way_ctx.echo, sizeof(hashState_echo) );
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash6,
|
||||
(const BitSequence *) hash6, 512 );
|
||||
memcpy( &ctx.echo, &x13bcd_8way_ctx.echo, sizeof(hashState_echo) );
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash7,
|
||||
(const BitSequence *) hash7, 512 );
|
||||
|
||||
// Hamsi parallel 4x32x2
|
||||
intrlv_8x64_512( vhash, hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7 );
|
||||
hamsi512_8way_update( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_8way_close( &ctx.hamsi, vhash );
|
||||
dintrlv_8x64_512( hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7, vhash );
|
||||
|
||||
// Fugue serial
|
||||
sph_fugue512( &ctx.fugue, hash0, 64 );
|
||||
sph_fugue512_close( &ctx.fugue, state );
|
||||
memcpy( &ctx.fugue, &x13bcd_8way_ctx.fugue,
|
||||
sizeof(sph_fugue512_context) );
|
||||
sph_fugue512( &ctx.fugue, hash1, 64 );
|
||||
sph_fugue512_close( &ctx.fugue, state+32 );
|
||||
memcpy( &ctx.fugue, &x13bcd_8way_ctx.fugue,
|
||||
sizeof(sph_fugue512_context) );
|
||||
sph_fugue512( &ctx.fugue, hash2, 64 );
|
||||
sph_fugue512_close( &ctx.fugue, state+64 );
|
||||
memcpy( &ctx.fugue, &x13bcd_8way_ctx.fugue,
|
||||
sizeof(sph_fugue512_context) );
|
||||
sph_fugue512( &ctx.fugue, hash3, 64 );
|
||||
sph_fugue512_close( &ctx.fugue, state+96 );
|
||||
memcpy( &ctx.fugue, &x13bcd_8way_ctx.fugue,
|
||||
sizeof(sph_fugue512_context) );
|
||||
sph_fugue512( &ctx.fugue, hash4, 64 );
|
||||
sph_fugue512_close( &ctx.fugue, state+128 );
|
||||
memcpy( &ctx.fugue, &x13bcd_8way_ctx.fugue,
|
||||
sizeof(sph_fugue512_context) );
|
||||
sph_fugue512( &ctx.fugue, hash5, 64 );
|
||||
sph_fugue512_close( &ctx.fugue, state+160 );
|
||||
memcpy( &ctx.fugue, &x13bcd_8way_ctx.fugue,
|
||||
sizeof(sph_fugue512_context) );
|
||||
sph_fugue512( &ctx.fugue, hash6, 64 );
|
||||
sph_fugue512_close( &ctx.fugue, state+192 );
|
||||
memcpy( &ctx.fugue, &x13bcd_8way_ctx.fugue,
|
||||
sizeof(sph_fugue512_context) );
|
||||
sph_fugue512( &ctx.fugue, hash7, 64 );
|
||||
sph_fugue512_close( &ctx.fugue, state+224 );
|
||||
}

int scanhash_x13bcd_8way( struct work *work, uint32_t max_nonce,
                          uint64_t *hashes_done, struct thr_info *mythr )
{
   uint32_t hash[8*8] __attribute__ ((aligned (128)));
   uint32_t vdata[24*8] __attribute__ ((aligned (64)));
   uint32_t *pdata = work->data;
   uint32_t *ptarget = work->target;
   uint32_t n = pdata[19];
   const uint32_t first_nonce = pdata[19];
   const uint32_t last_nonce = max_nonce - 8;
   __m512i  *noncev = (__m512i*)vdata + 9;   // aligned
   int thr_id = mythr->id;  // thr_id arg is deprecated
   const uint32_t Htarg = ptarget[7];

   mm512_bswap32_intrlv80_8x64( vdata, pdata );

   blake512_8way_init( &x13bcd_8way_ctx_mid );
   blake512_8way_update( &x13bcd_8way_ctx_mid, vdata, 64 );
   do
   {
      *noncev = mm512_intrlv_blend_32( mm512_bswap_32(
               _mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
                                 n+3, 0, n+2, 0, n+1, 0, n,   0 ) ), *noncev );

      x13bcd_8way_hash( hash, vdata );
      pdata[19] = n;

      for ( int i = 0; i < 8; i++ )
      if ( (hash+(i<<3))[7] <= Htarg )
      if ( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
      {
         pdata[19] = n+i;
         submit_lane_solution( work, hash+(i<<3), mythr, i );
      }
      n += 8;
   } while ( ( n < last_nonce ) && !work_restart[thr_id].restart );

   *hashes_done = n - first_nonce;
   return 0;
}
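
/* Aside, not in the patch (lane_may_solve is a hypothetical helper):
   after x13bcd_8way_hash() the eight lane digests are stored back to
   back, eight 32-bit words per lane, so lane i starts at hash + (i<<3).
   The loop above cheaply screens word 7 against Htarg before paying for
   the full 256-bit fulltest(). */
static inline int lane_may_solve( const uint32_t *hash, int lane,
                                  uint32_t Htarg )
{
   return hash[ (lane << 3) + 7 ] <= Htarg;
}
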
#elif defined(X13BCD_4WAY)
|
||||
|
||||
typedef struct {
|
||||
blake512_4way_context blake;
|
||||
bmw512_4way_context bmw;
|
||||
@@ -68,11 +340,11 @@ void x13bcd_4way_hash( void *state, const void *input )
|
||||
|
||||
// Blake
|
||||
memcpy( &ctx.blake, &x13bcd_ctx_mid, sizeof(x13bcd_ctx_mid) );
|
||||
blake512_4way( &ctx.blake, input + (64<<2), 16 );
|
||||
blake512_4way_update( &ctx.blake, input + (64<<2), 16 );
|
||||
blake512_4way_close( &ctx.blake, vhash );
|
||||
|
||||
// Bmw
|
||||
bmw512_4way( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_update( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
|
||||
// Serial
|
||||
@@ -91,15 +363,15 @@ void x13bcd_4way_hash( void *state, const void *input )
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
|
||||
// Skein
|
||||
skein512_4way( &ctx.skein, vhash, 64 );
|
||||
skein512_4way_update( &ctx.skein, vhash, 64 );
|
||||
skein512_4way_close( &ctx.skein, vhash );
|
||||
|
||||
// JH
|
||||
jh512_4way( &ctx.jh, vhash, 64 );
|
||||
jh512_4way_update( &ctx.jh, vhash, 64 );
|
||||
jh512_4way_close( &ctx.jh, vhash );
|
||||
|
||||
// Keccak
|
||||
keccak512_4way( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_close( &ctx.keccak, vhash );
|
||||
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
@@ -118,7 +390,7 @@ void x13bcd_4way_hash( void *state, const void *input )
|
||||
uint32_t sm3_hash3[32] __attribute__ ((aligned (32)));
|
||||
memset( sm3_hash3, 0, sizeof sm3_hash3 );
|
||||
|
||||
sm3_4way( &ctx.sm3, vhash, 64 );
|
||||
sm3_4way_update( &ctx.sm3, vhash, 64 );
|
||||
sm3_4way_close( &ctx.sm3, sm3_vhash );
|
||||
dintrlv_4x32( hash0, hash1, hash2, hash3, sm3_vhash, 512 );
|
||||
|
||||
@@ -171,20 +443,23 @@ void x13bcd_4way_hash( void *state, const void *input )
|
||||
|
||||
// Hamsi parallel 4x32x2
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
hamsi512_4way( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_4way_update( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_4way_close( &ctx.hamsi, vhash );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
// Fugue serial
|
||||
sph_fugue512( &ctx.fugue, hash0, 64 );
|
||||
sph_fugue512_close( &ctx.fugue, hash0 );
|
||||
memcpy( &ctx.fugue, &x13bcd_4way_ctx.fugue, sizeof(sph_fugue512_context) );
|
||||
memcpy( &ctx.fugue, &x13bcd_4way_ctx.fugue,
|
||||
sizeof(sph_fugue512_context) );
|
||||
sph_fugue512( &ctx.fugue, hash1, 64 );
|
||||
sph_fugue512_close( &ctx.fugue, hash1 );
|
||||
memcpy( &ctx.fugue, &x13bcd_4way_ctx.fugue, sizeof(sph_fugue512_context) );
|
||||
memcpy( &ctx.fugue, &x13bcd_4way_ctx.fugue,
|
||||
sizeof(sph_fugue512_context) );
|
||||
sph_fugue512( &ctx.fugue, hash2, 64 );
|
||||
sph_fugue512_close( &ctx.fugue, hash2 );
|
||||
memcpy( &ctx.fugue, &x13bcd_4way_ctx.fugue, sizeof(sph_fugue512_context) );
|
||||
memcpy( &ctx.fugue, &x13bcd_4way_ctx.fugue,
|
||||
sizeof(sph_fugue512_context) );
|
||||
sph_fugue512( &ctx.fugue, hash3, 64 );
|
||||
sph_fugue512_close( &ctx.fugue, hash3 );
|
||||
|
||||
@@ -203,44 +478,33 @@ int scanhash_x13bcd_4way( struct work *work, uint32_t max_nonce,
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[19];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 4;
|
||||
__m256i *noncev = (__m256i*)vdata + 9; // aligned
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
int thr_id = mythr->id;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint64_t htmax[] = { 0, 0xF, 0xFF,
|
||||
0xFFF, 0xFFFF, 0x10000000 };
|
||||
uint32_t masks[] = { 0xFFFFFFFF, 0xFFFFFFF0, 0xFFFFFF00,
|
||||
0xFFFFF000, 0xFFFF0000, 0 };
|
||||
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
|
||||
blake512_4way_init( &x13bcd_ctx_mid );
|
||||
blake512_4way( &x13bcd_ctx_mid, vdata, 64 );
|
||||
do
|
||||
{
|
||||
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
|
||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
|
||||
|
||||
for ( int m=0; m < 6; m++ )
|
||||
if ( Htarg <= htmax[m] )
|
||||
{
|
||||
uint32_t mask = masks[m];
|
||||
do
|
||||
{
|
||||
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
|
||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
|
||||
x13bcd_4way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
x13bcd_4way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( ( ( (hash+(i<<3))[7] & mask ) == 0 ) )
|
||||
if ( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n+i;
|
||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
||||
}
|
||||
n += 4;
|
||||
} while ( ( n < max_nonce ) && !work_restart[thr_id].restart );
|
||||
break;
|
||||
}
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( (hash+(i<<3))[7] <= Htarg )
|
||||
if ( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n+i;
|
||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
||||
}
|
||||
n += 4;
|
||||
} while ( ( n < last_nonce ) && !work_restart[thr_id].restart );
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@@ -71,13 +71,11 @@ void x13sm3_4way_hash( void *state, const void *input )
|
||||
|
||||
// Blake
|
||||
memcpy( &ctx.blake, &x13sm3_ctx_mid, sizeof(x13sm3_ctx_mid) );
|
||||
blake512_4way( &ctx.blake, input + (64<<2), 16 );
|
||||
|
||||
// blake512_4way( &ctx.blake, input, 80 );
|
||||
blake512_4way_update( &ctx.blake, input + (64<<2), 16 );
|
||||
blake512_4way_close( &ctx.blake, vhash );
|
||||
|
||||
// Bmw
|
||||
bmw512_4way( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_update( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
|
||||
// Serial
|
||||
@@ -96,15 +94,15 @@ void x13sm3_4way_hash( void *state, const void *input )
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
|
||||
// Skein
|
||||
skein512_4way( &ctx.skein, vhash, 64 );
|
||||
skein512_4way_update( &ctx.skein, vhash, 64 );
|
||||
skein512_4way_close( &ctx.skein, vhash );
|
||||
|
||||
// JH
|
||||
jh512_4way( &ctx.jh, vhash, 64 );
|
||||
jh512_4way_update( &ctx.jh, vhash, 64 );
|
||||
jh512_4way_close( &ctx.jh, vhash );
|
||||
|
||||
// Keccak
|
||||
keccak512_4way( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_close( &ctx.keccak, vhash );
|
||||
|
||||
// Serial to the end
|
||||
@@ -180,13 +178,13 @@ void x13sm3_4way_hash( void *state, const void *input )
|
||||
uint32_t sm3_hash3[32] __attribute__ ((aligned (32)));
|
||||
memset( sm3_hash3, 0, sizeof sm3_hash3 );
|
||||
|
||||
sm3_4way( &ctx.sm3, vhash, 64 );
|
||||
sm3_4way_update( &ctx.sm3, vhash, 64 );
|
||||
sm3_4way_close( &ctx.sm3, sm3_vhash );
|
||||
dintrlv_4x32( hash0, hash1, hash2, hash3, sm3_vhash, 512 );
|
||||
|
||||
// Hamsi parallel 4x32x2
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
hamsi512_4way( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_4way_update( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_4way_close( &ctx.hamsi, vhash );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
|
@@ -17,7 +17,11 @@ bool register_x13sm3_algo( algo_gate_t* gate )

bool register_x13bcd_algo( algo_gate_t* gate )
{
#if defined (X13SM3_4WAY)
#if defined (X13BCD_8WAY)
  init_x13bcd_8way_ctx();
  gate->scanhash = (void*)&scanhash_x13bcd_8way;
  gate->hash = (void*)&x13bcd_8way_hash;
#elif defined (X13BCD_4WAY)
  init_x13bcd_4way_ctx();
  gate->scanhash = (void*)&scanhash_x13bcd_4way;
  gate->hash = (void*)&x13bcd_4way_hash;
@@ -26,7 +30,7 @@ bool register_x13bcd_algo( algo_gate_t* gate )
  gate->scanhash = (void*)&scanhash_x13bcd;
  gate->hash = (void*)&x13bcd_hash;
#endif
  gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
  gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT;
  return true;
};

@@ -5,13 +5,11 @@
#include <stdint.h>

#if defined(__AVX2__) && defined(__AES__)
#define X13SM3_4WAY
#define X13SM3_4WAY 1
#endif

bool register_x13sm3_algo( algo_gate_t* gate );

bool register_x13bcd_algo( algo_gate_t* gate );

#if defined(X13SM3_4WAY)

void x13sm3_4way_hash( void *state, const void *input );
@@ -19,18 +17,39 @@ int scanhash_x13sm3_4way( struct work *work, uint32_t max_nonce,
                          uint64_t *hashes_done, struct thr_info *mythr );
void init_x13sm3_4way_ctx();

void x13bcd_4way_hash( void *state, const void *input );
int scanhash_x13bcd_4way( struct work *work, uint32_t max_nonce,
                          uint64_t *hashes_done, struct thr_info *mythr );
void init_x13bcd_4way_ctx();

#endif
#else

void x13sm3_hash( void *state, const void *input );
int scanhash_x13sm3( struct work *work, uint32_t max_nonce,
                     uint64_t *hashes_done, struct thr_info *mythr );
void init_x13sm3_ctx();

#endif

#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
#define X13BCD_8WAY 1
#elif defined(__AVX2__) && defined(__AES__)
#define X13BCD_4WAY 1
#endif

bool register_x13bcd_algo( algo_gate_t* gate );

#if defined(X13BCD_8WAY)

void x13bcd_8way_hash( void *state, const void *input );
int scanhash_x13bcd_8way( struct work *work, uint32_t max_nonce,
                          uint64_t *hashes_done, struct thr_info *mythr );
void init_x13bcd_8way_ctx();

#elif defined(X13BCD_4WAY)

void x13bcd_4way_hash( void *state, const void *input );
int scanhash_x13bcd_4way( struct work *work, uint32_t max_nonce,
                          uint64_t *hashes_done, struct thr_info *mythr );
void init_x13bcd_4way_ctx();

#else

void x13bcd_hash( void *state, const void *input );
int scanhash_x13bcd( struct work *work, uint32_t max_nonce,
                     uint64_t *hashes_done, struct thr_info *mythr );
@@ -38,3 +57,4 @@ void init_x13bcd_ctx();

#endif

#endif

@@ -34,14 +34,14 @@ void polytimos_4way_hash( void *output, const void *input )
|
||||
poly_4way_context_overlay ctx;
|
||||
|
||||
skein512_4way_init( &ctx.skein );
|
||||
skein512_4way( &ctx.skein, input, 80 );
|
||||
skein512_4way_update( &ctx.skein, input, 80 );
|
||||
skein512_4way_close( &ctx.skein, vhash );
|
||||
|
||||
// Need to convert from 64 bit interleaved to 32 bit interleaved.
|
||||
uint32_t vhash32[16*4];
|
||||
rintrlv_4x64_4x32( vhash32, vhash, 512 );
|
||||
shabal512_4way_init( &ctx.shabal );
|
||||
shabal512_4way( &ctx.shabal, vhash32, 64 );
|
||||
shabal512_4way_update( &ctx.shabal, vhash32, 64 );
|
||||
shabal512_4way_close( &ctx.shabal, vhash32 );
|
||||
dintrlv_4x32( hash0, hash1, hash2, hash3, vhash32, 512 );
|
||||
|
||||
|
@@ -38,7 +38,7 @@ void veltor_4way_hash( void *output, const void *input )
|
||||
veltor_4way_ctx_holder ctx __attribute__ ((aligned (64)));
|
||||
memcpy( &ctx, &veltor_4way_ctx, sizeof(veltor_4way_ctx) );
|
||||
|
||||
skein512_4way( &ctx.skein, input, 80 );
|
||||
skein512_4way_update( &ctx.skein, input, 80 );
|
||||
skein512_4way_close( &ctx.skein, vhash );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
@@ -55,7 +55,7 @@ void veltor_4way_hash( void *output, const void *input )
|
||||
sph_shavite512_close( &ctx.shavite, hash3 );
|
||||
|
||||
intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
shabal512_4way( &ctx.shabal, vhash, 64 );
|
||||
shabal512_4way_update( &ctx.shabal, vhash, 64 );
|
||||
shabal512_4way_close( &ctx.shabal, vhash );
|
||||
dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
|
@@ -325,11 +325,11 @@ void x14_4way_hash( void *state, const void *input )
|
||||
memcpy( &ctx, &x14_4way_ctx, sizeof(x14_4way_ctx) );
|
||||
|
||||
// 1 Blake
|
||||
blake512_4way( &ctx.blake, input, 80 );
|
||||
blake512_4way_update( &ctx.blake, input, 80 );
|
||||
blake512_4way_close( &ctx.blake, vhash );
|
||||
|
||||
// 2 Bmw
|
||||
bmw512_4way( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_update( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
|
||||
// Serial
|
||||
@@ -348,15 +348,15 @@ void x14_4way_hash( void *state, const void *input )
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
|
||||
// 4 Skein
|
||||
skein512_4way( &ctx.skein, vhash, 64 );
|
||||
skein512_4way_update( &ctx.skein, vhash, 64 );
|
||||
skein512_4way_close( &ctx.skein, vhash );
|
||||
|
||||
// 5 JH
|
||||
jh512_4way( &ctx.jh, vhash, 64 );
|
||||
jh512_4way_update( &ctx.jh, vhash, 64 );
|
||||
jh512_4way_close( &ctx.jh, vhash );
|
||||
|
||||
// 6 Keccak
|
||||
keccak512_4way( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_close( &ctx.keccak, vhash );
|
||||
|
||||
// Serial
|
||||
@@ -420,7 +420,7 @@ void x14_4way_hash( void *state, const void *input )
|
||||
|
||||
// 12 Hamsi parallel 4way 32 bit
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
hamsi512_4way( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_4way_update( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_4way_close( &ctx.hamsi, vhash );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
|
@@ -374,11 +374,11 @@ void x15_4way_hash( void *state, const void *input )
|
||||
memcpy( &ctx, &x15_4way_ctx, sizeof(x15_4way_ctx) );
|
||||
|
||||
// 1 Blake
|
||||
blake512_4way( &ctx.blake, input, 80 );
|
||||
blake512_4way_update( &ctx.blake, input, 80 );
|
||||
blake512_4way_close( &ctx.blake, vhash );
|
||||
|
||||
// 2 Bmw
|
||||
bmw512_4way( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_update( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
|
||||
// Serial
|
||||
@@ -397,15 +397,15 @@ void x15_4way_hash( void *state, const void *input )
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
|
||||
// 4 Skein
|
||||
skein512_4way( &ctx.skein, vhash, 64 );
|
||||
skein512_4way_update( &ctx.skein, vhash, 64 );
|
||||
skein512_4way_close( &ctx.skein, vhash );
|
||||
|
||||
// 5 JH
|
||||
jh512_4way( &ctx.jh, vhash, 64 );
|
||||
jh512_4way_update( &ctx.jh, vhash, 64 );
|
||||
jh512_4way_close( &ctx.jh, vhash );
|
||||
|
||||
// 6 Keccak
|
||||
keccak512_4way( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_close( &ctx.keccak, vhash );
|
||||
|
||||
// Serial to the end
|
||||
@@ -469,7 +469,7 @@ void x15_4way_hash( void *state, const void *input )
|
||||
|
||||
// 12 Hamsi parallel 4way 32 bit
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
hamsi512_4way( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_4way_update( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_4way_close( &ctx.hamsi, vhash );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
|
@@ -463,11 +463,11 @@ void x16r_4way_hash( void* output, const void* input )
|
||||
case BLAKE:
|
||||
blake512_4way_init( &ctx.blake );
|
||||
if ( i == 0 )
|
||||
blake512_4way( &ctx.blake, input, size );
|
||||
blake512_4way_update( &ctx.blake, input, size );
|
||||
else
|
||||
{
|
||||
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
||||
blake512_4way( &ctx.blake, vhash, size );
|
||||
blake512_4way_update( &ctx.blake, vhash, size );
|
||||
}
|
||||
blake512_4way_close( &ctx.blake, vhash );
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
@@ -475,11 +475,11 @@ void x16r_4way_hash( void* output, const void* input )
|
||||
case BMW:
|
||||
bmw512_4way_init( &ctx.bmw );
|
||||
if ( i == 0 )
|
||||
bmw512_4way( &ctx.bmw, input, size );
|
||||
bmw512_4way_update( &ctx.bmw, input, size );
|
||||
else
|
||||
{
|
||||
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
||||
bmw512_4way( &ctx.bmw, vhash, size );
|
||||
bmw512_4way_update( &ctx.bmw, vhash, size );
|
||||
}
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
@@ -501,11 +501,11 @@ void x16r_4way_hash( void* output, const void* input )
|
||||
case SKEIN:
|
||||
skein512_4way_init( &ctx.skein );
|
||||
if ( i == 0 )
|
||||
skein512_4way( &ctx.skein, input, size );
|
||||
skein512_4way_update( &ctx.skein, input, size );
|
||||
else
|
||||
{
|
||||
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
||||
skein512_4way( &ctx.skein, vhash, size );
|
||||
skein512_4way_update( &ctx.skein, vhash, size );
|
||||
}
|
||||
skein512_4way_close( &ctx.skein, vhash );
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
@@ -513,11 +513,11 @@ void x16r_4way_hash( void* output, const void* input )
|
||||
case JH:
|
||||
jh512_4way_init( &ctx.jh );
|
||||
if ( i == 0 )
|
||||
jh512_4way( &ctx.jh, input, size );
|
||||
jh512_4way_update( &ctx.jh, input, size );
|
||||
else
|
||||
{
|
||||
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
||||
jh512_4way( &ctx.jh, vhash, size );
|
||||
jh512_4way_update( &ctx.jh, vhash, size );
|
||||
}
|
||||
jh512_4way_close( &ctx.jh, vhash );
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
@@ -525,11 +525,11 @@ void x16r_4way_hash( void* output, const void* input )
|
||||
case KECCAK:
|
||||
keccak512_4way_init( &ctx.keccak );
|
||||
if ( i == 0 )
|
||||
keccak512_4way( &ctx.keccak, input, size );
|
||||
keccak512_4way_update( &ctx.keccak, input, size );
|
||||
else
|
||||
{
|
||||
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
||||
keccak512_4way( &ctx.keccak, vhash, size );
|
||||
keccak512_4way_update( &ctx.keccak, vhash, size );
|
||||
}
|
||||
keccak512_4way_close( &ctx.keccak, vhash );
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
@@ -599,7 +599,7 @@ void x16r_4way_hash( void* output, const void* input )
|
||||
case HAMSI:
|
||||
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
||||
hamsi512_4way_init( &ctx.hamsi );
|
||||
hamsi512_4way( &ctx.hamsi, vhash, size );
|
||||
hamsi512_4way_update( &ctx.hamsi, vhash, size );
|
||||
hamsi512_4way_close( &ctx.hamsi, vhash );
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
break;
|
||||
@@ -620,7 +620,7 @@ void x16r_4way_hash( void* output, const void* input )
|
||||
case SHABAL:
|
||||
intrlv_4x32( vhash, in0, in1, in2, in3, size<<3 );
|
||||
shabal512_4way_init( &ctx.shabal );
|
||||
shabal512_4way( &ctx.shabal, vhash, size );
|
||||
shabal512_4way_update( &ctx.shabal, vhash, size );
|
||||
shabal512_4way_close( &ctx.shabal, vhash );
|
||||
dintrlv_4x32_512( hash0, hash1, hash2, hash3, vhash );
|
||||
break;
|
||||
@@ -641,7 +641,7 @@ void x16r_4way_hash( void* output, const void* input )
|
||||
case SHA_512:
|
||||
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
||||
sha512_4way_init( &ctx.sha512 );
|
||||
sha512_4way( &ctx.sha512, vhash, size );
|
||||
sha512_4way_update( &ctx.sha512, vhash, size );
|
||||
sha512_4way_close( &ctx.sha512, vhash );
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
break;
|
||||
|
@@ -52,10 +52,10 @@ bool register_x16r_algo( algo_gate_t* gate )
|
||||
|
||||
bool register_x16rv2_algo( algo_gate_t* gate )
|
||||
{
|
||||
#if defined (X16R_8WAY)
|
||||
#if defined (X16RV2_8WAY)
|
||||
gate->scanhash = (void*)&scanhash_x16rv2_8way;
|
||||
gate->hash = (void*)&x16rv2_8way_hash;
|
||||
#elif defined (X16R_4WAY)
|
||||
#elif defined (X16RV2_4WAY)
|
||||
gate->scanhash = (void*)&scanhash_x16rv2_4way;
|
||||
gate->hash = (void*)&x16rv2_4way_hash;
|
||||
#else
|
||||
@@ -205,10 +205,10 @@ void veil_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
|
||||
|
||||
bool register_x16rt_algo( algo_gate_t* gate )
|
||||
{
|
||||
#if defined (X16R_8WAY)
|
||||
#if defined (X16RT_8WAY)
|
||||
gate->scanhash = (void*)&scanhash_x16rt_8way;
|
||||
gate->hash = (void*)&x16rt_8way_hash;
|
||||
#elif defined (X16R_4WAY)
|
||||
#elif defined (X16RT_4WAY)
|
||||
gate->scanhash = (void*)&scanhash_x16rt_4way;
|
||||
gate->hash = (void*)&x16rt_4way_hash;
|
||||
#else
|
||||
@@ -222,10 +222,10 @@ bool register_x16rt_algo( algo_gate_t* gate )
|
||||
|
||||
bool register_x16rt_veil_algo( algo_gate_t* gate )
|
||||
{
|
||||
#if defined (X16R_8WAY)
|
||||
#if defined (X16RT_8WAY)
|
||||
gate->scanhash = (void*)&scanhash_x16rt_8way;
|
||||
gate->hash = (void*)&x16rt_8way_hash;
|
||||
#elif defined (X16R_4WAY)
|
||||
#elif defined (X16RT_4WAY)
|
||||
gate->scanhash = (void*)&scanhash_x16rt_4way;
|
||||
gate->hash = (void*)&x16rt_4way_hash;
|
||||
#else
|
||||
@@ -258,16 +258,23 @@ bool register_hex_algo( algo_gate_t* gate )
|
||||
|
||||
bool register_x21s_algo( algo_gate_t* gate )
|
||||
{
|
||||
#if defined (X16R_4WAY)
|
||||
#if defined (X21S_8WAY)
|
||||
gate->scanhash = (void*)&scanhash_x21s_8way;
|
||||
gate->hash = (void*)&x21s_8way_hash;
|
||||
gate->miner_thread_init = (void*)&x21s_8way_thread_init;
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT;
|
||||
#elif defined (X21S_4WAY)
|
||||
gate->scanhash = (void*)&scanhash_x21s_4way;
|
||||
gate->hash = (void*)&x21s_4way_hash;
|
||||
gate->miner_thread_init = (void*)&x21s_4way_thread_init;
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | SHA_OPT | AVX512_OPT;
|
||||
#else
|
||||
gate->scanhash = (void*)&scanhash_x21s;
|
||||
gate->hash = (void*)&x21s_hash;
|
||||
gate->miner_thread_init = (void*)&x21s_thread_init;
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | SHA_OPT | AVX512_OPT;
|
||||
#endif
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | SHA_OPT;
|
||||
// gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | SHA_OPT | AVX512_OPT;
|
||||
x16_r_s_getAlgoString = (void*)&x16s_getAlgoString;
|
||||
opt_target_factor = 256.0;
|
||||
return true;
|
||||
|
@@ -12,6 +12,24 @@
|
||||
#define X16R_4WAY 1
|
||||
#endif
|
||||
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
#define X16RV2_8WAY 1
|
||||
#elif defined(__AVX2__) && defined(__AES__)
|
||||
#define X16RV2_4WAY 1
|
||||
#endif
|
||||
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
#define X16RT_8WAY 1
|
||||
#elif defined(__AVX2__) && defined(__AES__)
|
||||
#define X16RT_4WAY 1
|
||||
#endif
|
||||
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
#define X21S_8WAY 1
|
||||
#elif defined(__AVX2__) && defined(__AES__)
|
||||
#define X21S_4WAY 1
|
||||
#endif
|
||||
|
||||
enum x16r_Algo {
|
||||
BLAKE = 0,
|
||||
BMW,
|
||||
@@ -46,18 +64,13 @@ bool register_x16rt_algo( algo_gate_t* gate );
|
||||
bool register_hex__algo( algo_gate_t* gate );
|
||||
bool register_x21s__algo( algo_gate_t* gate );
|
||||
|
||||
// x16r, x16s
|
||||
#if defined(X16R_8WAY)
|
||||
|
||||
void x16r_8way_hash( void *state, const void *input );
|
||||
int scanhash_x16r_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
void x16rv2_8way_hash( void *state, const void *input );
|
||||
int scanhash_x16rv2_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
void x16rt_8way_hash( void *state, const void *input );
|
||||
int scanhash_x16rt_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
#elif defined(X16R_4WAY)
|
||||
|
||||
@@ -65,31 +78,65 @@ void x16r_4way_hash( void *state, const void *input );
|
||||
int scanhash_x16r_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
void x16rv2_4way_hash( void *state, const void *input );
|
||||
int scanhash_x16rv2_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
void x16rt_4way_hash( void *state, const void *input );
|
||||
int scanhash_x16rt_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
#else
|
||||
|
||||
void x16r_hash( void *state, const void *input );
|
||||
int scanhash_x16r( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
#endif
|
||||
|
||||
// x16Rv2
|
||||
#if defined(X16RV2_8WAY)
|
||||
|
||||
void x16rv2_8way_hash( void *state, const void *input );
|
||||
int scanhash_x16rv2_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
#elif defined(X16RV2_4WAY)
|
||||
|
||||
void x16rv2_4way_hash( void *state, const void *input );
|
||||
int scanhash_x16rv2_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
#else
|
||||
|
||||
void x16rv2_hash( void *state, const void *input );
|
||||
int scanhash_x16rv2( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
#endif
|
||||
|
||||
// x16rt, veil
|
||||
#if defined(X16RT_8WAY)
|
||||
|
||||
void x16rt_8way_hash( void *state, const void *input );
|
||||
int scanhash_x16rt_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
#elif defined(X16RT_4WAY)
|
||||
|
||||
void x16rt_4way_hash( void *state, const void *input );
|
||||
int scanhash_x16rt_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
#else
|
||||
|
||||
void x16rt_hash( void *state, const void *input );
|
||||
int scanhash_x16rt( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(X16R_4WAY)
|
||||
// x21s
|
||||
#if defined(X21S_8WAY)
|
||||
|
||||
void x21s_8way_hash( void *state, const void *input );
|
||||
int scanhash_x21s_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
bool x21s_8way_thread_init();
|
||||
|
||||
#elif defined(X21S_4WAY)
|
||||
|
||||
void x21s_4way_hash( void *state, const void *input );
|
||||
int scanhash_x21s_4way( struct work *work, uint32_t max_nonce,
|
||||
|
@@ -24,7 +24,7 @@
|
||||
static __thread uint32_t s_ntime = UINT32_MAX;
|
||||
static __thread char hashOrder[X16R_HASH_FUNC_COUNT + 1] = { 0 };
|
||||
|
||||
#if defined (X16R_8WAY)
|
||||
#if defined (X16RT_8WAY)
|
||||
|
||||
union _x16rt_8way_context_overlay
|
||||
{
|
||||
@@ -407,7 +407,7 @@ int scanhash_x16rt_8way( struct work *work, uint32_t max_nonce,
|
||||
return 0;
|
||||
}
|
||||
|
||||
#elif defined (X16R_4WAY)
|
||||
#elif defined (X16RT_4WAY)
|
||||
|
||||
union _x16rt_4way_context_overlay
|
||||
{
|
||||
@@ -458,11 +458,11 @@ void x16rt_4way_hash( void* output, const void* input )
|
||||
case BLAKE:
|
||||
blake512_4way_init( &ctx.blake );
|
||||
if ( i == 0 )
|
||||
blake512_4way( &ctx.blake, input, size );
|
||||
blake512_4way_update( &ctx.blake, input, size );
|
||||
else
|
||||
{
|
||||
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
||||
blake512_4way( &ctx.blake, vhash, size );
|
||||
blake512_4way_update( &ctx.blake, vhash, size );
|
||||
}
|
||||
blake512_4way_close( &ctx.blake, vhash );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
@@ -470,11 +470,11 @@ void x16rt_4way_hash( void* output, const void* input )
|
||||
case BMW:
|
||||
bmw512_4way_init( &ctx.bmw );
|
||||
if ( i == 0 )
|
||||
bmw512_4way( &ctx.bmw, input, size );
|
||||
bmw512_4way_update( &ctx.bmw, input, size );
|
||||
else
|
||||
{
|
||||
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
||||
bmw512_4way( &ctx.bmw, vhash, size );
|
||||
bmw512_4way_update( &ctx.bmw, vhash, size );
|
||||
}
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
@@ -496,11 +496,11 @@ void x16rt_4way_hash( void* output, const void* input )
|
||||
case SKEIN:
|
||||
skein512_4way_init( &ctx.skein );
|
||||
if ( i == 0 )
|
||||
skein512_4way( &ctx.skein, input, size );
|
||||
skein512_4way_update( &ctx.skein, input, size );
|
||||
else
|
||||
{
|
||||
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
||||
skein512_4way( &ctx.skein, vhash, size );
|
||||
skein512_4way_update( &ctx.skein, vhash, size );
|
||||
}
|
||||
skein512_4way_close( &ctx.skein, vhash );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
@@ -508,11 +508,11 @@ void x16rt_4way_hash( void* output, const void* input )
|
||||
case JH:
|
||||
jh512_4way_init( &ctx.jh );
|
||||
if ( i == 0 )
|
||||
jh512_4way( &ctx.jh, input, size );
|
||||
jh512_4way_update( &ctx.jh, input, size );
|
||||
else
|
||||
{
|
||||
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
||||
jh512_4way( &ctx.jh, vhash, size );
|
||||
jh512_4way_update( &ctx.jh, vhash, size );
|
||||
}
|
||||
jh512_4way_close( &ctx.jh, vhash );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
@@ -520,11 +520,11 @@ void x16rt_4way_hash( void* output, const void* input )
|
||||
case KECCAK:
|
||||
keccak512_4way_init( &ctx.keccak );
|
||||
if ( i == 0 )
|
||||
keccak512_4way( &ctx.keccak, input, size );
|
||||
keccak512_4way_update( &ctx.keccak, input, size );
|
||||
else
|
||||
{
|
||||
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
||||
keccak512_4way( &ctx.keccak, vhash, size );
|
||||
keccak512_4way_update( &ctx.keccak, vhash, size );
|
||||
}
|
||||
keccak512_4way_close( &ctx.keccak, vhash );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
@@ -594,7 +594,7 @@ void x16rt_4way_hash( void* output, const void* input )
|
||||
case HAMSI:
|
||||
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
||||
hamsi512_4way_init( &ctx.hamsi );
|
||||
hamsi512_4way( &ctx.hamsi, vhash, size );
|
||||
hamsi512_4way_update( &ctx.hamsi, vhash, size );
|
||||
hamsi512_4way_close( &ctx.hamsi, vhash );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
break;
|
||||
@@ -615,7 +615,7 @@ void x16rt_4way_hash( void* output, const void* input )
|
||||
case SHABAL:
|
||||
intrlv_4x32( vhash, in0, in1, in2, in3, size<<3 );
|
||||
shabal512_4way_init( &ctx.shabal );
|
||||
shabal512_4way( &ctx.shabal, vhash, size );
|
||||
shabal512_4way_update( &ctx.shabal, vhash, size );
|
||||
shabal512_4way_close( &ctx.shabal, vhash );
|
||||
dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
break;
|
||||
@@ -636,7 +636,7 @@ void x16rt_4way_hash( void* output, const void* input )
|
||||
case SHA_512:
|
||||
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
||||
sha512_4way_init( &ctx.sha512 );
|
||||
sha512_4way( &ctx.sha512, vhash, size );
|
||||
sha512_4way_update( &ctx.sha512, vhash, size );
|
||||
sha512_4way_close( &ctx.sha512, vhash );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
break;
|
||||
|
@@ -31,7 +31,7 @@
|
||||
static __thread uint32_t s_ntime = UINT32_MAX;
|
||||
static __thread char hashOrder[X16R_HASH_FUNC_COUNT + 1] = { 0 };
|
||||
|
||||
#if defined (X16R_8WAY)
|
||||
#if defined (X16RV2_8WAY)
|
||||
|
||||
union _x16rv2_8way_context_overlay
|
||||
{
|
||||
@@ -497,10 +497,7 @@ int scanhash_x16rv2_8way( struct work *work, uint32_t max_nonce,
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
#elif defined (X16R_4WAY)
|
||||
|
||||
|
||||
#elif defined (X16RV2_4WAY)
|
||||
|
||||
union _x16rv2_4way_context_overlay
|
||||
{
|
||||
@@ -556,11 +553,11 @@ void x16rv2_4way_hash( void* output, const void* input )
|
||||
case BLAKE:
|
||||
blake512_4way_init( &ctx.blake );
|
||||
if ( i == 0 )
|
||||
blake512_4way( &ctx.blake, input, size );
|
||||
blake512_4way_update( &ctx.blake, input, size );
|
||||
else
|
||||
{
|
||||
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
||||
blake512_4way( &ctx.blake, vhash, size );
|
||||
blake512_4way_update( &ctx.blake, vhash, size );
|
||||
}
|
||||
blake512_4way_close( &ctx.blake, vhash );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
@@ -568,11 +565,11 @@ void x16rv2_4way_hash( void* output, const void* input )
|
||||
case BMW:
|
||||
bmw512_4way_init( &ctx.bmw );
|
||||
if ( i == 0 )
|
||||
bmw512_4way( &ctx.bmw, input, size );
|
||||
bmw512_4way_update( &ctx.bmw, input, size );
|
||||
else
|
||||
{
|
||||
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
||||
bmw512_4way( &ctx.bmw, vhash, size );
|
||||
bmw512_4way_update( &ctx.bmw, vhash, size );
|
||||
}
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
@@ -594,11 +591,11 @@ void x16rv2_4way_hash( void* output, const void* input )
|
||||
case SKEIN:
|
||||
skein512_4way_init( &ctx.skein );
|
||||
if ( i == 0 )
|
||||
skein512_4way( &ctx.skein, input, size );
|
||||
skein512_4way_update( &ctx.skein, input, size );
|
||||
else
|
||||
{
|
||||
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
||||
skein512_4way( &ctx.skein, vhash, size );
|
||||
skein512_4way_update( &ctx.skein, vhash, size );
|
||||
}
|
||||
skein512_4way_close( &ctx.skein, vhash );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
@@ -606,11 +603,11 @@ void x16rv2_4way_hash( void* output, const void* input )
|
||||
case JH:
|
||||
jh512_4way_init( &ctx.jh );
|
||||
if ( i == 0 )
|
||||
jh512_4way( &ctx.jh, input, size );
|
||||
jh512_4way_update( &ctx.jh, input, size );
|
||||
else
|
||||
{
|
||||
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
||||
jh512_4way( &ctx.jh, vhash, size );
|
||||
jh512_4way_update( &ctx.jh, vhash, size );
|
||||
}
|
||||
jh512_4way_close( &ctx.jh, vhash );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
@@ -634,7 +631,7 @@ void x16rv2_4way_hash( void* output, const void* input )
|
||||
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
keccak512_4way_init( &ctx.keccak );
|
||||
keccak512_4way( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_close( &ctx.keccak, vhash );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
break;
|
||||
@@ -724,7 +721,7 @@ void x16rv2_4way_hash( void* output, const void* input )
|
||||
case HAMSI:
|
||||
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
||||
hamsi512_4way_init( &ctx.hamsi );
|
||||
hamsi512_4way( &ctx.hamsi, vhash, size );
|
||||
hamsi512_4way_update( &ctx.hamsi, vhash, size );
|
||||
hamsi512_4way_close( &ctx.hamsi, vhash );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
break;
|
||||
@@ -745,7 +742,7 @@ void x16rv2_4way_hash( void* output, const void* input )
|
||||
case SHABAL:
|
||||
intrlv_4x32( vhash, in0, in1, in2, in3, size<<3 );
|
||||
shabal512_4way_init( &ctx.shabal );
|
||||
shabal512_4way( &ctx.shabal, vhash, size );
|
||||
shabal512_4way_update( &ctx.shabal, vhash, size );
|
||||
shabal512_4way_close( &ctx.shabal, vhash );
|
||||
dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
break;
|
||||
@@ -782,7 +779,7 @@ void x16rv2_4way_hash( void* output, const void* input )
|
||||
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
sha512_4way_init( &ctx.sha512 );
|
||||
sha512_4way( &ctx.sha512, vhash, 64 );
|
||||
sha512_4way_update( &ctx.sha512, vhash, 64 );
|
||||
sha512_4way_close( &ctx.sha512, vhash );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
break;
|
||||
|
@@ -1,13 +1,10 @@
|
||||
/**
|
||||
* x16r algo implementation
|
||||
* x21s algo implementation
|
||||
*
|
||||
* Implementation by tpruvot@github Jan 2018
|
||||
* Optimized by JayDDee@github Jan 2018
|
||||
*/
|
||||
#include "x16r-gate.h"
|
||||
|
||||
#if defined (X16R_4WAY)
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
@@ -21,6 +18,7 @@
|
||||
#include "algo/shavite/sph_shavite.h"
|
||||
#include "algo/luffa/luffa-hash-2way.h"
|
||||
#include "algo/cubehash/cubehash_sse2.h"
|
||||
#include "algo/cubehash/cube-hash-2way.h"
|
||||
#include "algo/simd/simd-hash-2way.h"
|
||||
#include "algo/echo/aes_ni/hash_api.h"
|
||||
#include "algo/hamsi/hamsi-hash-4way.h"
|
||||
@@ -38,6 +36,483 @@
|
||||
|
||||
static __thread uint32_t s_ntime = UINT32_MAX;
|
||||
static __thread char hashOrder[X16R_HASH_FUNC_COUNT + 1] = { 0 };
|
||||
|
||||
#if defined (X21S_8WAY)
|
||||
|
||||
static __thread uint64_t* x21s_8way_matrix;
|
||||
|
||||
union _x21s_8way_context_overlay
|
||||
{
|
||||
blake512_8way_context blake;
|
||||
bmw512_8way_context bmw;
|
||||
hashState_groestl groestl;
|
||||
skein512_8way_context skein;
|
||||
jh512_8way_context jh;
|
||||
keccak512_8way_context keccak;
|
||||
luffa_4way_context luffa;
|
||||
cube_4way_context cube;
|
||||
sph_shavite512_context shavite;
|
||||
simd_4way_context simd;
|
||||
hashState_echo echo;
|
||||
hamsi512_8way_context hamsi;
|
||||
sph_fugue512_context fugue;
|
||||
shabal512_8way_context shabal;
|
||||
sph_whirlpool_context whirlpool;
|
||||
sha512_8way_context sha512;
|
||||
haval256_5_8way_context haval;
|
||||
sph_tiger_context tiger;
|
||||
sph_gost512_context gost;
|
||||
sha256_8way_context sha256;
|
||||
} __attribute__ ((aligned (64)));
|
||||
|
||||
typedef union _x21s_8way_context_overlay x21s_8way_context_overlay;
|
||||
|
||||
void x21s_8way_hash( void* output, const void* input )
|
||||
{
|
||||
uint32_t vhash[24*8] __attribute__ ((aligned (128)));
|
||||
uint32_t hash0[24] __attribute__ ((aligned (64)));
|
||||
uint32_t hash1[24] __attribute__ ((aligned (64)));
|
||||
uint32_t hash2[24] __attribute__ ((aligned (64)));
|
||||
uint32_t hash3[24] __attribute__ ((aligned (64)));
|
||||
uint32_t hash4[24] __attribute__ ((aligned (64)));
|
||||
uint32_t hash5[24] __attribute__ ((aligned (64)));
|
||||
uint32_t hash6[24] __attribute__ ((aligned (64)));
|
||||
uint32_t hash7[24] __attribute__ ((aligned (64)));
|
||||
x21s_8way_context_overlay ctx;
|
||||
void *in0 = (void*) hash0;
|
||||
void *in1 = (void*) hash1;
|
||||
void *in2 = (void*) hash2;
|
||||
void *in3 = (void*) hash3;
|
||||
void *in4 = (void*) hash4;
|
||||
void *in5 = (void*) hash5;
|
||||
void *in6 = (void*) hash6;
|
||||
void *in7 = (void*) hash7;
|
||||
int size = 80;
|
||||
|
||||
dintrlv_8x64( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
|
||||
input, 640 );
|
||||
|
||||
for ( int i = 0; i < 16; i++ )
|
||||
{
|
||||
const char elem = hashOrder[i];
|
||||
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
|
||||
|
||||
switch ( algo )
|
||||
{
|
||||
case BLAKE:
|
||||
blake512_8way_init( &ctx.blake );
|
||||
if ( i == 0 )
|
||||
blake512_8way_update( &ctx.blake, input, size );
|
||||
else
|
||||
{
|
||||
intrlv_8x64( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
|
||||
size<<3 );
|
||||
blake512_8way_update( &ctx.blake, vhash, size );
|
||||
}
|
||||
blake512_8way_close( &ctx.blake, vhash );
|
||||
dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7, vhash );
|
||||
break;
|
||||
case BMW:
|
||||
bmw512_8way_init( &ctx.bmw );
|
||||
if ( i == 0 )
|
||||
bmw512_8way_update( &ctx.bmw, input, size );
|
||||
else
|
||||
{
|
||||
intrlv_8x64( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
|
||||
size<<3 );
|
||||
bmw512_8way_update( &ctx.bmw, vhash, size );
|
||||
}
|
||||
bmw512_8way_close( &ctx.bmw, vhash );
|
||||
dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7, vhash );
|
||||
break;
|
||||
case GROESTL:
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash0,
|
||||
(const char*)in0, size<<3 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash1,
|
||||
(const char*)in1, size<<3 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash2,
|
||||
(const char*)in2, size<<3 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash3,
|
||||
(const char*)in3, size<<3 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash4,
|
||||
(const char*)in4, size<<3 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash5,
|
||||
(const char*)in5, size<<3 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash6,
|
||||
(const char*)in6, size<<3 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash7,
|
||||
(const char*)in7, size<<3 );
|
||||
break;
|
||||
case SKEIN:
|
||||
skein512_8way_init( &ctx.skein );
|
||||
if ( i == 0 )
|
||||
skein512_8way_update( &ctx.skein, input, size );
|
||||
else
|
||||
{
|
||||
intrlv_8x64( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
|
||||
size<<3 );
|
||||
skein512_8way_update( &ctx.skein, vhash, size );
|
||||
}
|
||||
skein512_8way_close( &ctx.skein, vhash );
|
||||
dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7, vhash );
|
||||
break;
|
||||
case JH:
|
||||
jh512_8way_init( &ctx.jh );
|
||||
if ( i == 0 )
|
||||
jh512_8way_update( &ctx.jh, input, size );
|
||||
else
|
||||
{
|
||||
intrlv_8x64( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
|
||||
size<<3 );
|
||||
jh512_8way_update( &ctx.jh, vhash, size );
|
||||
}
|
||||
jh512_8way_close( &ctx.jh, vhash );
|
||||
dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7, vhash );
|
||||
break;
|
||||
case KECCAK:
|
||||
keccak512_8way_init( &ctx.keccak );
|
||||
if ( i == 0 )
|
||||
keccak512_8way_update( &ctx.keccak, input, size );
|
||||
else
|
||||
{
|
||||
intrlv_8x64( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
|
||||
size<<3 );
|
||||
keccak512_8way_update( &ctx.keccak, vhash, size );
|
||||
}
|
||||
keccak512_8way_close( &ctx.keccak, vhash );
|
||||
dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7, vhash );
|
||||
break;
|
||||
case LUFFA:
|
||||
intrlv_4x128( vhash, in0, in1, in2, in3, size<<3 );
|
||||
luffa_4way_init( &ctx.luffa, 512 );
|
||||
luffa_4way_update_close( &ctx.luffa, vhash, vhash, size );
|
||||
dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhash );
|
||||
intrlv_4x128( vhash, in4, in5, in6, in7, size<<3 );
|
||||
luffa_4way_init( &ctx.luffa, 512 );
|
||||
luffa_4way_update_close( &ctx.luffa, vhash, vhash, size);
|
||||
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash );
|
||||
break;
|
||||
case CUBEHASH:
|
||||
intrlv_4x128( vhash, in0, in1, in2, in3, size<<3 );
|
||||
cube_4way_init( &ctx.cube, 512, 16, 32 );
|
||||
cube_4way_update_close( &ctx.cube, vhash, vhash, 64 );
|
||||
dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhash );
|
||||
intrlv_4x128( vhash, in4, in5, in6, in7, size<<3 );
|
||||
cube_4way_init( &ctx.cube, 512, 16, 32 );
|
||||
cube_4way_update_close( &ctx.cube, vhash, vhash, 64 );
|
||||
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash );
|
||||
break;
|
||||
case SHAVITE:
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, in0, size );
|
||||
sph_shavite512_close( &ctx.shavite, hash0 );
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, in1, size );
|
||||
sph_shavite512_close( &ctx.shavite, hash1 );
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, in2, size );
|
||||
sph_shavite512_close( &ctx.shavite, hash2 );
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, in3, size );
|
||||
sph_shavite512_close( &ctx.shavite, hash3 );
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, in4, size );
|
||||
sph_shavite512_close( &ctx.shavite, hash4 );
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, in5, size );
|
||||
sph_shavite512_close( &ctx.shavite, hash5 );
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, in6, size );
|
||||
sph_shavite512_close( &ctx.shavite, hash6 );
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, in7, size );
|
||||
sph_shavite512_close( &ctx.shavite, hash7 );
|
||||
break;
|
||||
case SIMD:
|
||||
intrlv_4x128( vhash, in0, in1, in2, in3, size<<3 );
|
||||
simd_4way_init( &ctx.simd, 512 );
|
||||
simd_4way_update_close( &ctx.simd, vhash, vhash, size<<3 );
|
||||
dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhash );
|
||||
intrlv_4x128( vhash, in4, in5, in6, in7, size<<3 );
|
||||
simd_4way_init( &ctx.simd, 512 );
|
||||
simd_4way_update_close( &ctx.simd, vhash, vhash, size<<3 );
|
||||
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash );
|
||||
break;
|
||||
case ECHO:
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence *)hash0,
|
||||
(const BitSequence*)in0, size<<3 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence *)hash1,
|
||||
(const BitSequence*)in1, size<<3 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence *)hash2,
|
||||
(const BitSequence*)in2, size<<3 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence *)hash3,
|
||||
(const BitSequence*)in3, size<<3 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence *)hash4,
|
||||
(const BitSequence*)in4, size<<3 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence *)hash5,
|
||||
(const BitSequence*)in5, size<<3 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence *)hash6,
|
||||
(const BitSequence*)in6, size<<3 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence *)hash7,
|
||||
(const BitSequence*)in7, size<<3 );
|
||||
break;
|
||||
case HAMSI:
|
||||
intrlv_8x64( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
|
||||
size<<3 );
|
||||
|
||||
hamsi512_8way_init( &ctx.hamsi );
|
||||
hamsi512_8way_update( &ctx.hamsi, vhash, size );
|
||||
hamsi512_8way_close( &ctx.hamsi, vhash );
|
||||
dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7, vhash );
|
||||
break;
|
||||
case FUGUE:
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, in0, size );
|
||||
sph_fugue512_close( &ctx.fugue, hash0 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, in1, size );
|
||||
sph_fugue512_close( &ctx.fugue, hash1 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, in2, size );
|
||||
sph_fugue512_close( &ctx.fugue, hash2 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, in3, size );
|
||||
sph_fugue512_close( &ctx.fugue, hash3 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, in4, size );
|
||||
sph_fugue512_close( &ctx.fugue, hash4 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, in5, size );
|
||||
sph_fugue512_close( &ctx.fugue, hash5 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, in6, size );
|
||||
sph_fugue512_close( &ctx.fugue, hash6 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, in7, size );
|
||||
sph_fugue512_close( &ctx.fugue, hash7 );
|
||||
break;
|
||||
case SHABAL:
|
||||
intrlv_8x32( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
|
||||
size<<3 );
|
||||
shabal512_8way_init( &ctx.shabal );
|
||||
shabal512_8way_update( &ctx.shabal, vhash, size );
|
||||
shabal512_8way_close( &ctx.shabal, vhash );
|
||||
dintrlv_8x32_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7, vhash );
|
||||
break;
|
||||
case WHIRLPOOL:
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, in0, size );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash0 );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, in1, size );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash1 );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, in2, size );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash2 );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, in3, size );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash3 );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, in4, size );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash4 );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, in5, size );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash5 );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, in6, size );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash6 );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, in7, size );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash7 );
|
||||
break;
|
||||
case SHA_512:
|
||||
intrlv_8x64( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
|
||||
size<<3 );
|
||||
sha512_8way_init( &ctx.sha512 );
|
||||
sha512_8way_update( &ctx.sha512, vhash, size );
|
||||
sha512_8way_close( &ctx.sha512, vhash );
|
||||
dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7, vhash );
|
||||
break;
|
||||
}
|
||||
size = 64;
|
||||
}
|
||||
|
||||
intrlv_8x32_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7 );
|
||||
|
||||
haval256_5_8way_init( &ctx.haval );
|
||||
haval256_5_8way_update( &ctx.haval, vhash, 64 );
|
||||
haval256_5_8way_close( &ctx.haval, vhash );
|
||||
|
||||
dintrlv_8x32_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7, vhash );
|
||||
|
||||
sph_tiger_init( &ctx.tiger );
|
||||
sph_tiger ( &ctx.tiger, (const void*) hash0, 64 );
|
||||
sph_tiger_close( &ctx.tiger, (void*) hash0 );
|
||||
sph_tiger_init( &ctx.tiger );
|
||||
sph_tiger ( &ctx.tiger, (const void*) hash1, 64 );
|
||||
sph_tiger_close( &ctx.tiger, (void*) hash1 );
|
||||
sph_tiger_init( &ctx.tiger );
|
||||
sph_tiger ( &ctx.tiger, (const void*) hash2, 64 );
|
||||
sph_tiger_close( &ctx.tiger, (void*) hash2 );
|
||||
sph_tiger_init( &ctx.tiger );
|
||||
sph_tiger ( &ctx.tiger, (const void*) hash3, 64 );
|
||||
sph_tiger_close( &ctx.tiger, (void*) hash3 );
|
||||
sph_tiger_init( &ctx.tiger );
|
||||
sph_tiger ( &ctx.tiger, (const void*) hash4, 64 );
|
||||
sph_tiger_close( &ctx.tiger, (void*) hash4 );
|
||||
sph_tiger_init( &ctx.tiger );
|
||||
sph_tiger ( &ctx.tiger, (const void*) hash5, 64 );
|
||||
sph_tiger_close( &ctx.tiger, (void*) hash5 );
|
||||
sph_tiger_init( &ctx.tiger );
|
||||
sph_tiger ( &ctx.tiger, (const void*) hash6, 64 );
|
||||
sph_tiger_close( &ctx.tiger, (void*) hash6 );
|
||||
sph_tiger_init( &ctx.tiger );
|
||||
sph_tiger ( &ctx.tiger, (const void*) hash7, 64 );
|
||||
sph_tiger_close( &ctx.tiger, (void*) hash7 );
|
||||
|
||||
intrlv_2x256( vhash, hash0, hash1, 256 );
|
||||
LYRA2REV2_2WAY( x21s_8way_matrix, vhash, 32, vhash, 32, 1, 4, 4 );
|
||||
dintrlv_2x256( hash0, hash1, vhash, 256 );
|
||||
intrlv_2x256( vhash, hash2, hash3, 256 );
|
||||
LYRA2REV2_2WAY( x21s_8way_matrix, vhash, 32, vhash, 32, 1, 4, 4 );
|
||||
dintrlv_2x256( hash2, hash3, vhash, 256 );
|
||||
intrlv_2x256( vhash, hash4, hash5, 256 );
|
||||
LYRA2REV2_2WAY( x21s_8way_matrix, vhash, 32, vhash, 32, 1, 4, 4 );
|
||||
dintrlv_2x256( hash4, hash5, vhash, 256 );
|
||||
intrlv_2x256( vhash, hash6, hash7, 256 );
|
||||
LYRA2REV2_2WAY( x21s_8way_matrix, vhash, 32, vhash, 32, 1, 4, 4 );
|
||||
dintrlv_2x256( hash6, hash7, vhash, 256 );
|
||||
|
||||
sph_gost512_init( &ctx.gost );
|
||||
sph_gost512 ( &ctx.gost, (const void*) hash0, 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*) hash0 );
|
||||
sph_gost512_init( &ctx.gost );
|
||||
sph_gost512 ( &ctx.gost, (const void*) hash1, 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*) hash1 );
|
||||
sph_gost512_init( &ctx.gost );
|
||||
sph_gost512 ( &ctx.gost, (const void*) hash2, 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*) hash2 );
|
||||
sph_gost512_init( &ctx.gost );
|
||||
sph_gost512 ( &ctx.gost, (const void*) hash3, 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*) hash3 );
|
||||
sph_gost512_init( &ctx.gost );
|
||||
sph_gost512 ( &ctx.gost, (const void*) hash4, 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*) hash4 );
|
||||
sph_gost512_init( &ctx.gost );
|
||||
sph_gost512 ( &ctx.gost, (const void*) hash5, 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*) hash5 );
|
||||
sph_gost512_init( &ctx.gost );
|
||||
sph_gost512 ( &ctx.gost, (const void*) hash6, 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*) hash6 );
|
||||
sph_gost512_init( &ctx.gost );
|
||||
sph_gost512 ( &ctx.gost, (const void*) hash7, 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*) hash7 );
|
||||
|
||||
intrlv_8x32_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7 );
|
||||
sha256_8way_init( &ctx.sha256 );
|
||||
sha256_8way_update( &ctx.sha256, vhash, 64 );
|
||||
sha256_8way_close( &ctx.sha256, output );
|
||||
}
|
||||
|
||||
int scanhash_x21s_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr)
|
||||
{
|
||||
uint32_t hash[8*16] __attribute__ ((aligned (128)));
|
||||
uint32_t vdata[24*8] __attribute__ ((aligned (64)));
|
||||
uint32_t *hash7 = &hash[7<<3];
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
||||
uint32_t bedata1[2] __attribute__((aligned(64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
const uint32_t last_nonce = max_nonce - 16;
|
||||
int thr_id = mythr->id;
|
||||
__m512i *noncev = (__m512i*)vdata + 9; // aligned
|
||||
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
||||
|
||||
if ( opt_benchmark )
|
||||
ptarget[7] = 0x0cff;
|
||||
|
||||
mm512_bswap32_intrlv80_8x64( vdata, pdata );
|
||||
|
||||
bedata1[0] = bswap_32( pdata[1] );
|
||||
bedata1[1] = bswap_32( pdata[2] );
|
||||
uint32_t ntime = bswap_32( pdata[17] );
|
||||
if ( s_ntime != ntime )
|
||||
{
|
||||
x16_r_s_getAlgoString( (const uint8_t*)bedata1, hashOrder );
|
||||
s_ntime = ntime;
|
||||
if ( opt_debug && !thr_id )
|
||||
applog( LOG_DEBUG, "hash order %s (%08x)", hashOrder, ntime );
|
||||
}
|
||||
|
||||
do
|
||||
{
|
||||
*noncev = mm512_intrlv_blend_32( mm512_bswap_32(
|
||||
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
|
||||
n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
|
||||
|
||||
x21s_8way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
for ( int lane = 0; lane < 8; lane++ )
|
||||
if ( unlikely( hash7[lane] <= Htarg ) )
|
||||
{
|
||||
extr_lane_8x32( lane_hash, hash, lane, 256 );
|
||||
if ( likely( fulltest( lane_hash, ptarget ) && !opt_benchmark ) )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
}
|
||||
}
|
||||
n += 8;
|
||||
} while ( ( n < last_nonce ) && !(*restart) );
|
||||
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool x21s_8way_thread_init()
|
||||
{
|
||||
const int64_t ROW_LEN_INT64 = BLOCK_LEN_INT64 * 4; // nCols
|
||||
const int64_t ROW_LEN_BYTES = ROW_LEN_INT64 * 8;
|
||||
|
||||
const int size = (int64_t)ROW_LEN_BYTES * 4; // nRows;
|
||||
x21s_8way_matrix = _mm_malloc( 2 * size, 64 );
|
||||
return x21s_8way_matrix;
|
||||
}
|
||||
|
||||
#elif defined (X21S_4WAY)
|
||||
|
||||
static __thread uint64_t* x21s_4way_matrix;
|
||||
|
||||
union _x21s_4way_context_overlay
|
||||
@@ -103,11 +578,11 @@ void x21s_4way_hash( void* output, const void* input )
|
||||
case BLAKE:
|
||||
blake512_4way_init( &ctx.blake );
|
||||
if ( i == 0 )
|
||||
blake512_4way( &ctx.blake, input, size );
|
||||
blake512_4way_update( &ctx.blake, input, size );
|
||||
else
|
||||
{
|
||||
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
||||
blake512_4way( &ctx.blake, vhash, size );
|
||||
blake512_4way_update( &ctx.blake, vhash, size );
|
||||
}
|
||||
blake512_4way_close( &ctx.blake, vhash );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
@@ -115,11 +590,11 @@ void x21s_4way_hash( void* output, const void* input )
|
||||
case BMW:
|
||||
bmw512_4way_init( &ctx.bmw );
|
||||
if ( i == 0 )
|
||||
bmw512_4way( &ctx.bmw, input, size );
|
||||
bmw512_4way_update( &ctx.bmw, input, size );
|
||||
else
|
||||
{
|
||||
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
||||
bmw512_4way( &ctx.bmw, vhash, size );
|
||||
bmw512_4way_update( &ctx.bmw, vhash, size );
|
||||
}
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
@@ -141,11 +616,11 @@ void x21s_4way_hash( void* output, const void* input )
|
||||
case SKEIN:
|
||||
skein512_4way_init( &ctx.skein );
|
||||
if ( i == 0 )
|
||||
skein512_4way( &ctx.skein, input, size );
|
||||
skein512_4way_update( &ctx.skein, input, size );
|
||||
else
|
||||
{
|
||||
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
||||
skein512_4way( &ctx.skein, vhash, size );
|
||||
skein512_4way_update( &ctx.skein, vhash, size );
|
||||
}
|
||||
skein512_4way_close( &ctx.skein, vhash );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
@@ -153,11 +628,11 @@ void x21s_4way_hash( void* output, const void* input )
|
||||
case JH:
|
||||
jh512_4way_init( &ctx.jh );
|
||||
if ( i == 0 )
|
||||
jh512_4way( &ctx.jh, input, size );
|
||||
jh512_4way_update( &ctx.jh, input, size );
|
||||
else
|
||||
{
|
||||
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
||||
jh512_4way( &ctx.jh, vhash, size );
|
||||
jh512_4way_update( &ctx.jh, vhash, size );
|
||||
}
|
||||
jh512_4way_close( &ctx.jh, vhash );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
@@ -165,11 +640,11 @@ void x21s_4way_hash( void* output, const void* input )
|
||||
case KECCAK:
|
||||
keccak512_4way_init( &ctx.keccak );
|
||||
if ( i == 0 )
|
||||
keccak512_4way( &ctx.keccak, input, size );
|
||||
keccak512_4way_update( &ctx.keccak, input, size );
|
||||
else
|
||||
{
|
||||
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
||||
keccak512_4way( &ctx.keccak, vhash, size );
|
||||
keccak512_4way_update( &ctx.keccak, vhash, size );
|
||||
}
|
||||
keccak512_4way_close( &ctx.keccak, vhash );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
@@ -239,7 +714,7 @@ void x21s_4way_hash( void* output, const void* input )
|
||||
case HAMSI:
|
||||
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
||||
hamsi512_4way_init( &ctx.hamsi );
|
||||
hamsi512_4way( &ctx.hamsi, vhash, size );
|
||||
hamsi512_4way_update( &ctx.hamsi, vhash, size );
|
||||
hamsi512_4way_close( &ctx.hamsi, vhash );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
break;
|
||||
@@ -260,7 +735,7 @@ void x21s_4way_hash( void* output, const void* input )
|
||||
case SHABAL:
|
||||
intrlv_4x32( vhash, in0, in1, in2, in3, size<<3 );
|
||||
shabal512_4way_init( &ctx.shabal );
|
||||
shabal512_4way( &ctx.shabal, vhash, size );
|
||||
shabal512_4way_update( &ctx.shabal, vhash, size );
|
||||
shabal512_4way_close( &ctx.shabal, vhash );
|
||||
dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
break;
|
||||
@@ -281,7 +756,7 @@ void x21s_4way_hash( void* output, const void* input )
|
||||
case SHA_512:
|
||||
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
||||
sha512_4way_init( &ctx.sha512 );
|
||||
sha512_4way( &ctx.sha512, vhash, size );
|
||||
sha512_4way_update( &ctx.sha512, vhash, size );
|
||||
sha512_4way_close( &ctx.sha512, vhash );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
break;
|
||||
@@ -292,7 +767,7 @@ void x21s_4way_hash( void* output, const void* input )
|
||||
intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
|
||||
haval256_5_4way_init( &ctx.haval );
|
||||
haval256_5_4way( &ctx.haval, vhash, 64 );
|
||||
haval256_5_4way_update( &ctx.haval, vhash, 64 );
|
||||
haval256_5_4way_close( &ctx.haval, vhash );
|
||||
|
||||
dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
@@ -356,7 +831,7 @@ void x21s_4way_hash( void* output, const void* input )
|
||||
|
||||
intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
sha256_4way_init( &ctx.sha256 );
|
||||
sha256_4way( &ctx.sha256, vhash, 64 );
|
||||
sha256_4way_update( &ctx.sha256, vhash, 64 );
|
||||
sha256_4way_close( &ctx.sha256, vhash );
|
||||
dintrlv_4x32( output, output+32, output+64,output+96, vhash, 256 );
|
||||
|
||||
|
@@ -1319,7 +1319,7 @@ int scanhash_sonoa_8way( struct work *work, uint32_t max_nonce,
|
||||
uint32_t *pdata = work->data;
|
||||
const uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 8;
|
||||
const uint32_t last_nonce = max_nonce - 8;
|
||||
__m512i *noncev = (__m512i*)vdata + 9; // aligned
|
||||
uint32_t n = first_nonce;
|
||||
const int thr_id = mythr->id;
|
||||
@@ -1350,8 +1350,6 @@ int scanhash_sonoa_8way( struct work *work, uint32_t max_nonce,
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
#elif defined(SONOA_4WAY)
|
||||
|
||||
union _sonoa_4way_context_overlay
|
||||
@@ -1391,11 +1389,11 @@ void sonoa_4way_hash( void *state, const void *input )
|
||||
// 1
|
||||
|
||||
blake512_4way_init( &ctx.blake );
|
||||
blake512_4way( &ctx.blake, input, 80 );
|
||||
blake512_4way_update( &ctx.blake, input, 80 );
|
||||
blake512_4way_close( &ctx.blake, vhash );
|
||||
|
||||
bmw512_4way_init( &ctx.bmw );
|
||||
bmw512_4way( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_update( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
@@ -1412,15 +1410,15 @@ void sonoa_4way_hash( void *state, const void *input )
|
||||
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
skein512_4way_init( &ctx.skein );
|
||||
skein512_4way( &ctx.skein, vhash, 64 );
|
||||
skein512_4way_update( &ctx.skein, vhash, 64 );
|
||||
skein512_4way_close( &ctx.skein, vhash );
|
||||
|
||||
jh512_4way_init( &ctx.jh );
|
||||
jh512_4way( &ctx.jh, vhash, 64 );
|
||||
jh512_4way_update( &ctx.jh, vhash, 64 );
|
||||
jh512_4way_close( &ctx.jh, vhash );
|
||||
|
||||
keccak512_4way_init( &ctx.keccak );
|
||||
keccak512_4way( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_close( &ctx.keccak, vhash );
|
||||
|
||||
rintrlv_4x64_2x128( vhashA, vhashB, vhash, 512 );
|
||||
@@ -1466,7 +1464,7 @@ void sonoa_4way_hash( void *state, const void *input )
|
||||
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
bmw512_4way_init( &ctx.bmw );
|
||||
bmw512_4way( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_update( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
@@ -1483,15 +1481,15 @@ void sonoa_4way_hash( void *state, const void *input )
|
||||
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
skein512_4way_init( &ctx.skein );
|
||||
skein512_4way( &ctx.skein, vhash, 64 );
|
||||
skein512_4way_update( &ctx.skein, vhash, 64 );
|
||||
skein512_4way_close( &ctx.skein, vhash );
|
||||
|
||||
jh512_4way_init( &ctx.jh );
|
||||
jh512_4way( &ctx.jh, vhash, 64 );
|
||||
jh512_4way_update( &ctx.jh, vhash, 64 );
|
||||
jh512_4way_close( &ctx.jh, vhash );
|
||||
|
||||
keccak512_4way_init( &ctx.keccak );
|
||||
keccak512_4way( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_close( &ctx.keccak, vhash );
|
||||
|
||||
rintrlv_4x64_2x128( vhashA, vhashB, vhash, 512 );
|
||||
@@ -1535,13 +1533,13 @@ void sonoa_4way_hash( void *state, const void *input )
|
||||
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
hamsi512_4way_init( &ctx.hamsi );
|
||||
hamsi512_4way( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_4way_update( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_4way_close( &ctx.hamsi, vhash );
|
||||
|
||||
// 3
|
||||
|
||||
bmw512_4way_init( &ctx.bmw );
|
||||
bmw512_4way( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_update( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
@@ -1558,15 +1556,15 @@ void sonoa_4way_hash( void *state, const void *input )
|
||||
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
skein512_4way_init( &ctx.skein );
|
||||
skein512_4way( &ctx.skein, vhash, 64 );
|
||||
skein512_4way_update( &ctx.skein, vhash, 64 );
|
||||
skein512_4way_close( &ctx.skein, vhash );
|
||||
|
||||
jh512_4way_init( &ctx.jh );
|
||||
jh512_4way( &ctx.jh, vhash, 64 );
|
||||
jh512_4way_update( &ctx.jh, vhash, 64 );
|
||||
jh512_4way_close( &ctx.jh, vhash );
|
||||
|
||||
keccak512_4way_init( &ctx.keccak );
|
||||
keccak512_4way( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_close( &ctx.keccak, vhash );
|
||||
|
||||
rintrlv_4x64_2x128( vhashA, vhashB, vhash, 512 );
|
||||
@@ -1610,7 +1608,7 @@ void sonoa_4way_hash( void *state, const void *input )
|
||||
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
hamsi512_4way_init( &ctx.hamsi );
|
||||
hamsi512_4way( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_4way_update( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_4way_close( &ctx.hamsi, vhash );
|
||||
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
@@ -1632,7 +1630,7 @@ void sonoa_4way_hash( void *state, const void *input )
|
||||
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
bmw512_4way_init( &ctx.bmw );
|
||||
bmw512_4way( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_update( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
@@ -1649,15 +1647,15 @@ void sonoa_4way_hash( void *state, const void *input )
|
||||
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
skein512_4way_init( &ctx.skein );
|
||||
skein512_4way( &ctx.skein, vhash, 64 );
|
||||
skein512_4way_update( &ctx.skein, vhash, 64 );
|
||||
skein512_4way_close( &ctx.skein, vhash );
|
||||
|
||||
jh512_4way_init( &ctx.jh );
|
||||
jh512_4way( &ctx.jh, vhash, 64 );
|
||||
jh512_4way_update( &ctx.jh, vhash, 64 );
|
||||
jh512_4way_close( &ctx.jh, vhash );
|
||||
|
||||
keccak512_4way_init( &ctx.keccak );
|
||||
keccak512_4way( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_close( &ctx.keccak, vhash );
|
||||
|
||||
rintrlv_4x64_2x128( vhashA, vhashB, vhash, 512 );
|
||||
@@ -1701,7 +1699,7 @@ void sonoa_4way_hash( void *state, const void *input )
|
||||
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
hamsi512_4way_init( &ctx.hamsi );
|
||||
hamsi512_4way( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_4way_update( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_4way_close( &ctx.hamsi, vhash );
|
||||
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
@@ -1722,13 +1720,13 @@ void sonoa_4way_hash( void *state, const void *input )
|
||||
intrlv_4x32_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
shabal512_4way_init( &ctx.shabal );
|
||||
shabal512_4way( &ctx.shabal, vhash, 64 );
|
||||
shabal512_4way_update( &ctx.shabal, vhash, 64 );
|
||||
shabal512_4way_close( &ctx.shabal, vhash );
|
||||
|
||||
rintrlv_4x32_4x64( vhashB, vhash, 512 );
|
||||
|
||||
hamsi512_4way_init( &ctx.hamsi );
|
||||
hamsi512_4way( &ctx.hamsi, vhashB, 64 );
|
||||
hamsi512_4way_update( &ctx.hamsi, vhashB, 64 );
|
||||
hamsi512_4way_close( &ctx.hamsi, vhash );
|
||||
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
@@ -1758,13 +1756,13 @@ void sonoa_4way_hash( void *state, const void *input )
|
||||
rintrlv_2x128_4x64( vhash, vhashA, vhashB, 512 );
|
||||
|
||||
bmw512_4way_init( &ctx.bmw );
|
||||
bmw512_4way( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_update( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
|
||||
rintrlv_4x64_4x32( vhashB, vhash, 512 );
|
||||
|
||||
shabal512_4way_init( &ctx.shabal );
|
||||
shabal512_4way( &ctx.shabal, vhashB, 64 );
|
||||
shabal512_4way_update( &ctx.shabal, vhashB, 64 );
|
||||
shabal512_4way_close( &ctx.shabal, vhash );
|
||||
|
||||
dintrlv_4x32_512( hash0, hash1, hash2, hash3, vhash );
|
||||
@@ -1781,15 +1779,15 @@ void sonoa_4way_hash( void *state, const void *input )
|
||||
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
skein512_4way_init( &ctx.skein );
|
||||
skein512_4way( &ctx.skein, vhash, 64 );
|
||||
skein512_4way_update( &ctx.skein, vhash, 64 );
|
||||
skein512_4way_close( &ctx.skein, vhash );
|
||||
|
||||
jh512_4way_init( &ctx.jh );
|
||||
jh512_4way( &ctx.jh, vhash, 64 );
|
||||
jh512_4way_update( &ctx.jh, vhash, 64 );
|
||||
jh512_4way_close( &ctx.jh, vhash );
|
||||
|
||||
keccak512_4way_init( &ctx.keccak );
|
||||
keccak512_4way( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_close( &ctx.keccak, vhash );
|
||||
|
||||
rintrlv_4x64_2x128( vhashA, vhashB, vhash, 512 );
|
||||
@@ -1833,7 +1831,7 @@ void sonoa_4way_hash( void *state, const void *input )
|
||||
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
hamsi512_4way_init( &ctx.hamsi );
|
||||
hamsi512_4way( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_4way_update( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_4way_close( &ctx.hamsi, vhash );
|
||||
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
@@ -1854,7 +1852,7 @@ void sonoa_4way_hash( void *state, const void *input )
|
||||
intrlv_4x32_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
shabal512_4way_init( &ctx.shabal );
|
||||
shabal512_4way( &ctx.shabal, vhash, 64 );
|
||||
shabal512_4way_update( &ctx.shabal, vhash, 64 );
|
||||
shabal512_4way_close( &ctx.shabal, vhash );
|
||||
|
||||
dintrlv_4x32_512( hash0, hash1, hash2, hash3, vhash );
|
||||
@@ -1877,7 +1875,7 @@ void sonoa_4way_hash( void *state, const void *input )
|
||||
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
bmw512_4way_init( &ctx.bmw );
|
||||
bmw512_4way( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_update( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
@@ -1894,15 +1892,15 @@ void sonoa_4way_hash( void *state, const void *input )
|
||||
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
skein512_4way_init( &ctx.skein );
|
||||
skein512_4way( &ctx.skein, vhash, 64 );
|
||||
skein512_4way_update( &ctx.skein, vhash, 64 );
|
||||
skein512_4way_close( &ctx.skein, vhash );
|
||||
|
||||
jh512_4way_init( &ctx.jh );
|
||||
jh512_4way( &ctx.jh, vhash, 64 );
|
||||
jh512_4way_update( &ctx.jh, vhash, 64 );
|
||||
jh512_4way_close( &ctx.jh, vhash );
|
||||
|
||||
keccak512_4way_init( &ctx.keccak );
|
||||
keccak512_4way( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_close( &ctx.keccak, vhash );
|
||||
|
||||
rintrlv_4x64_2x128( vhashA, vhashB, vhash, 512 );
|
||||
@@ -1946,7 +1944,7 @@ void sonoa_4way_hash( void *state, const void *input )
|
||||
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
hamsi512_4way_init( &ctx.hamsi );
|
||||
hamsi512_4way( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_4way_update( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_4way_close( &ctx.hamsi, vhash );
|
||||
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
@@ -1967,7 +1965,7 @@ void sonoa_4way_hash( void *state, const void *input )
|
||||
intrlv_4x32_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
shabal512_4way_init( &ctx.shabal );
|
||||
shabal512_4way( &ctx.shabal, vhash, 64 );
|
||||
shabal512_4way_update( &ctx.shabal, vhash, 64 );
|
||||
shabal512_4way_close( &ctx.shabal, vhash );
|
||||
|
||||
dintrlv_4x32_512( hash0, hash1, hash2, hash3, vhash );
|
||||
@@ -1988,7 +1986,7 @@ void sonoa_4way_hash( void *state, const void *input )
|
||||
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
sha512_4way_init( &ctx.sha512 );
|
||||
sha512_4way( &ctx.sha512, vhash, 64 );
|
||||
sha512_4way_update( &ctx.sha512, vhash, 64 );
|
||||
sha512_4way_close( &ctx.sha512, vhash );
|
||||
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
@@ -2011,7 +2009,7 @@ void sonoa_4way_hash( void *state, const void *input )
|
||||
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
bmw512_4way_init( &ctx.bmw );
|
||||
bmw512_4way( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_update( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
@@ -2028,15 +2026,15 @@ void sonoa_4way_hash( void *state, const void *input )
|
||||
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
skein512_4way_init( &ctx.skein );
|
||||
skein512_4way( &ctx.skein, vhash, 64 );
|
||||
skein512_4way_update( &ctx.skein, vhash, 64 );
|
||||
skein512_4way_close( &ctx.skein, vhash );
|
||||
|
||||
jh512_4way_init( &ctx.jh );
|
||||
jh512_4way( &ctx.jh, vhash, 64 );
|
||||
jh512_4way_update( &ctx.jh, vhash, 64 );
|
||||
jh512_4way_close( &ctx.jh, vhash );
|
||||
|
||||
keccak512_4way_init( &ctx.keccak );
|
||||
keccak512_4way( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_close( &ctx.keccak, vhash );
|
||||
|
||||
rintrlv_4x64_2x128( vhashA, vhashB, vhash, 512 );
|
||||
@@ -2080,7 +2078,7 @@ void sonoa_4way_hash( void *state, const void *input )
|
||||
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
hamsi512_4way_init( &ctx.hamsi );
|
||||
hamsi512_4way( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_4way_update( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_4way_close( &ctx.hamsi, vhash );
|
||||
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
@@ -2101,7 +2099,7 @@ void sonoa_4way_hash( void *state, const void *input )
|
||||
intrlv_4x32_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
shabal512_4way_init( &ctx.shabal );
|
||||
shabal512_4way( &ctx.shabal, vhash, 64 );
|
||||
shabal512_4way_update( &ctx.shabal, vhash, 64 );
|
||||
shabal512_4way_close( &ctx.shabal, vhash );
|
||||
|
||||
dintrlv_4x32_512( hash0, hash1, hash2, hash3, vhash );
|
||||
@@ -2122,13 +2120,13 @@ void sonoa_4way_hash( void *state, const void *input )
|
||||
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
sha512_4way_init( &ctx.sha512 );
|
||||
sha512_4way( &ctx.sha512, vhash, 64 );
|
||||
sha512_4way_update( &ctx.sha512, vhash, 64 );
|
||||
sha512_4way_close( &ctx.sha512, vhash );
|
||||
|
||||
rintrlv_4x64_4x32( vhashB, vhash, 512 );
|
||||
|
||||
haval256_5_4way_init( &ctx.haval );
|
||||
haval256_5_4way( &ctx.haval, vhashB, 64 );
|
||||
haval256_5_4way_update( &ctx.haval, vhashB, 64 );
|
||||
haval256_5_4way_close( &ctx.haval, state );
|
||||
}
|
||||
|
||||
|
@@ -360,12 +360,12 @@ void x17_4way_hash( void *state, const void *input )
|
||||
|
||||
// 1 Blake parallel 4 way 64 bit
|
||||
blake512_4way_init( &ctx.blake );
|
||||
blake512_4way( &ctx.blake, input, 80 );
|
||||
blake512_4way_update( &ctx.blake, input, 80 );
|
||||
blake512_4way_close( &ctx.blake, vhash );
|
||||
|
||||
// 2 Bmw
|
||||
bmw512_4way_init( &ctx.bmw );
|
||||
bmw512_4way( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_update( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
|
||||
// Serialize
|
||||
@@ -386,17 +386,17 @@ void x17_4way_hash( void *state, const void *input )
|
||||
|
||||
// 4 Skein parallel 4 way 64 bit
|
||||
skein512_4way_init( &ctx.skein );
|
||||
skein512_4way( &ctx.skein, vhash, 64 );
|
||||
skein512_4way_update( &ctx.skein, vhash, 64 );
|
||||
skein512_4way_close( &ctx.skein, vhash );
|
||||
|
||||
// 5 JH
|
||||
jh512_4way_init( &ctx.jh );
|
||||
jh512_4way( &ctx.jh, vhash, 64 );
|
||||
jh512_4way_update( &ctx.jh, vhash, 64 );
|
||||
jh512_4way_close( &ctx.jh, vhash );
|
||||
|
||||
// 6 Keccak
|
||||
keccak512_4way_init( &ctx.keccak );
|
||||
keccak512_4way( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_close( &ctx.keccak, vhash );
|
||||
|
||||
// 7 Luffa parallel 2 way 128 bit
|
||||
@@ -428,7 +428,6 @@ void x17_4way_hash( void *state, const void *input )
|
||||
dintrlv_2x128_512( hash0, hash1, vhashA );
|
||||
dintrlv_2x128_512( hash2, hash3, vhashB );
|
||||
|
||||
|
||||
// 11 Echo serial
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash0,
|
||||
@@ -447,7 +446,7 @@ void x17_4way_hash( void *state, const void *input )
|
||||
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
hamsi512_4way_init( &ctx.hamsi );
|
||||
hamsi512_4way( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_4way_update( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_4way_close( &ctx.hamsi, vhash );
|
||||
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
@@ -470,7 +469,7 @@ void x17_4way_hash( void *state, const void *input )
|
||||
intrlv_4x32_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
shabal512_4way_init( &ctx.shabal );
|
||||
shabal512_4way( &ctx.shabal, vhash, 64 );
|
||||
shabal512_4way_update( &ctx.shabal, vhash, 64 );
|
||||
shabal512_4way_close( &ctx.shabal, vhash );
|
||||
|
||||
dintrlv_4x32_512( hash0, hash1, hash2, hash3, vhash );
|
||||
@@ -493,14 +492,14 @@ void x17_4way_hash( void *state, const void *input )
|
||||
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
sha512_4way_init( &ctx.sha512 );
|
||||
sha512_4way( &ctx.sha512, vhash, 64 );
|
||||
sha512_4way_update( &ctx.sha512, vhash, 64 );
|
||||
sha512_4way_close( &ctx.sha512, vhash );
|
||||
|
||||
// 17 Haval parallel 32 bit
|
||||
rintrlv_4x64_4x32( vhashB, vhash, 512 );
|
||||
|
||||
haval256_5_4way_init( &ctx.haval );
|
||||
haval256_5_4way( &ctx.haval, vhashB, 64 );
|
||||
haval256_5_4way_update( &ctx.haval, vhashB, 64 );
|
||||
haval256_5_4way_close( &ctx.haval, state );
|
||||
}
|
||||
|
||||
|
@@ -569,12 +569,12 @@ void xevan_4way_hash( void *output, const void *input )
|
||||
// parallel 4 way
|
||||
|
||||
blake512_4way_init( &ctx.blake );
|
||||
blake512_4way( &ctx.blake, input, 80 );
|
||||
blake512_4way_update( &ctx.blake, input, 80 );
|
||||
blake512_4way_close(&ctx.blake, vhash);
|
||||
memset( &vhash[8<<2], 0, 64<<2 );
|
||||
|
||||
bmw512_4way_init( &ctx.bmw );
|
||||
bmw512_4way( &ctx.bmw, vhash, dataLen );
|
||||
bmw512_4way_update( &ctx.bmw, vhash, dataLen );
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
|
||||
// Serial
|
||||
@@ -597,15 +597,15 @@ void xevan_4way_hash( void *output, const void *input )
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );
|
||||
|
||||
skein512_4way_init( &ctx.skein );
|
||||
skein512_4way( &ctx.skein, vhash, dataLen );
|
||||
skein512_4way_update( &ctx.skein, vhash, dataLen );
|
||||
skein512_4way_close( &ctx.skein, vhash );
|
||||
|
||||
jh512_4way_init( &ctx.jh );
|
||||
jh512_4way( &ctx.jh, vhash, dataLen );
|
||||
jh512_4way_update( &ctx.jh, vhash, dataLen );
|
||||
jh512_4way_close( &ctx.jh, vhash );
|
||||
|
||||
keccak512_4way_init( &ctx.keccak );
|
||||
keccak512_4way( &ctx.keccak, vhash, dataLen );
|
||||
keccak512_4way_update( &ctx.keccak, vhash, dataLen );
|
||||
keccak512_4way_close( &ctx.keccak, vhash );
|
||||
|
||||
rintrlv_4x64_2x128( vhashA, vhashB, vhash, dataLen<<3 );
|
||||
@@ -649,7 +649,7 @@ void xevan_4way_hash( void *output, const void *input )
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );
|
||||
|
||||
hamsi512_4way_init( &ctx.hamsi );
|
||||
hamsi512_4way( &ctx.hamsi, vhash, dataLen );
|
||||
hamsi512_4way_update( &ctx.hamsi, vhash, dataLen );
|
||||
hamsi512_4way_close( &ctx.hamsi, vhash );
|
||||
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, dataLen<<3 );
|
||||
@@ -671,7 +671,7 @@ void xevan_4way_hash( void *output, const void *input )
|
||||
intrlv_4x32( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );
|
||||
|
||||
shabal512_4way_init( &ctx.shabal );
|
||||
shabal512_4way( &ctx.shabal, vhash, dataLen );
|
||||
shabal512_4way_update( &ctx.shabal, vhash, dataLen );
|
||||
shabal512_4way_close( &ctx.shabal, vhash );
|
||||
|
||||
dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, dataLen<<3 );
|
||||
@@ -693,13 +693,13 @@ void xevan_4way_hash( void *output, const void *input )
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );
|
||||
|
||||
sha512_4way_init( &ctx.sha512 );
|
||||
sha512_4way( &ctx.sha512, vhash, dataLen );
|
||||
sha512_4way_update( &ctx.sha512, vhash, dataLen );
|
||||
sha512_4way_close( &ctx.sha512, vhash );
|
||||
|
||||
rintrlv_4x64_4x32( vhashA, vhash, dataLen<<3 );
|
||||
|
||||
haval256_5_4way_init( &ctx.haval );
|
||||
haval256_5_4way( &ctx.haval, vhashA, dataLen );
|
||||
haval256_5_4way_update( &ctx.haval, vhashA, dataLen );
|
||||
haval256_5_4way_close( &ctx.haval, vhashA );
|
||||
|
||||
rintrlv_4x32_4x64( vhash, vhashA, dataLen<<3 );
|
||||
@@ -707,11 +707,11 @@ void xevan_4way_hash( void *output, const void *input )
|
||||
memset( &vhash[ 4<<2 ], 0, (dataLen-32) << 2 );
|
||||
|
||||
blake512_4way_init( &ctx.blake );
|
||||
blake512_4way( &ctx.blake, vhash, dataLen );
|
||||
blake512_4way_update( &ctx.blake, vhash, dataLen );
|
||||
blake512_4way_close(&ctx.blake, vhash);
|
||||
|
||||
bmw512_4way_init( &ctx.bmw );
|
||||
bmw512_4way( &ctx.bmw, vhash, dataLen );
|
||||
bmw512_4way_update( &ctx.bmw, vhash, dataLen );
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, dataLen<<3 );
|
||||
@@ -732,15 +732,15 @@ void xevan_4way_hash( void *output, const void *input )
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );
|
||||
|
||||
skein512_4way_init( &ctx.skein );
|
||||
skein512_4way( &ctx.skein, vhash, dataLen );
|
||||
skein512_4way_update( &ctx.skein, vhash, dataLen );
|
||||
skein512_4way_close( &ctx.skein, vhash );
|
||||
|
||||
jh512_4way_init( &ctx.jh );
|
||||
jh512_4way( &ctx.jh, vhash, dataLen );
|
||||
jh512_4way_update( &ctx.jh, vhash, dataLen );
|
||||
jh512_4way_close( &ctx.jh, vhash );
|
||||
|
||||
keccak512_4way_init( &ctx.keccak );
|
||||
keccak512_4way( &ctx.keccak, vhash, dataLen );
|
||||
keccak512_4way_update( &ctx.keccak, vhash, dataLen );
|
||||
keccak512_4way_close( &ctx.keccak, vhash );
|
||||
|
||||
rintrlv_4x64_2x128( vhashA, vhashB, vhash, dataLen<<3 );
|
||||
@@ -784,7 +784,7 @@ void xevan_4way_hash( void *output, const void *input )
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );
|
||||
|
||||
hamsi512_4way_init( &ctx.hamsi );
|
||||
hamsi512_4way( &ctx.hamsi, vhash, dataLen );
|
||||
hamsi512_4way_update( &ctx.hamsi, vhash, dataLen );
|
||||
hamsi512_4way_close( &ctx.hamsi, vhash );
|
||||
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, dataLen<<3 );
|
||||
@@ -805,7 +805,7 @@ void xevan_4way_hash( void *output, const void *input )
|
||||
intrlv_4x32( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );
|
||||
|
||||
shabal512_4way_init( &ctx.shabal );
|
||||
shabal512_4way( &ctx.shabal, vhash, dataLen );
|
||||
shabal512_4way_update( &ctx.shabal, vhash, dataLen );
|
||||
shabal512_4way_close( &ctx.shabal, vhash );
|
||||
|
||||
dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, dataLen<<3 );
|
||||
@@ -826,13 +826,13 @@ void xevan_4way_hash( void *output, const void *input )
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );
|
||||
|
||||
sha512_4way_init( &ctx.sha512 );
|
||||
sha512_4way( &ctx.sha512, vhash, dataLen );
|
||||
sha512_4way_update( &ctx.sha512, vhash, dataLen );
|
||||
sha512_4way_close( &ctx.sha512, vhash );
|
||||
|
||||
rintrlv_4x64_4x32( vhashA, vhash, dataLen<<3 );
|
||||
|
||||
haval256_5_4way_init( &ctx.haval );
|
||||
haval256_5_4way( &ctx.haval, vhashA, dataLen );
|
||||
haval256_5_4way_update( &ctx.haval, vhashA, dataLen );
|
||||
haval256_5_4way_close( &ctx.haval, output );
|
||||
}
|
||||
|
||||
|
@@ -1,7 +1,4 @@
|
||||
#include "x22i-gate.h"
|
||||
|
||||
#if defined(X22I_4WAY)
|
||||
|
||||
#include "algo/blake/blake-hash-4way.h"
|
||||
#include "algo/bmw/bmw-hash-4way.h"
|
||||
#include "algo/echo/aes_ni/hash_api.h"
|
||||
@@ -12,6 +9,7 @@
|
||||
#include "algo/luffa/luffa-hash-2way.h"
|
||||
#include "algo/cubehash/cube-hash-2way.h"
|
||||
#include "algo/shavite/shavite-hash-2way.h"
|
||||
#include "algo/shavite/sph_shavite.h"
|
||||
#include "algo/simd/simd-hash-2way.h"
|
||||
#include "algo/shavite/sph_shavite.h"
|
||||
#include "algo/hamsi/hamsi-hash-4way.h"
|
||||
@@ -25,6 +23,424 @@
|
||||
#include "algo/gost/sph_gost.h"
|
||||
#include "algo/swifftx/swifftx.h"
|
||||
|
||||
|
||||
#if defined(X22I_8WAY)
|
||||
|
||||
union _x22i_8way_ctx_overlay
|
||||
{
|
||||
blake512_8way_context blake;
|
||||
bmw512_8way_context bmw;
|
||||
hashState_groestl groestl;
|
||||
hashState_echo echo;
|
||||
skein512_8way_context skein;
|
||||
jh512_8way_context jh;
|
||||
keccak512_8way_context keccak;
|
||||
luffa_4way_context luffa;
|
||||
cube_4way_context cube;
|
||||
sph_shavite512_context shavite;
|
||||
simd_4way_context simd;
|
||||
hamsi512_8way_context hamsi;
|
||||
sph_fugue512_context fugue;
|
||||
shabal512_8way_context shabal;
|
||||
sph_whirlpool_context whirlpool;
|
||||
sha512_8way_context sha512;
|
||||
haval256_5_8way_context haval;
|
||||
sph_tiger_context tiger;
|
||||
sph_gost512_context gost;
|
||||
sha256_8way_context sha256;
|
||||
};
|
||||
typedef union _x22i_8way_ctx_overlay x22i_8way_ctx_overlay;
|
||||
|
||||
void x22i_8way_hash( void *output, const void *input )
{
   uint64_t vhash[8*8] __attribute__ ((aligned (128)));
   uint64_t vhashA[8*8] __attribute__ ((aligned (64)));
   uint64_t vhashB[8*8] __attribute__ ((aligned (64)));
   uint64_t hash0[8*4] __attribute__ ((aligned (64)));
   uint64_t hash1[8*4] __attribute__ ((aligned (64)));
   uint64_t hash2[8*4] __attribute__ ((aligned (64)));
   uint64_t hash3[8*4] __attribute__ ((aligned (64)));
   uint64_t hash4[8*4] __attribute__ ((aligned (64)));
   uint64_t hash5[8*4] __attribute__ ((aligned (64)));
   uint64_t hash6[8*4] __attribute__ ((aligned (64)));
   uint64_t hash7[8*4] __attribute__ ((aligned (64)));

// unsigned char hash[64 * 4] __attribute__((aligned(64))) = {0};
   unsigned char hashA0[64] __attribute__((aligned(64))) = {0};
   unsigned char hashA1[64] __attribute__((aligned(32))) = {0};
   unsigned char hashA2[64] __attribute__((aligned(32))) = {0};
   unsigned char hashA3[64] __attribute__((aligned(32))) = {0};
   unsigned char hashA4[64] __attribute__((aligned(64))) = {0};
   unsigned char hashA5[64] __attribute__((aligned(32))) = {0};
   unsigned char hashA6[64] __attribute__((aligned(32))) = {0};
   unsigned char hashA7[64] __attribute__((aligned(32))) = {0};
   x22i_8way_ctx_overlay ctx;

   blake512_8way_init( &ctx.blake );
   blake512_8way_update( &ctx.blake, input, 80 );
   blake512_8way_close( &ctx.blake, vhash );

   bmw512_8way_init( &ctx.bmw );
   bmw512_8way_update( &ctx.bmw, vhash, 64 );
   bmw512_8way_close( &ctx.bmw, vhash );

   dintrlv_8x64_512( hash0, hash1, hash2, hash3,
                     hash4, hash5, hash6, hash7, vhash );

   init_groestl( &ctx.groestl, 64 );
   update_and_final_groestl( &ctx.groestl, (char*)hash0,
                             (const char*)hash0, 512 );
   init_groestl( &ctx.groestl, 64 );
   update_and_final_groestl( &ctx.groestl, (char*)hash1,
                             (const char*)hash1, 512 );
   init_groestl( &ctx.groestl, 64 );
   update_and_final_groestl( &ctx.groestl, (char*)hash2,
                             (const char*)hash2, 512 );
   init_groestl( &ctx.groestl, 64 );
   update_and_final_groestl( &ctx.groestl, (char*)hash3,
                             (const char*)hash3, 512 );
   init_groestl( &ctx.groestl, 64 );
   update_and_final_groestl( &ctx.groestl, (char*)hash4,
                             (const char*)hash4, 512 );
   init_groestl( &ctx.groestl, 64 );
   update_and_final_groestl( &ctx.groestl, (char*)hash5,
                             (const char*)hash5, 512 );
   init_groestl( &ctx.groestl, 64 );
   update_and_final_groestl( &ctx.groestl, (char*)hash6,
                             (const char*)hash6, 512 );
   init_groestl( &ctx.groestl, 64 );
   update_and_final_groestl( &ctx.groestl, (char*)hash7,
                             (const char*)hash7, 512 );

   intrlv_8x64_512( vhash, hash0, hash1, hash2, hash3,
                    hash4, hash5, hash6, hash7 );
   skein512_8way_init( &ctx.skein );
   skein512_8way_update( &ctx.skein, vhash, 64 );
   skein512_8way_close( &ctx.skein, vhash );

   jh512_8way_init( &ctx.jh );
   jh512_8way_update( &ctx.jh, vhash, 64 );
   jh512_8way_close( &ctx.jh, vhash );

   keccak512_8way_init( &ctx.keccak );
   keccak512_8way_update( &ctx.keccak, vhash, 64 );
   keccak512_8way_close( &ctx.keccak, vhash );

   rintrlv_8x64_4x128( vhashA, vhashB, vhash, 512 );

   luffa_4way_init( &ctx.luffa, 512 );
   luffa_4way_update_close( &ctx.luffa, vhashA, vhashA, 64 );
   luffa_4way_init( &ctx.luffa, 512 );
   luffa_4way_update_close( &ctx.luffa, vhashB, vhashB, 64 );

   cube_4way_init( &ctx.cube, 512, 16, 32 );
   cube_4way_update_close( &ctx.cube, vhashA, vhashA, 64 );
   cube_4way_init( &ctx.cube, 512, 16, 32 );
   cube_4way_update_close( &ctx.cube, vhashB, vhashB, 64 );

   dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhashA );
   dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhashB );

   sph_shavite512_init( &ctx.shavite );
   sph_shavite512( &ctx.shavite, hash0, 64 );
   sph_shavite512_close( &ctx.shavite, hash0 );
   sph_shavite512_init( &ctx.shavite );
   sph_shavite512( &ctx.shavite, hash1, 64 );
   sph_shavite512_close( &ctx.shavite, hash1 );
   sph_shavite512_init( &ctx.shavite );
   sph_shavite512( &ctx.shavite, hash2, 64 );
   sph_shavite512_close( &ctx.shavite, hash2 );
   sph_shavite512_init( &ctx.shavite );
   sph_shavite512( &ctx.shavite, hash3, 64 );
   sph_shavite512_close( &ctx.shavite, hash3 );
   sph_shavite512_init( &ctx.shavite );
   sph_shavite512( &ctx.shavite, hash4, 64 );
   sph_shavite512_close( &ctx.shavite, hash4 );
   sph_shavite512_init( &ctx.shavite );
   sph_shavite512( &ctx.shavite, hash5, 64 );
   sph_shavite512_close( &ctx.shavite, hash5 );
   sph_shavite512_init( &ctx.shavite );
   sph_shavite512( &ctx.shavite, hash6, 64 );
   sph_shavite512_close( &ctx.shavite, hash6 );
   sph_shavite512_init( &ctx.shavite );
   sph_shavite512( &ctx.shavite, hash7, 64 );
   sph_shavite512_close( &ctx.shavite, hash7 );

   intrlv_4x128_512( vhashA, hash0, hash1, hash2, hash3 );
   intrlv_4x128_512( vhashB, hash4, hash5, hash6, hash7 );

   simd_4way_init( &ctx.simd, 512 );
   simd_4way_update_close( &ctx.simd, vhashA, vhashA, 512 );
   simd_4way_init( &ctx.simd, 512 );
   simd_4way_update_close( &ctx.simd, vhashB, vhashB, 512 );

   dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhashA );
   dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhashB );
   init_echo( &ctx.echo, 512 );
   update_final_echo ( &ctx.echo, (BitSequence*)hash0,
                       (const BitSequence*)hash0, 512 );
   init_echo( &ctx.echo, 512 );
   update_final_echo ( &ctx.echo, (BitSequence*)hash1,
                       (const BitSequence*)hash1, 512 );
   init_echo( &ctx.echo, 512 );
   update_final_echo ( &ctx.echo, (BitSequence*)hash2,
                       (const BitSequence*)hash2, 512 );
   init_echo( &ctx.echo, 512 );
   update_final_echo ( &ctx.echo, (BitSequence*)hash3,
                       (const BitSequence*)hash3, 512 );
   init_echo( &ctx.echo, 512 );
   update_final_echo ( &ctx.echo, (BitSequence*)hash4,
                       (const BitSequence*)hash4, 512 );
   init_echo( &ctx.echo, 512 );
   update_final_echo ( &ctx.echo, (BitSequence*)hash5,
                       (const BitSequence*)hash5, 512 );
   init_echo( &ctx.echo, 512 );
   update_final_echo ( &ctx.echo, (BitSequence*)hash6,
                       (const BitSequence*)hash6, 512 );
   init_echo( &ctx.echo, 512 );
   update_final_echo ( &ctx.echo, (BitSequence*)hash7,
                       (const BitSequence*)hash7, 512 );

   intrlv_8x64_512( vhash, hash0, hash1, hash2, hash3,
                    hash4, hash5, hash6, hash7 );

   hamsi512_8way_init( &ctx.hamsi );
   hamsi512_8way_update( &ctx.hamsi, vhash, 64 );
   hamsi512_8way_close( &ctx.hamsi, vhash );

   dintrlv_8x64_512( hash0, hash1, hash2, hash3,
                     hash4, hash5, hash6, hash7, vhash );

   sph_fugue512_init( &ctx.fugue );
   sph_fugue512( &ctx.fugue, hash0, 64 );
   sph_fugue512_close( &ctx.fugue, hash0 );
   sph_fugue512_init( &ctx.fugue );
   sph_fugue512( &ctx.fugue, hash1, 64 );
   sph_fugue512_close( &ctx.fugue, hash1 );
   sph_fugue512_init( &ctx.fugue );
   sph_fugue512( &ctx.fugue, hash2, 64 );
   sph_fugue512_close( &ctx.fugue, hash2 );
   sph_fugue512_init( &ctx.fugue );
   sph_fugue512( &ctx.fugue, hash3, 64 );
   sph_fugue512_close( &ctx.fugue, hash3 );
   sph_fugue512_init( &ctx.fugue );
   sph_fugue512( &ctx.fugue, hash4, 64 );
   sph_fugue512_close( &ctx.fugue, hash4 );
   sph_fugue512_init( &ctx.fugue );
   sph_fugue512( &ctx.fugue, hash5, 64 );
   sph_fugue512_close( &ctx.fugue, hash5 );
   sph_fugue512_init( &ctx.fugue );
   sph_fugue512( &ctx.fugue, hash6, 64 );
   sph_fugue512_close( &ctx.fugue, hash6 );
   sph_fugue512_init( &ctx.fugue );
   sph_fugue512( &ctx.fugue, hash7, 64 );
   sph_fugue512_close( &ctx.fugue, hash7 );

   intrlv_8x32_512( vhash, hash0, hash1, hash2, hash3,
                    hash4, hash5, hash6, hash7 );

   shabal512_8way_init( &ctx.shabal );
   shabal512_8way_update( &ctx.shabal, vhash, 64 );
   shabal512_8way_close( &ctx.shabal, vhash );

   dintrlv_8x32_512( &hash0[8], &hash1[8], &hash2[8], &hash3[8],
                     &hash4[8], &hash5[8], &hash6[8], &hash7[8], vhash );
   sph_whirlpool_init( &ctx.whirlpool );
   sph_whirlpool( &ctx.whirlpool, &hash0[8], 64 );
   sph_whirlpool_close( &ctx.whirlpool, &hash0[16] );
   sph_whirlpool_init( &ctx.whirlpool );
   sph_whirlpool( &ctx.whirlpool, &hash1[8], 64 );
   sph_whirlpool_close( &ctx.whirlpool, &hash1[16] );
   sph_whirlpool_init( &ctx.whirlpool );
   sph_whirlpool( &ctx.whirlpool, &hash2[8], 64 );
   sph_whirlpool_close( &ctx.whirlpool, &hash2[16] );
   sph_whirlpool_init( &ctx.whirlpool );
   sph_whirlpool( &ctx.whirlpool, &hash3[8], 64 );
   sph_whirlpool_close( &ctx.whirlpool, &hash3[16] );
   sph_whirlpool_init( &ctx.whirlpool );
   sph_whirlpool( &ctx.whirlpool, &hash4[8], 64 );
   sph_whirlpool_close( &ctx.whirlpool, &hash4[16] );
   sph_whirlpool_init( &ctx.whirlpool );
   sph_whirlpool( &ctx.whirlpool, &hash5[8], 64 );
   sph_whirlpool_close( &ctx.whirlpool, &hash5[16] );
   sph_whirlpool_init( &ctx.whirlpool );
   sph_whirlpool( &ctx.whirlpool, &hash6[8], 64 );
   sph_whirlpool_close( &ctx.whirlpool, &hash6[16] );
   sph_whirlpool_init( &ctx.whirlpool );
   sph_whirlpool( &ctx.whirlpool, &hash7[8], 64 );
   sph_whirlpool_close( &ctx.whirlpool, &hash7[16] );

   intrlv_8x64_512( vhash, &hash0[16], &hash1[16], &hash2[16], &hash3[16],
                    &hash4[16], &hash5[16], &hash6[16], &hash7[16] );

   sha512_8way_init( &ctx.sha512 );
   sha512_8way_update( &ctx.sha512, vhash, 64 );
   sha512_8way_close( &ctx.sha512, vhash );

   dintrlv_8x64_512( &hash0[24], &hash1[24], &hash2[24], &hash3[24],
                     &hash4[24], &hash5[24], &hash6[24], &hash7[24], vhash );

   ComputeSingleSWIFFTX((unsigned char*)hash0, (unsigned char*)hashA0);
   ComputeSingleSWIFFTX((unsigned char*)hash1, (unsigned char*)hashA1);
   ComputeSingleSWIFFTX((unsigned char*)hash2, (unsigned char*)hashA2);
   ComputeSingleSWIFFTX((unsigned char*)hash3, (unsigned char*)hashA3);
   ComputeSingleSWIFFTX((unsigned char*)hash4, (unsigned char*)hashA4);
   ComputeSingleSWIFFTX((unsigned char*)hash5, (unsigned char*)hashA5);
   ComputeSingleSWIFFTX((unsigned char*)hash6, (unsigned char*)hashA6);
   ComputeSingleSWIFFTX((unsigned char*)hash7, (unsigned char*)hashA7);

   intrlv_8x32_512( vhashA, hashA0, hashA1, hashA2, hashA3,
                    hashA4, hashA5, hashA6, hashA7 );

   memset( vhash, 0, 64*8 );

   haval256_5_8way_init( &ctx.haval );
   haval256_5_8way_update( &ctx.haval, vhashA, 64 );
   haval256_5_8way_close( &ctx.haval, vhash );

   dintrlv_8x32_512( hash0, hash1, hash2, hash3,
                     hash4, hash5, hash6, hash7, vhash );

   memset( hashA0, 0, 64 );
   memset( hashA1, 0, 64 );
   memset( hashA2, 0, 64 );
   memset( hashA3, 0, 64 );
   memset( hashA4, 0, 64 );
   memset( hashA5, 0, 64 );
   memset( hashA6, 0, 64 );
   memset( hashA7, 0, 64 );
   sph_tiger_init(&ctx.tiger);
   sph_tiger (&ctx.tiger, (const void*) hash0, 64);
   sph_tiger_close(&ctx.tiger, (void*) hashA0);
   sph_tiger_init(&ctx.tiger);
   sph_tiger (&ctx.tiger, (const void*) hash1, 64);
   sph_tiger_close(&ctx.tiger, (void*) hashA1);
   sph_tiger_init(&ctx.tiger);
   sph_tiger (&ctx.tiger, (const void*) hash2, 64);
   sph_tiger_close(&ctx.tiger, (void*) hashA2);
   sph_tiger_init(&ctx.tiger);
   sph_tiger (&ctx.tiger, (const void*) hash3, 64);
   sph_tiger_close(&ctx.tiger, (void*) hashA3);
   sph_tiger_init(&ctx.tiger);
   sph_tiger (&ctx.tiger, (const void*) hash4, 64);
   sph_tiger_close(&ctx.tiger, (void*) hashA4);
   sph_tiger_init(&ctx.tiger);
   sph_tiger (&ctx.tiger, (const void*) hash5, 64);
   sph_tiger_close(&ctx.tiger, (void*) hashA5);
   sph_tiger_init(&ctx.tiger);
   sph_tiger (&ctx.tiger, (const void*) hash6, 64);
   sph_tiger_close(&ctx.tiger, (void*) hashA6);
   sph_tiger_init(&ctx.tiger);
   sph_tiger (&ctx.tiger, (const void*) hash7, 64);
   sph_tiger_close(&ctx.tiger, (void*) hashA7);

   memset( hash0, 0, 64 );
   memset( hash1, 0, 64 );
   memset( hash2, 0, 64 );
   memset( hash3, 0, 64 );
   memset( hash4, 0, 64 );
   memset( hash5, 0, 64 );
   memset( hash6, 0, 64 );
   memset( hash7, 0, 64 );

   intrlv_2x256( vhash, hashA0, hashA1, 256 );
   LYRA2RE_2WAY( vhash, 32, vhash, 32, 1, 4, 4 );
   dintrlv_2x256( hash0, hash1, vhash, 256 );
   intrlv_2x256( vhash, hashA2, hashA3, 256 );
   LYRA2RE_2WAY( vhash, 32, vhash, 32, 1, 4, 4 );
   dintrlv_2x256( hash2, hash3, vhash, 256 );
   intrlv_2x256( vhash, hashA4, hashA5, 256 );
   LYRA2RE_2WAY( vhash, 32, vhash, 32, 1, 4, 4 );
   dintrlv_2x256( hash4, hash5, vhash, 256 );
   intrlv_2x256( vhash, hashA6, hashA7, 256 );
   LYRA2RE_2WAY( vhash, 32, vhash, 32, 1, 4, 4 );
   dintrlv_2x256( hash6, hash7, vhash, 256 );

   sph_gost512_init( &ctx.gost );
   sph_gost512 ( &ctx.gost, (const void*) hash0, 64 );
   sph_gost512_close( &ctx.gost, (void*) hash0 );
   sph_gost512_init( &ctx.gost );
   sph_gost512 ( &ctx.gost, (const void*) hash1, 64 );
   sph_gost512_close( &ctx.gost, (void*) hash1 );
   sph_gost512_init( &ctx.gost );
   sph_gost512 ( &ctx.gost, (const void*) hash2, 64 );
   sph_gost512_close( &ctx.gost, (void*) hash2 );
   sph_gost512_init( &ctx.gost );
   sph_gost512 ( &ctx.gost, (const void*) hash3, 64 );
   sph_gost512_close( &ctx.gost, (void*) hash3 );
   sph_gost512_init( &ctx.gost );
   sph_gost512 ( &ctx.gost, (const void*) hash4, 64 );
   sph_gost512_close( &ctx.gost, (void*) hash4 );
   sph_gost512_init( &ctx.gost );
   sph_gost512 ( &ctx.gost, (const void*) hash5, 64 );
   sph_gost512_close( &ctx.gost, (void*) hash5 );
   sph_gost512_init( &ctx.gost );
   sph_gost512 ( &ctx.gost, (const void*) hash6, 64 );
   sph_gost512_close( &ctx.gost, (void*) hash6 );
   sph_gost512_init( &ctx.gost );
   sph_gost512 ( &ctx.gost, (const void*) hash7, 64 );
   sph_gost512_close( &ctx.gost, (void*) hash7 );

   intrlv_8x32_512( vhash, hash0, hash1, hash2, hash3,
                    hash4, hash5, hash6, hash7 );

   sha256_8way_init( &ctx.sha256 );
   sha256_8way_update( &ctx.sha256, vhash, 64 );
   sha256_8way_close( &ctx.sha256, output );
}
int scanhash_x22i_8way( struct work* work, uint32_t max_nonce,
                        uint64_t *hashes_done, struct thr_info *mythr )
{
   uint32_t hash[8*16] __attribute__ ((aligned (128)));
   uint32_t vdata[24*8] __attribute__ ((aligned (64)));
   uint32_t lane_hash[8] __attribute__ ((aligned (64)));
   uint32_t *hash7 = &(hash[7<<3]);
   uint32_t *pdata = work->data;
   uint32_t *ptarget = work->target;
   const uint32_t first_nonce = pdata[19];
   __m512i *noncev = (__m512i*)vdata + 9;   // aligned
   uint32_t n = first_nonce;
   const uint32_t last_nonce = max_nonce - 8;
   const int thr_id = mythr->id;
   const uint32_t Htarg = ptarget[7];

   if (opt_benchmark)
      ((uint32_t*)ptarget)[7] = 0x08ff;

   InitializeSWIFFTX();

   mm512_bswap32_intrlv80_8x64( vdata, pdata );
   do
   {
      *noncev = mm512_intrlv_blend_32( mm512_bswap_32(
                _mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
                                  n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
      x22i_8way_hash( hash, vdata );

      for ( int lane = 0; lane < 8; lane++ )
      if unlikely( ( hash7[ lane ] <= Htarg ) )
      {
         extr_lane_8x32( lane_hash, hash, lane, 256 );
         if ( likely( fulltest( lane_hash, ptarget ) && !opt_benchmark ) )
         {
            pdata[19] = n + lane;
            submit_lane_solution( work, lane_hash, mythr, lane );
         }
      }
      n += 8;
   } while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) );

   *hashes_done = n - first_nonce;
   return 0;
}
#elif defined(X22I_4WAY)

union _x22i_4way_ctx_overlay
{
   blake512_4way_context blake;
@@ -59,8 +475,6 @@ void x22i_4way_hash( void *output, const void *input )
   uint64_t vhash[8*4] __attribute__ ((aligned (64)));
   uint64_t vhashA[8*4] __attribute__ ((aligned (64)));
   uint64_t vhashB[8*4] __attribute__ ((aligned (64)));

// unsigned char hash[64 * 4] __attribute__((aligned(64))) = {0};
   unsigned char hashA0[64] __attribute__((aligned(64))) = {0};
   unsigned char hashA1[64] __attribute__((aligned(32))) = {0};
   unsigned char hashA2[64] __attribute__((aligned(32))) = {0};
@@ -68,13 +482,12 @@ void x22i_4way_hash( void *output, const void *input )
   x22i_ctx_overlay ctx;

   blake512_4way_init( &ctx.blake );
   blake512_4way( &ctx.blake, input, 80 );
   blake512_4way_update( &ctx.blake, input, 80 );
   blake512_4way_close( &ctx.blake, vhash );

   bmw512_4way_init( &ctx.bmw );
   bmw512_4way( &ctx.bmw, vhash, 64 );
   bmw512_4way_update( &ctx.bmw, vhash, 64 );
   bmw512_4way_close( &ctx.bmw, vhash );

   dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );

   init_groestl( &ctx.groestl, 64 );
@@ -93,15 +506,15 @@ void x22i_4way_hash( void *output, const void *input )
   intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );

   skein512_4way_init( &ctx.skein );
   skein512_4way( &ctx.skein, vhash, 64 );
   skein512_4way_update( &ctx.skein, vhash, 64 );
   skein512_4way_close( &ctx.skein, vhash );

   jh512_4way_init( &ctx.jh );
   jh512_4way( &ctx.jh, vhash, 64 );
   jh512_4way_update( &ctx.jh, vhash, 64 );
   jh512_4way_close( &ctx.jh, vhash );

   keccak512_4way_init( &ctx.keccak );
   keccak512_4way( &ctx.keccak, vhash, 64 );
   keccak512_4way_update( &ctx.keccak, vhash, 64 );
   keccak512_4way_close( &ctx.keccak, vhash );

   rintrlv_4x64_2x128( vhashA, vhashB, vhash, 512 );
@@ -142,13 +555,11 @@ void x22i_4way_hash( void *output, const void *input )
   update_final_echo ( &ctx.echo, (BitSequence*)hash3,
                       (const BitSequence*)hash3, 512 );

   intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );

   hamsi512_4way_init( &ctx.hamsi );
   hamsi512_4way( &ctx.hamsi, vhash, 64 );
   hamsi512_4way_update( &ctx.hamsi, vhash, 64 );
   hamsi512_4way_close( &ctx.hamsi, vhash );

   dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );

   sph_fugue512_init( &ctx.fugue );
@@ -167,9 +578,8 @@ void x22i_4way_hash( void *output, const void *input )
   intrlv_4x32_512( vhash, hash0, hash1, hash2, hash3 );

   shabal512_4way_init( &ctx.shabal );
   shabal512_4way( &ctx.shabal, vhash, 64 );
   shabal512_4way_update( &ctx.shabal, vhash, 64 );
   shabal512_4way_close( &ctx.shabal, vhash );

   dintrlv_4x32_512( &hash0[8], &hash1[8], &hash2[8], &hash3[8], vhash );

   sph_whirlpool_init( &ctx.whirlpool );
@@ -188,12 +598,10 @@ void x22i_4way_hash( void *output, const void *input )
   intrlv_4x64_512( vhash, &hash0[16], &hash1[16], &hash2[16], &hash3[16] );

   sha512_4way_init( &ctx.sha512 );
   sha512_4way( &ctx.sha512, vhash, 64 );
   sha512_4way_update( &ctx.sha512, vhash, 64 );
   sha512_4way_close( &ctx.sha512, vhash );

   dintrlv_4x64_512( &hash0[24], &hash1[24], &hash2[24], &hash3[24], vhash );

// InitializeSWIFFTX();
   ComputeSingleSWIFFTX((unsigned char*)hash0, (unsigned char*)hashA0);
   ComputeSingleSWIFFTX((unsigned char*)hash1, (unsigned char*)hashA1);
   ComputeSingleSWIFFTX((unsigned char*)hash2, (unsigned char*)hashA2);
@@ -204,9 +612,8 @@ void x22i_4way_hash( void *output, const void *input )
   memset( vhash, 0, 64*4 );

   haval256_5_4way_init( &ctx.haval );
   haval256_5_4way( &ctx.haval, vhashA, 64 );
   haval256_5_4way_update( &ctx.haval, vhashA, 64 );
   haval256_5_4way_close( &ctx.haval, vhash );

   dintrlv_4x32_512( hash0, hash1, hash2, hash3, vhash );

   memset( hashA0, 0, 64 );
@@ -257,10 +664,8 @@ void x22i_4way_hash( void *output, const void *input )
   intrlv_4x32_512( vhash, hash0, hash1, hash2, hash3 );

   sha256_4way_init( &ctx.sha256 );
   sha256_4way( &ctx.sha256, vhash, 64 );
   sha256_4way_update( &ctx.sha256, vhash, 64 );
   sha256_4way_close( &ctx.sha256, output );

// memcpy(output, hash, 32);
}
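The 4-way and 8-way x22i paths above alternate between vectorized N-way primitives and scalar ones, so each stage boundary either interleaves the per-lane states into one SIMD buffer or splits them back out into per-lane buffers. A minimal sketch, assuming nothing beyond standard C, of what a 4-lane 64-bit interleave does conceptually; the function name below is made up and the repo's intrlv_*/dintrlv_* helpers are optimized forms of the same idea, not this code.

#include <stdint.h>

/* Illustrative only, not repo code: lane i's word j ends up at v[ 4*j + i ],
   which is the word-interleaved layout the 4-way SIMD primitives consume.
   The de-interleave step is simply the inverse assignment. */
static void intrlv_4x64_sketch( uint64_t *v, const uint64_t *lane0,
                                const uint64_t *lane1, const uint64_t *lane2,
                                const uint64_t *lane3, int words )
{
   for ( int j = 0; j < words; j++ )
   {
      v[ 4*j     ] = lane0[j];
      v[ 4*j + 1 ] = lane1[j];
      v[ 4*j + 2 ] = lane2[j];
      v[ 4*j + 3 ] = lane3[j];
   }
}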
@@ -1,28 +1,44 @@
#include "x22i-gate.h"

// Ryzen has poor AVX2 performance so use SHA over AVX2.
// Intel has AVX512 so use AVX512 over SHA.
// When Ryzen AVX2 improves use AVX2 over SHA.

bool register_x22i_algo( algo_gate_t* gate )
{
#if defined (X22I_4WAY)
#if defined (X22I_8WAY)
   gate->scanhash = (void*)&scanhash_x22i_8way;
   gate->hash = (void*)&x22i_8way_hash;
   gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT;
#elif defined (X22I_4WAY)
   gate->scanhash = (void*)&scanhash_x22i_4way;
   gate->hash = (void*)&x22i_4way_hash;
   gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | SHA_OPT | AVX512_OPT;
#else
   gate->scanhash = (void*)&scanhash_x22i;
   gate->hash = (void*)&x22i_hash;
   gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | SHA_OPT | AVX512_OPT;
#endif
   gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | SHA_OPT;
   return true;
};

bool register_x25x_algo( algo_gate_t* gate )
{
#if defined (X22I_4WAY)
#if defined (X25X_8WAY)
   gate->scanhash = (void*)&scanhash_x25x_8way;
   gate->hash = (void*)&x25x_8way_hash;
   gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT;
#elif defined (X25X_4WAY)
   gate->scanhash = (void*)&scanhash_x25x_4way;
   gate->hash = (void*)&x25x_4way_hash;
   gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | SHA_OPT | AVX512_OPT;
#else
   gate->scanhash = (void*)&scanhash_x25x;
   gate->hash = (void*)&x25x_hash;
   gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | SHA_OPT | AVX512_OPT;
#endif
   gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | SHA_OPT;
// gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | SHA_OPT;

   return true;
};
@@ -6,30 +6,60 @@
#include <stdint.h>
#include <unistd.h>

#if defined(__AVX2__) && defined(__AES__)
#define X22I_4WAY
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
#define X22I_8WAY 1
#elif defined(__AVX2__) && defined(__AES__)
#define X22I_4WAY 1
#endif

bool register_x22i__algo( algo_gate_t* gate );
bool register_x22i_algo( algo_gate_t* gate );

#if defined(X22I_4WAY)
#if defined(X22I_8WAY)

void x22i_8way_hash( void *state, const void *input );
int scanhash_x22i_8way( struct work *work, uint32_t max_nonce,
                        uint64_t *hashes_done, struct thr_info *mythr );

#elif defined(X22I_4WAY)

void x22i_4way_hash( void *state, const void *input );
int scanhash_x22i_4way( struct work *work, uint32_t max_nonce,
                        uint64_t *hashes_done, struct thr_info *mythr );

void x25x_4way_hash( void *state, const void *input );
int scanhash_x25x_4way( struct work *work, uint32_t max_nonce,
                        uint64_t *hashes_done, struct thr_info *mythr );

#endif
#else

void x22i_hash( void *state, const void *input );
int scanhash_x22i( struct work *work, uint32_t max_nonce,
                   uint64_t *hashes_done, struct thr_info *mythr );

#endif

#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
#define X25X_8WAY 1
#elif defined(__AVX2__) && defined(__AES__)
#define X25X_4WAY 1
#endif

bool register_x25i_algo( algo_gate_t* gate );

#if defined(X25X_8WAY)

void x25x_8way_hash( void *state, const void *input );
int scanhash_x25x_8way( struct work *work, uint32_t max_nonce,
                        uint64_t *hashes_done, struct thr_info *mythr );

#elif defined(X25X_4WAY)

void x25x_4way_hash( void *state, const void *input );
int scanhash_x25x_4way( struct work *work, uint32_t max_nonce,
                        uint64_t *hashes_done, struct thr_info *mythr );

#else

void x25x_hash( void *state, const void *input );
int scanhash_x25x( struct work *work, uint32_t max_nonce,
                   uint64_t *hashes_done, struct thr_info *mythr );

#endif

#endif // X22I_GATE_H__
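x22i-gate.h now selects the 8-way build only when the compiler reports the full AVX512 F/VL/DQ/BW set (all four macros are defined by, for example, building with -march=skylake-avx512) and falls back to the 4-way build on AVX2 plus AES. A small sketch of how such width macros could be consumed by generic code; X22I_LANES is a hypothetical name for illustration, not something the repo defines.

/* Sketch only: map the width macros above onto a lane count. */
#if defined(X22I_8WAY)
  #define X22I_LANES 8
#elif defined(X22I_4WAY)
  #define X22I_LANES 4
#else
  #define X22I_LANES 1
#endif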
File diff suppressed because it is too large
@@ -4,7 +4,7 @@
# during develpment. However the information contained may provide compilation
# tips to users.

rm cpuminer-avx512 cpuminer-avx2 cpuminer-aes-avx cpuminer-aes-sse42 cpuminer-sse42 cpuminer-ssse3 cpuminer-sse2 cpuminer-zen
rm cpuminer-avx512 cpuminer-avx2 cpuminer-aes-avx cpuminer-aes-sse42 cpuminer-sse42 cpuminer-ssse3 cpuminer-sse2 cpuminer-zen > /dev/null

make distclean || echo clean
rm -f config.status
27 build-avx2.sh Executable file
@@ -0,0 +1,27 @@
#!/bin/bash

#if [ "$OS" = "Windows_NT" ]; then
# ./mingw64.sh
# exit 0
#fi

# Linux build

make distclean || echo clean

rm -f config.status
./autogen.sh || echo done

# Ubuntu 10.04 (gcc 4.4)
# extracflags="-O3 -march=native -Wall -D_REENTRANT -funroll-loops -fvariable-expansion-in-unroller -fmerge-all-constants -fbranch-target-load-optimize2 -fsched2-use-superblocks -falign-loops=16 -falign-functions=16 -falign-jumps=16 -falign-labels=16"

# Debian 7.7 / Ubuntu 14.04 (gcc 4.7+)
#extracflags="$extracflags -Ofast -flto -fuse-linker-plugin -ftree-loop-if-convert-stores"

#CFLAGS="-O3 -march=native -Wall" ./configure --with-curl --with-crypto=$HOME/usr
CFLAGS="-O3 -march=haswell -maes -Wall" ./configure --with-curl
#CFLAGS="-O3 -march=native -Wall" CXXFLAGS="$CFLAGS -std=gnu++11" ./configure --with-curl

make -j 4

strip -s cpuminer
10 clean-all.sh Executable file
@@ -0,0 +1,10 @@
#!/bin/bash
#
# imake clean and rm all the targetted executables.
# tips to users.

rm cpuminer-avx512 cpuminer-avx2 cpuminer-aes-avx cpuminer-aes-sse42 cpuminer-sse42 cpuminer-ssse3 cpuminer-sse2 cpuminer-zen > /dev/null

rm cpuminer-avx512.exe cpuminer-avx2.exe cpuminer-aes-avx.exe cpuminer-aes-sse42.exe cpuminer-sse42.exe cpuminer-ssse3.exe cpuminer-sse2.exe cpuminer-zen.exe > /dev/null

make distclean
20 configure vendored
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.10.5.
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.10.7.
#
#
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
@@ -577,8 +577,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='cpuminer-opt'
PACKAGE_TARNAME='cpuminer-opt'
PACKAGE_VERSION='3.10.5'
PACKAGE_STRING='cpuminer-opt 3.10.5'
PACKAGE_VERSION='3.10.7'
PACKAGE_STRING='cpuminer-opt 3.10.7'
PACKAGE_BUGREPORT=''
PACKAGE_URL=''

@@ -1332,7 +1332,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
\`configure' configures cpuminer-opt 3.10.5 to adapt to many kinds of systems.
\`configure' configures cpuminer-opt 3.10.7 to adapt to many kinds of systems.

Usage: $0 [OPTION]... [VAR=VALUE]...

@@ -1404,7 +1404,7 @@ fi

if test -n "$ac_init_help"; then
case $ac_init_help in
short | recursive ) echo "Configuration of cpuminer-opt 3.10.5:";;
short | recursive ) echo "Configuration of cpuminer-opt 3.10.7:";;
esac
cat <<\_ACEOF

@@ -1509,7 +1509,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
cpuminer-opt configure 3.10.5
cpuminer-opt configure 3.10.7
generated by GNU Autoconf 2.69

Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2012,7 +2012,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.

It was created by cpuminer-opt $as_me 3.10.5, which was
It was created by cpuminer-opt $as_me 3.10.7, which was
generated by GNU Autoconf 2.69. Invocation command line was

$ $0 $@
@@ -2993,7 +2993,7 @@ fi

# Define the identity of the package.
PACKAGE='cpuminer-opt'
VERSION='3.10.5'
VERSION='3.10.7'


cat >>confdefs.h <<_ACEOF
@@ -6690,7 +6690,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
This file was extended by cpuminer-opt $as_me 3.10.5, which was
This file was extended by cpuminer-opt $as_me 3.10.7, which was
generated by GNU Autoconf 2.69. Invocation command line was

CONFIG_FILES = $CONFIG_FILES
@@ -6756,7 +6756,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
cpuminer-opt config.status 3.10.5
cpuminer-opt config.status 3.10.7
configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\"

@@ -1,4 +1,4 @@
AC_INIT([cpuminer-opt], [3.10.5])
AC_INIT([cpuminer-opt], [3.10.7])

AC_PREREQ([2.59c])
AC_CANONICAL_SYSTEM
88 cpu-miner.c
@@ -872,6 +872,7 @@ static uint64_t accept_sum = 0;
static uint64_t reject_sum = 0;
static double norm_diff_sum = 0.;
static uint32_t last_block_height = 0;
static bool new_job = false;
static double last_targetdiff = 0.;
static double ref_rate_hi = 0.;
static double ref_rate_lo = 1e100;
@@ -887,6 +888,7 @@ struct share_stats_t
   double share_diff;
   double stratum_diff;
   double target_diff;
   char job_id[32];
};

#define s_stats_size 8
@@ -1093,8 +1095,9 @@ static int share_result( int result, struct work *null_work,
                          rejected_share_count, solved_block_count );

   if ( have_stratum && !opt_quiet )
      applog2( LOG_INFO, "Share diff %.3g (%5f%%), block %d",
               my_stats.share_diff, share_ratio, stratum.block_height );
      applog2( LOG_INFO, "Share diff %.3g (%5f%%), block %d, job %s",
               my_stats.share_diff, share_ratio, stratum.block_height,
               my_stats.job_id );

   if ( reason )
   {
@@ -1762,6 +1765,7 @@ void work_set_target_ratio( struct work* work, uint32_t* hash )
   share_stats[ s_put_ptr ].net_diff = net_diff;
   share_stats[ s_put_ptr ].stratum_diff = stratum_diff;
   share_stats[ s_put_ptr ].target_diff = work->targetdiff;
   strcpy( share_stats[ s_put_ptr ].job_id, work->job_id );

   s_put_ptr = stats_ptr_incr( s_put_ptr );
@@ -2586,28 +2590,38 @@ void std_stratum_gen_work( struct stratum_ctx *sctx, struct work *g_work )
        || ( last_block_height != sctx->block_height ) )
   {
      double hr = 0.;

      new_job = false;
      pthread_mutex_lock( &stats_lock );

      for ( int i = 0; i < opt_n_threads; i++ )
         hr += thr_hashrates[i];
      global_hashrate = hr;
      pthread_mutex_unlock( &stats_lock );

      if ( stratum_diff != sctx->job.diff )
         applog( LOG_BLUE, "New stratum difficulty" );
      if ( last_block_height != sctx->block_height )
         applog( LOG_BLUE, "New block" );

      if ( !opt_quiet )
      {
         if ( stratum_diff != sctx->job.diff )
            applog( LOG_BLUE, "New stratum diff %g, block %d, job %s",
                    sctx->job.diff, sctx->block_height, g_work->job_id );
         else if ( last_block_height != sctx->block_height )
            applog( LOG_BLUE, "New block %d, job %s", sctx->block_height,
                    g_work->job_id );
         else
            applog( LOG_BLUE,"New job %s.", g_work->job_id );
      }

      // Update data and calculate new estimates.
      stratum_diff = sctx->job.diff;
      last_block_height = stratum.block_height;
      last_targetdiff = g_work->targetdiff;

      applog2( LOG_INFO, "%s %s block %d", short_url,
               algo_names[opt_algo], stratum.block_height );
      applog2( LOG_INFO, "Diff: net %g, stratum %g, target %g",
               net_diff, stratum_diff, last_targetdiff );
      if ( !opt_quiet )
      {
         applog2( LOG_INFO, "%s %s block %d", short_url,
                  algo_names[opt_algo], stratum.block_height );
         applog2( LOG_INFO, "Diff: net %g, stratum %g, target %g",
                  net_diff, stratum_diff, last_targetdiff );
      }

      if ( hr > 0. )
      {
@@ -2619,10 +2633,13 @@ void std_stratum_gen_work( struct stratum_ctx *sctx, struct work *g_work )
         sprintf_et( share_ttf, last_targetdiff * diff_to_hash / hr );
         scale_hash_for_display ( &hr, hr_units );

         applog2( LOG_INFO, "TTF @ %.2f %sh/s: block %s, share %s",
                  hr, hr_units, block_ttf, share_ttf );
         if ( !opt_quiet )
         {
            applog2( LOG_INFO, "TTF @ %.2f %sh/s: block %s, share %s",
                     hr, hr_units, block_ttf, share_ttf );
         }
      }
   }
   } // new diff/block
}

void jr2_stratum_gen_work( struct stratum_ctx *sctx, struct work *g_work )
@@ -2700,25 +2717,23 @@ static void *stratum_thread(void *userdata )
   if ( stratum.job.job_id
        && ( !g_work_time || strcmp( stratum.job.job_id, g_work.job_id ) ) )
   {
      new_job = true;
      pthread_mutex_lock(&g_work_lock);
      algo_gate.stratum_gen_work( &stratum, &g_work );
      time(&g_work_time);
      pthread_mutex_unlock(&g_work_lock);
      restart_threads();

/*
      if ( stratum.job.clean || jsonrpc_2 )
      {
         static uint32_t last_block_height;
         if ( last_block_height != stratum.block_height )
         {
            last_block_height = stratum.block_height;
      if ( !opt_quiet && last_block_height && new_job
           && ( last_block_height == stratum.block_height ) )
      {
         new_job = false;
         applog( LOG_BLUE,"New job %s", g_work.job_id );
      }

      }
      else
*/
      if (opt_debug && !opt_quiet)
      else if (opt_debug && !opt_quiet)
      {
         applog( LOG_BLUE, "%s asks job %d for block %d", short_url,
                 strtoul( stratum.job.job_id, NULL, 16 ), stratum.block_height );
@@ -2960,9 +2975,7 @@ void parse_arg(int key, char *arg )
      show_usage_and_exit(1);
   opt_retries = v;
   break;
// case 'R':
//    applog(LOG_WARNING,"\n-R is no longer valid, use --retry-pause instead.");
   case 1025:
   case 1025:
      v = atoi(arg);
      if (v < 1 || v > 9999) /* sanity check */
         show_usage_and_exit(1);
@@ -3018,11 +3031,14 @@ void parse_arg(int key, char *arg )
      *hp++ = '@';
   } else
      hp = ap;
   if (ap != arg) {
      if (strncasecmp(arg, "http://", 7) &&
          strncasecmp(arg, "https://", 8) &&
          strncasecmp(arg, "stratum+tcp://", 14)) {
         fprintf(stderr, "unknown protocol -- '%s'\n", arg);
   if ( ap != arg )
   {
      if ( strncasecmp( arg, "http://", 7 )
        && strncasecmp( arg, "https://", 8 )
        && strncasecmp( arg, "stratum+tcp://", 14 )
        && strncasecmp( arg, "stratum+tcps://", 15 ) )
      {
         fprintf(stderr, "unknown protocol -- '%s'\n", arg);
         show_usage_and_exit(1);
      }
   free(rpc_url);
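The reworked check above amounts to growing the list of accepted URL scheme prefixes to include stratum+tcps://. A stand-alone sketch of the same test, illustrative only and not repo code; the pool host in the comment is a made-up example.

#include <stdbool.h>
#include <string.h>
#include <strings.h>   /* strncasecmp */

/* Accept a URL only if it starts with one of the known schemes. */
static bool scheme_ok( const char *url )
{
   static const char *allowed[] =
      { "http://", "https://", "stratum+tcp://", "stratum+tcps://" };
   for ( size_t i = 0; i < sizeof(allowed) / sizeof(allowed[0]); i++ )
      if ( !strncasecmp( url, allowed[i], strlen( allowed[i] ) ) )
         return true;
   return false;   /* scheme_ok( "stratum+tcps://pool.example.com:5555" ) is true */
}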
@@ -3427,7 +3443,7 @@ bool check_cpu_capability ()
   else if ( sw_has_sse42 ) printf( " SSE4.2" );
   else if ( sw_has_sse2 ) printf( " SSE2 " );
   if ( sw_has_vaes ) printf( " VAES" );
   else if ( sw_has_aes ) printf( " AES " );
   else if ( sw_has_aes ) printf( " AES" );
   if ( sw_has_sha ) printf( " SHA" );

   printf("\nAlgo features:");
@@ -3439,7 +3455,7 @@ bool check_cpu_capability ()
   else if ( algo_has_sse42 ) printf( " SSE4.2" );
   else if ( algo_has_sse2 ) printf( " SSE2 " );
   if ( algo_has_vaes ) printf( " VAES" );
   else if ( algo_has_aes ) printf( " AES " );
   else if ( algo_has_aes ) printf( " AES" );
   if ( algo_has_sha ) printf( " SHA" );
   }
   printf("\n");
@@ -3619,7 +3635,9 @@ int main(int argc, char *argv[])
   pthread_mutex_init( &stratum.sock_lock, NULL );
   pthread_mutex_init( &stratum.work_lock, NULL );

   flags = !opt_benchmark && strncmp( rpc_url, "https:", 6 )
   flags = !opt_benchmark
           && ( strncmp( rpc_url, "https:", 6 )
                || strncasecmp(rpc_url, "stratum+tcps://", 15 ) )
           ? ( CURL_GLOBAL_ALL & ~CURL_GLOBAL_SSL )
           : CURL_GLOBAL_ALL;
   if ( curl_global_init( flags ) )
@@ -113,6 +113,9 @@ static inline __m512i m512_const_64( const uint64_t i7, const uint64_t i6,
          m512_const1_64( ( ( ( (uint64_t)(i1) << 32 ) ) \
                        | ( (uint64_t)(i0) & 0xffffffff ) ) )

// { m128_1, m128_1, m128_0, m128_0 }
#define m512_const_2x128( v1, v0 ) \
   m512_mask_blend_epi64( 0x0f, m512_const1_128( v1 ), m512_const1_128( v0 ) )

static inline __m512i m512_const4_64( const uint64_t i3, const uint64_t i2,
                                      const uint64_t i1, const uint64_t i0 )
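The new m512_const_2x128 macro builds a 512-bit constant from two 128-bit halves: blend mask 0x0f takes the low four 64-bit lanes from the v0 broadcast and the high four from v1, giving { m128_1, m128_1, m128_0, m128_0 } as the comment says. A rough equivalent in raw AVX-512 intrinsics, illustrative only and assuming the repo's m512_* wrappers map onto these intrinsics.

#include <immintrin.h>

/* Requires AVX512F + AVX512DQ. Lanes selected by set mask bits (0x0f, the
   low four qwords) come from the second operand, here the v0 broadcast. */
static inline __m512i const_2x128_sketch( __m128i v1, __m128i v0 )
{
   return _mm512_mask_blend_epi64( 0x0f, _mm512_broadcast_i64x2( v1 ),
                                         _mm512_broadcast_i64x2( v0 ) );
}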
66 util.c
@@ -1069,7 +1069,7 @@ double target_to_diff(uint32_t* target)
#define socket_blocks() (errno == EAGAIN || errno == EWOULDBLOCK)
#endif

static bool send_line(curl_socket_t sock, char *s)
static bool send_line( struct stratum_ctx *sctx, char *s )
{
   size_t sent = 0;
   int len;
@@ -1077,24 +1077,35 @@ static bool send_line(curl_socket_t sock, char *s)
   len = (int) strlen(s);
   s[len++] = '\n';

   while (len > 0) {
   while ( len > 0 )
   {
      struct timeval timeout = {0, 0};
      int n;
      fd_set wd;

      FD_ZERO(&wd);
      FD_SET(sock, &wd);
      if (select((int) (sock + 1), NULL, &wd, NULL, &timeout) < 1)
      FD_ZERO( &wd );
      FD_SET( sctx->sock, &wd );
      if ( select( (int) ( sctx->sock + 1 ), NULL, &wd, NULL, &timeout ) < 1 )
         return false;
      n = send(sock, s + sent, len, 0);
      if (n < 0) {
         if (!socket_blocks())
            return false;
         n = 0;
      }

#if LIBCURL_VERSION_NUM >= 0x071802

      CURLcode rc = curl_easy_send(sctx->curl, s + sent, len, (size_t *)&n);
      if ( rc != CURLE_OK )
      {
         if ( rc != CURLE_AGAIN )
#else
      n = send(sock, s + sent, len, 0);
      if ( n < 0 )
      {
         if ( !socket_blocks() )
#endif
            return false;
         n = 0;
      }
      sent += n;
      len -= n;
   }
}

   return true;
}
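send_line now writes through the curl easy handle when libcurl is new enough, which is what makes stratum+tcps:// work: curl_easy_send() encrypts on a TLS connection and behaves like a plain send otherwise. A minimal sketch, not repo code, of writing a whole buffer through a connected easy handle set up with CURLOPT_CONNECT_ONLY (which curl_easy_send() requires); the helper name is made up.

#include <curl/curl.h>
#include <stdbool.h>
#include <stddef.h>

/* Write len bytes, retrying on CURLE_AGAIN. A real caller would wait for
   socket writability (select/poll) instead of spinning. */
static bool send_all( CURL *curl, const char *buf, size_t len )
{
   size_t done = 0;
   while ( done < len )
   {
      size_t sent = 0;
      CURLcode rc = curl_easy_send( curl, buf + done, len - done, &sent );
      if ( rc == CURLE_AGAIN )  continue;
      if ( rc != CURLE_OK )     return false;
      done += sent;
   }
   return true;
}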
@@ -1107,7 +1118,7 @@ bool stratum_send_line(struct stratum_ctx *sctx, char *s)
   applog(LOG_DEBUG, "> %s", s);

   pthread_mutex_lock(&sctx->sock_lock);
   ret = send_line(sctx->sock, s);
   ret = send_line( sctx, s );
   pthread_mutex_unlock(&sctx->sock_lock);

   return ret;
@@ -1167,14 +1178,27 @@ char *stratum_recv_line(struct stratum_ctx *sctx)
      ssize_t n;

      memset(s, 0, RBUFSIZE);
      n = recv(sctx->sock, s, RECVSIZE, 0);

#if LIBCURL_VERSION_NUM >= 0x071802

      CURLcode rc = curl_easy_recv(sctx->curl, s, RECVSIZE, (size_t *)&n);
      if (rc == CURLE_OK && !n) {
         ret = false;
         break;
      }
      if (rc != CURLE_OK) {
         if (rc != CURLE_AGAIN || !socket_full(sctx->sock, 1)) {
#else

      n = recv(sctx->sock, s, RECVSIZE, 0);
      if (!n) {
         ret = false;
         break;
      }
      if (n < 0) {
         if (!socket_blocks() || !socket_full(sctx->sock, 1)) {
            ret = false;
#endif
            ret = false;
            break;
         }
      } else
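stratum_recv_line mirrors the same split on the read side with curl_easy_recv(). A minimal sketch, not repo code, of one read through the same kind of handle; the helper name and return convention are made up for illustration.

#include <curl/curl.h>
#include <stdbool.h>
#include <stddef.h>

/* One read attempt. CURLE_OK with zero bytes means the peer closed the
   connection; CURLE_AGAIN means nothing is buffered yet and the caller
   should wait for readability and retry. */
static bool recv_some( CURL *curl, char *buf, size_t cap, size_t *got )
{
   CURLcode rc = curl_easy_recv( curl, buf, cap, got );
   if ( rc == CURLE_OK )    return *got > 0;            /* false on orderly close */
   if ( rc == CURLE_AGAIN ) { *got = 0; return true; }  /* try again later */
   return false;                                        /* hard error */
}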
@@ -1244,7 +1268,9 @@ bool stratum_connect(struct stratum_ctx *sctx, const char *url)
   }
   free(sctx->curl_url);
   sctx->curl_url = (char*) malloc(strlen(url));
   sprintf(sctx->curl_url, "http%s", strstr(url, "://"));
   sprintf( sctx->curl_url, "http%s", strstr( url, "s://" )
                                      ? strstr( url, "s://" )
                                      : strstr (url, "://" ) );

   if (opt_protocol)
      curl_easy_setopt(curl, CURLOPT_VERBOSE, 1);
@@ -1254,7 +1280,9 @@ bool stratum_connect(struct stratum_ctx *sctx, const char *url)
   curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, sctx->curl_err_str);
   curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1);
   curl_easy_setopt(curl, CURLOPT_TCP_NODELAY, 1);
   if (opt_proxy) {
   curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0);
   curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0);
   if (opt_proxy) {
      curl_easy_setopt(curl, CURLOPT_PROXY, opt_proxy);
      curl_easy_setopt(curl, CURLOPT_PROXYTYPE, opt_proxy_type);
   }
@@ -1954,7 +1982,9 @@ static bool stratum_reconnect(struct stratum_ctx *sctx, json_t *params)
      return false;

   url = (char*) malloc(32 + strlen(host));
   sprintf(url, "stratum+tcp://%s:%d", host, port);

   strncpy( url, sctx->url, 15 );
   sprintf( strstr( url, "://" ) + 3, "%s:%d", host, port );

   if (!opt_redirect) {
      applog(LOG_INFO, "Ignoring request to reconnect to %s", url);
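The rewritten curl_url mapping keeps everything from "s://" (TLS schemes) or "://" (plain schemes) onward, so stratum+tcps://host:port becomes https://host:port for curl, while stratum+tcp://host:port still becomes http://host:port. A small illustrative version, not repo code; the hosts in the comments are made-up examples.

#include <stdio.h>
#include <string.h>

/* to_curl_url( out, sizeof out, "stratum+tcps://pool.example.com:5555" )
      -> "https://pool.example.com:5555"
   to_curl_url( out, sizeof out, "stratum+tcp://pool.example.com:3333" )
      -> "http://pool.example.com:3333" */
static void to_curl_url( char *out, size_t outsz, const char *url )
{
   const char *tail = strstr( url, "s://" );   /* matches ...tcps:// and https:// */
   if ( !tail ) tail = strstr( url, "://" );
   if ( !tail ) { out[0] = '\0'; return; }     /* not a URL with a scheme */
   snprintf( out, outsz, "http%s", tail );
}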
@@ -27,6 +27,9 @@ ln -s $LOCAL_LIB/gmp/gmp.h ./gmp.h
#sed -i 's/"-lpthread"/"-lpthreadGC2"/g' configure.ac

# make release directory and copy selected DLLs.

rm -rf release > /dev/null

mkdir release
cp README.txt release/
cp README.md release/
@@ -35,10 +38,6 @@ cp $MINGW_LIB/zlib1.dll release/
cp $MINGW_LIB/libwinpthread-1.dll release/
cp $GCC_MINGW_LIB/libstdc++-6.dll release/
cp $GCC_MINGW_LIB/libgcc_s_seh-1.dll release/
#cp /usr/x86_64-w64-mingw32/lib/zlib1.dll release/
#cp /usr/x86_64-w64-mingw32/lib/libwinpthread-1.dll release/
#cp /usr/lib/gcc/x86_64-w64-mingw32/7.3-win32/libstdc++-6.dll release/
#cp /usr/lib/gcc/x86_64-w64-mingw32/7.3-win32/libgcc_s_seh-1.dll release/
cp $LOCAL_LIB/openssl/libcrypto-1_1-x64.dll release/
cp $LOCAL_LIB/curl/lib/.libs/libcurl-4.dll release/