Compare commits

..

5 Commits

Author SHA1 Message Date
Jay D Dee
45c77a5c81 v3.12.4.2 2020-02-23 15:31:06 -05:00
Jay D Dee
dbce7e0721 v3.12.4.1 2020-02-22 18:06:39 -05:00
Jay D Dee
6d66051de6 v3.12.4 2020-02-21 16:34:53 -05:00
Jay D Dee
b93be8816a v3.12.3.1 2020-02-18 12:05:47 -05:00
Jay D Dee
19b0ac6d5c v3.12.3 2020-02-13 04:25:33 -05:00
41 changed files with 2234 additions and 2828 deletions

View File

@@ -12,10 +12,24 @@ a false positive, they are flagged simply because they are cryptocurrency
miners. The source code is open for anyone to inspect. If you don't trust
the software, don't use it.
New thread:
https://bitcointalk.org/index.php?topic=5226770.msg53865575#msg53865575
Old thread:
https://bitcointalk.org/index.php?topic=1326803.0
mailto://jayddee246@gmail.com
This note is to confirm that bitcointalk users JayDDee and joblo are the
same person.
I created a new BCT user JayDDee to match my github user id.
The old thread has been locked but still contains useful information for
reading.
See file RELEASE_NOTES for change log and INSTALL_LINUX or INSTALL_WINDOWS
for compile instructions.

View File

@@ -65,6 +65,40 @@ If not what makes it happen or not happen?
Change Log
----------
v3.12.4.2
Issue #245: fixed getwork stale shares, solo mining with getwork now works.
Issue #246: implemented block and summary logs for getwork.
v3.12.4.1
Issue #245: fix scantime when mining solo with getwork.
Added debug logs for creation of stratum and longpoll threads, use -D to
enable.
v3.12.4
Issue #244: Change longpoll to ignore job id.
Lyra2rev2 AVX2 +3%, AVX512 +6%.
v3.12.3.1
Issue #241: Fixed regression that broke coinbase address in v3.11.7.
v3.12.3
Issue #238: Fixed skunk AVX2.
Issue #239: Faster AVX2 & AVX512 for skein +44%, skein2 +30%, plus marginal
increases for skunk, x16r, x16rv2, x16rt, x16rt-veil, x16s, x21s.
Faster anime VAES +57%, AVX512 +21%, AVX2 +3%.
Redesigned code responsible for #236.
v3.12.2
Fixed xevan, skein, skein2 AVX2, #238.

View File

@@ -281,39 +281,37 @@ void exec_hash_function( int algo, void *output, const void *pdata )
const char* const algo_alias_map[][2] =
{
// alias proper
{ "argon2d-crds", "argon2d250" },
{ "argon2d-dyn", "argon2d500" },
{ "argon2d-uis", "argon2d4096" },
{ "bcd", "x13bcd" },
{ "bitcore", "timetravel10" },
{ "bitzeny", "yescryptr8" },
{ "blake256r8", "blakecoin" },
{ "blake256r8vnl", "vanilla" },
{ "blake256r14", "blake" },
{ "blake256r14dcr", "decred" },
{ "cryptonote", "cryptonight" },
{ "cryptonight-light", "cryptolight" },
{ "diamond", "dmd-gr" },
{ "droplp", "drop" },
{ "espers", "hmq1725" },
{ "flax", "c11" },
{ "hsr", "x13sm3" },
{ "jackpot", "jha" },
{ "jane", "scryptjane" },
{ "lyra2", "lyra2re" },
{ "lyra2v2", "lyra2rev2" },
{ "lyra2v3", "lyra2rev3" },
{ "myrgr", "myr-gr" },
{ "myriad", "myr-gr" },
{ "neo", "neoscrypt" },
{ "phi", "phi1612" },
{ "sib", "x11gost" },
{ "timetravel8", "timetravel" },
{ "veil", "x16rt-veil" },
{ "x16r-hex", "hex" },
{ "yenten", "yescryptr16" },
{ "ziftr", "zr5" },
{ NULL, NULL }
{ "argon2d-crds", "argon2d250" },
{ "argon2d-dyn", "argon2d500" },
{ "argon2d-uis", "argon2d4096" },
{ "bcd", "x13bcd" },
{ "bitcore", "timetravel10" },
{ "bitzeny", "yescryptr8" },
{ "blake256r8", "blakecoin" },
{ "blake256r8vnl", "vanilla" },
{ "blake256r14", "blake" },
{ "blake256r14dcr", "decred" },
{ "diamond", "dmd-gr" },
{ "espers", "hmq1725" },
{ "flax", "c11" },
{ "hsr", "x13sm3" },
{ "jackpot", "jha" },
{ "jane", "scryptjane" },
{ "lyra2", "lyra2re" },
{ "lyra2v2", "lyra2rev2" },
{ "lyra2v3", "lyra2rev3" },
{ "myrgr", "myr-gr" },
{ "myriad", "myr-gr" },
{ "neo", "neoscrypt" },
{ "phi", "phi1612" },
{ "scryptn2", "scrypt:1048576" },
{ "sib", "x11gost" },
{ "timetravel8", "timetravel" },
{ "veil", "x16rt-veil" },
{ "x16r-hex", "hex" },
{ "yenten", "yescryptr16" },
{ "ziftr", "zr5" },
{ NULL, NULL }
};
// if arg is a valid alias for a known algo it is updated with the proper

View File

@@ -179,14 +179,6 @@ int cube_4way_full( cube_4way_context *sp, void *output, int hashbitlen,
sp->rounds = 16;
sp->pos = 0;
h[ 0] = m512_const1_128( iv[0] );
h[ 1] = m512_const1_128( iv[1] );
h[ 2] = m512_const1_128( iv[2] );
h[ 3] = m512_const1_128( iv[3] );
h[ 4] = m512_const1_128( iv[4] );
h[ 5] = m512_const1_128( iv[5] );
h[ 6] = m512_const1_128( iv[6] );
h[ 7] = m512_const1_128( iv[7] );
h[ 0] = m512_const1_128( iv[0] );
h[ 1] = m512_const1_128( iv[1] );
h[ 2] = m512_const1_128( iv[2] );
@@ -447,14 +439,6 @@ int cube_2way_full( cube_2way_context *sp, void *output, int hashbitlen,
sp->rounds = 16;
sp->pos = 0;
h[ 0] = m256_const1_128( iv[0] );
h[ 1] = m256_const1_128( iv[1] );
h[ 2] = m256_const1_128( iv[2] );
h[ 3] = m256_const1_128( iv[3] );
h[ 4] = m256_const1_128( iv[4] );
h[ 5] = m256_const1_128( iv[5] );
h[ 6] = m256_const1_128( iv[6] );
h[ 7] = m256_const1_128( iv[7] );
h[ 0] = m256_const1_128( iv[0] );
h[ 1] = m256_const1_128( iv[1] );
h[ 2] = m256_const1_128( iv[2] );

View File

@@ -28,6 +28,27 @@ int cube_4way_update_close( cube_4way_context *sp, void *output,
int cube_4way_full( cube_4way_context *sp, void *output, int hashbitlen,
const void *data, size_t size );
int cube_4x256_full( cube_4way_context *sp, void *output, int hashbitlen,
const void *data, size_t size );
#define cube512_4way_init( sp ) cube_4way_update( sp, 512 )
#define cube512_4way_update cube_4way_update
#define cube512_4way_update_close cube_4way_update
#define cube512_4way_close cube_4way_update
#define cube512_4way_full( sp, output, data, size ) \
cube_4way_full( sp, output, 512, data, size )
#define cube512_4x256_full( sp, output, data, size ) \
cube_4x256_full( sp, output, 512, data, size )
#define cube256_4way_init( sp ) cube_4way_update( sp, 256 )
#define cube256_4way_update cube_4way_update
#define cube256_4way_update_close cube_4way_update
#define cube256_4way_close cube_4way_update
#define cube256_4way_full( sp, output, data, size ) \
cube_4way_full( sp, output, 256, data, size )
#define cube256_4x256_full( sp, output, data, size ) \
cube_4x256_full( sp, output, 256, data, size )
#endif
// 2x128, 2 way parallel SSE2

View File

@@ -22,18 +22,26 @@ typedef struct
} echo_4way_context __attribute__ ((aligned (64)));
int echo_4way_init( echo_4way_context *state, int hashbitlen );
#define echo512_4way_init( state ) echo_4way_init( state, 512 )
#define echo256_4way_init( state ) echo_4way_init( state, 256 )
int echo_4way_update( echo_4way_context *state, const void *data,
unsigned int databitlen);
#define echo512_4way_update echo_4way_update
int echo_close( echo_4way_context *state, void *hashval );
#define echo512_4way_close echo_4way_close
int echo_4way_update_close( echo_4way_context *state, void *hashval,
const void *data, int databitlen );
#define echo512_4way_update_close echo_4way_update_close
int echo_4way_full( echo_4way_context *ctx, void *hashval, int nHashSize,
const void *data, int datalen );
#define echo512_4way_full( state, hashval, data, datalen ) \
echo_4way_full( state, hashval, 512, data, datalen )
#define echo256_4way_full( state, hashval, data, datalen ) \
echo_4way_full( state, hashval, 256, data, datalen )
#endif
#endif

View File

@@ -74,6 +74,14 @@ void sph_fugue512_close(void *cc, void *dst);
void sph_fugue512_addbits_and_close(
void *cc, unsigned ub, unsigned n, void *dst);
#define sph_fugue512_full( cc, dst, data, len ) \
do{ \
sph_fugue512_init( cc ); \
sph_fugue512( cc, data, len ); \
sph_fugue512_close( cc, dst ); \
}while(0)
#ifdef __cplusplus
}
#endif

View File

@@ -94,12 +94,12 @@ bool lyra2rev2_thread_init()
const int64_t ROW_LEN_BYTES = ROW_LEN_INT64 * 8;
int size = (int64_t)ROW_LEN_BYTES * 4; // nRows;
#if defined (LYRA2REV2_8WAY)
#if defined (LYRA2REV2_16WAY)
l2v2_wholeMatrix = _mm_malloc( 2 * size, 64 ); // 2 way
init_lyra2rev2_8way_ctx();;
#elif defined (LYRA2REV2_4WAY)
init_lyra2rev2_16way_ctx();;
#elif defined (LYRA2REV2_8WAY)
l2v2_wholeMatrix = _mm_malloc( size, 64 );
init_lyra2rev2_4way_ctx();;
init_lyra2rev2_8way_ctx();;
#else
l2v2_wholeMatrix = _mm_malloc( size, 64 );
init_lyra2rev2_ctx();
@@ -109,12 +109,12 @@ bool lyra2rev2_thread_init()
bool register_lyra2rev2_algo( algo_gate_t* gate )
{
#if defined (LYRA2REV2_8WAY)
#if defined (LYRA2REV2_16WAY)
gate->scanhash = (void*)&scanhash_lyra2rev2_16way;
gate->hash = (void*)&lyra2rev2_16way_hash;
#elif defined (LYRA2REV2_8WAY)
gate->scanhash = (void*)&scanhash_lyra2rev2_8way;
gate->hash = (void*)&lyra2rev2_8way_hash;
#elif defined (LYRA2REV2_4WAY)
gate->scanhash = (void*)&scanhash_lyra2rev2_4way;
gate->hash = (void*)&lyra2rev2_4way_hash;
#else
gate->scanhash = (void*)&scanhash_lyra2rev2;
gate->hash = (void*)&lyra2rev2_hash;

View File

@@ -51,30 +51,32 @@ bool init_lyra2rev3_ctx();
//////////////////////////////////
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
#define LYRA2REV2_8WAY 1
#define LYRA2REV2_16WAY 1
#elif defined(__AVX2__)
#define LYRA2REV2_4WAY 1
#define LYRA2REV2_8WAY 1
#endif
extern __thread uint64_t* l2v2_wholeMatrix;
bool register_lyra2rev2_algo( algo_gate_t* gate );
#if defined(LYRA2REV2_8WAY)
#if defined(LYRA2REV2_16WAY)
void lyra2rev2_16way_hash( void *state, const void *input );
int scanhash_lyra2rev2_16way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
bool init_lyra2rev2_16way_ctx();
#elif defined(LYRA2REV2_8WAY)
void lyra2rev2_8way_hash( void *state, const void *input );
int scanhash_lyra2rev2_8way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
bool init_lyra2rev2_8way_ctx();
#elif defined(LYRA2REV2_4WAY)
void lyra2rev2_4way_hash( void *state, const void *input );
int scanhash_lyra2rev2_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
bool init_lyra2rev2_4way_ctx();
#else
void lyra2rev2_hash( void *state, const void *input );
int scanhash_lyra2rev2( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );

View File

@@ -8,12 +8,30 @@
#include "algo/cubehash/cube-hash-2way.h"
#if 0
void lyra2rev2_8way_hash( void *state, const void *input )
#if defined (LYRA2REV2_16WAY)
typedef struct {
blake256_16way_context blake;
keccak256_8way_context keccak;
cubehashParam cube;
skein256_8way_context skein;
bmw256_16way_context bmw;
} lyra2v2_16way_ctx_holder __attribute__ ((aligned (64)));
static lyra2v2_16way_ctx_holder l2v2_16way_ctx;
bool init_lyra2rev2_16way_ctx()
{
uint32_t vhash[8*8] __attribute__ ((aligned (128)));
uint32_t vhashA[8*8] __attribute__ ((aligned (64)));
uint32_t vhashB[8*8] __attribute__ ((aligned (64)));
keccak256_8way_init( &l2v2_16way_ctx.keccak );
cubehashInit( &l2v2_16way_ctx.cube, 256, 16, 32 );
skein256_8way_init( &l2v2_16way_ctx.skein );
bmw256_16way_init( &l2v2_16way_ctx.bmw );
return true;
}
void lyra2rev2_16way_hash( void *state, const void *input )
{
uint32_t vhash[8*16] __attribute__ ((aligned (128)));
uint32_t hash0[8] __attribute__ ((aligned (64)));
uint32_t hash1[8] __attribute__ ((aligned (64)));
uint32_t hash2[8] __attribute__ ((aligned (64)));
@@ -22,35 +40,60 @@ void lyra2rev2_8way_hash( void *state, const void *input )
uint32_t hash5[8] __attribute__ ((aligned (64)));
uint32_t hash6[8] __attribute__ ((aligned (64)));
uint32_t hash7[8] __attribute__ ((aligned (64)));
lyra2v2_8way_ctx_holder ctx __attribute__ ((aligned (64)));
memcpy( &ctx, &l2v2_8way_ctx, sizeof(l2v2_8way_ctx) );
uint32_t hash8[8] __attribute__ ((aligned (64)));
uint32_t hash9[8] __attribute__ ((aligned (64)));
uint32_t hash10[8] __attribute__ ((aligned (64)));
uint32_t hash11[8] __attribute__ ((aligned (64)));
uint32_t hash12[8] __attribute__ ((aligned (64)));
uint32_t hash13[8] __attribute__ ((aligned (64)));
uint32_t hash14[8] __attribute__ ((aligned (64)));
uint32_t hash15[8] __attribute__ ((aligned (64)));
lyra2v2_16way_ctx_holder ctx __attribute__ ((aligned (64)));
memcpy( &ctx, &l2v2_16way_ctx, sizeof(l2v2_16way_ctx) );
blake256_8way_update( &ctx.blake, input + (64<<3), 16 );
blake256_8way_close( &ctx.blake, vhash );
blake256_16way_update( &ctx.blake, input + (64<<4), 16 );
blake256_16way_close( &ctx.blake, vhash );
rintrlv_8x32_8x64( vhashA, vhash, 256 );
dintrlv_16x32( hash0, hash1, hash2, hash3,
hash4, hash5, hash6, hash7,
hash8, hash9, hash10, hash11,
hash12, hash13, hash14, hash15, vhash, 256 );
keccak256_8way_update( &ctx.keccak, vhashA, 32 );
intrlv_8x64( vhash, hash0, hash1, hash2, hash3,
hash4, hash5, hash6, hash7, 256 );
keccak256_8way_update( &ctx.keccak, vhash, 32 );
keccak256_8way_close( &ctx.keccak, vhash );
dintrlv_8x64( hash0, hash1, hash2, hash3,
hash4, hash5, hash6, hash7, vhash, 256 );
intrlv_8x64( vhash, hash8, hash9, hash10, hash11,
hash12, hash13, hash14, hash15, 256 );
cubehash_full( &ctx.cube, (byte*) hash0, 256, (const byte*) hash0, 32 );
cubehash_full( &ctx.cube, (byte*) hash1, 256, (const byte*) hash1, 32 );
cubehash_full( &ctx.cube, (byte*) hash2, 256, (const byte*) hash2, 32 );
cubehash_full( &ctx.cube, (byte*) hash3, 256, (const byte*) hash3, 32 );
cubehash_full( &ctx.cube, (byte*) hash4, 256, (const byte*) hash4, 32 );
cubehash_full( &ctx.cube, (byte*) hash5, 256, (const byte*) hash5, 32 );
cubehash_full( &ctx.cube, (byte*) hash6, 256, (const byte*) hash6, 32 );
cubehash_full( &ctx.cube, (byte*) hash7, 256, (const byte*) hash7, 32 );
keccak256_8way_init( &ctx.keccak );
keccak256_8way_update( &ctx.keccak, vhash, 32 );
keccak256_8way_close( &ctx.keccak, vhash );
dintrlv_8x64( hash8, hash9, hash10, hash11,
hash12, hash13, hash14, hash5, vhash, 256 );
cubehash_full( &ctx.cube, (byte*) hash0, 256, (const byte*) hash0, 32 );
cubehash_full( &ctx.cube, (byte*) hash1, 256, (const byte*) hash1, 32 );
cubehash_full( &ctx.cube, (byte*) hash2, 256, (const byte*) hash2, 32 );
cubehash_full( &ctx.cube, (byte*) hash3, 256, (const byte*) hash3, 32 );
cubehash_full( &ctx.cube, (byte*) hash4, 256, (const byte*) hash4, 32 );
cubehash_full( &ctx.cube, (byte*) hash5, 256, (const byte*) hash5, 32 );
cubehash_full( &ctx.cube, (byte*) hash6, 256, (const byte*) hash6, 32 );
cubehash_full( &ctx.cube, (byte*) hash7, 256, (const byte*) hash7, 32 );
cubehash_full( &ctx.cube, (byte*) hash8, 256, (const byte*) hash8, 32 );
cubehash_full( &ctx.cube, (byte*) hash9, 256, (const byte*) hash9, 32 );
cubehash_full( &ctx.cube, (byte*) hash10, 256, (const byte*) hash10, 32 );
cubehash_full( &ctx.cube, (byte*) hash11, 256, (const byte*) hash11, 32 );
cubehash_full( &ctx.cube, (byte*) hash12, 256, (const byte*) hash12, 32 );
cubehash_full( &ctx.cube, (byte*) hash13, 256, (const byte*) hash13, 32 );
cubehash_full( &ctx.cube, (byte*) hash14, 256, (const byte*) hash14, 32 );
cubehash_full( &ctx.cube, (byte*) hash15, 256, (const byte*) hash15, 32 );
// cube_4way_update_close( &ctx.cube, vhashA, vhashA, 32 );
// cube_4way_init( &ctx.cube, 256, 16, 32 );
// cube_4way_update_close( &ctx.cube, vhashB, vhashB, 32 );
//
// dintrlv_4x128( hash0, hash1, hash2, hash3, vhashA, 256 );
// dintrlv_4x128( hash4, hash5, hash6, hash7, vhashB, 256 );
intrlv_2x256( vhash, hash0, hash1, 256 );
LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 );
@@ -64,61 +107,127 @@ void lyra2rev2_8way_hash( void *state, const void *input )
intrlv_2x256( vhash, hash6, hash7, 256 );
LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 );
dintrlv_2x256( hash6, hash7, vhash, 256 );
intrlv_2x256( vhash, hash8, hash9, 256 );
LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 );
dintrlv_2x256( hash8, hash9, vhash, 256 );
intrlv_2x256( vhash, hash10, hash11, 256 );
LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 );
dintrlv_2x256( hash10, hash11, vhash, 256 );
intrlv_2x256( vhash, hash12, hash13, 256 );
LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 );
dintrlv_2x256( hash12, hash13, vhash, 256 );
intrlv_2x256( vhash, hash14, hash15, 256 );
LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 );
dintrlv_2x256( hash14, hash15, vhash, 256 );
intrlv_8x64( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
hash7, 256 );
intrlv_8x64( vhash, hash0, hash1, hash2, hash3,
hash4, hash5, hash6, hash7, 256 );
skein256_8way_update( &ctx.skein, vhash, 32 );
skein256_8way_close( &ctx.skein, vhash );
dintrlv_8x64( hash0, hash1, hash2, hash3,
hash4, hash5, hash6, hash7, vhash, 256 );
intrlv_8x64( vhash, hash8, hash9, hash10, hash11, hash12,
hash13, hash14, hash15, 256 );
cubehash_full( &ctx.cube, (byte*) hash0, 256, (const byte*) hash0, 32 );
cubehash_full( &ctx.cube, (byte*) hash1, 256, (const byte*) hash1, 32 );
cubehash_full( &ctx.cube, (byte*) hash2, 256, (const byte*) hash2, 32 );
cubehash_full( &ctx.cube, (byte*) hash3, 256, (const byte*) hash3, 32 );
cubehash_full( &ctx.cube, (byte*) hash4, 256, (const byte*) hash4, 32 );
cubehash_full( &ctx.cube, (byte*) hash5, 256, (const byte*) hash5, 32 );
cubehash_full( &ctx.cube, (byte*) hash6, 256, (const byte*) hash6, 32 );
cubehash_full( &ctx.cube, (byte*) hash7, 256, (const byte*) hash7, 32 );
skein256_8way_init( &ctx.skein );
skein256_8way_update( &ctx.skein, vhash, 32 );
skein256_8way_close( &ctx.skein, vhash );
// cube_4way_init( &ctx.cube, 256, 16, 32 );
// cube_4way_update_close( &ctx.cube, vhashA, vhashA, 32 );
// cube_4way_init( &ctx.cube, 256, 16, 32 );
// cube_4way_update_close( &ctx.cube, vhashB, vhashB, 32 );
//
// dintrlv_4x128( hash0, hash1, hash2, hash3, vhashA, 256 );
// dintrlv_4x128( hash4, hash5, hash6, hash7, vhashB, 256 );
dintrlv_8x64( hash8, hash9, hash10, hash11,
hash12, hash13, hash14, hash15, vhash, 256 );
intrlv_8x32( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
hash7, 256 );
cubehash_full( &ctx.cube, (byte*) hash0, 256, (const byte*) hash0, 32 );
cubehash_full( &ctx.cube, (byte*) hash1, 256, (const byte*) hash1, 32 );
cubehash_full( &ctx.cube, (byte*) hash2, 256, (const byte*) hash2, 32 );
cubehash_full( &ctx.cube, (byte*) hash3, 256, (const byte*) hash3, 32 );
cubehash_full( &ctx.cube, (byte*) hash4, 256, (const byte*) hash4, 32 );
cubehash_full( &ctx.cube, (byte*) hash5, 256, (const byte*) hash5, 32 );
cubehash_full( &ctx.cube, (byte*) hash6, 256, (const byte*) hash6, 32 );
cubehash_full( &ctx.cube, (byte*) hash7, 256, (const byte*) hash7, 32 );
cubehash_full( &ctx.cube, (byte*) hash8, 256, (const byte*) hash8, 32 );
cubehash_full( &ctx.cube, (byte*) hash9, 256, (const byte*) hash9, 32 );
cubehash_full( &ctx.cube, (byte*) hash10, 256, (const byte*) hash10, 32 );
cubehash_full( &ctx.cube, (byte*) hash11, 256, (const byte*) hash11, 32 );
cubehash_full( &ctx.cube, (byte*) hash12, 256, (const byte*) hash12, 32 );
cubehash_full( &ctx.cube, (byte*) hash13, 256, (const byte*) hash13, 32 );
cubehash_full( &ctx.cube, (byte*) hash14, 256, (const byte*) hash14, 32 );
cubehash_full( &ctx.cube, (byte*) hash15, 256, (const byte*) hash15, 32 );
bmw256_8way_update( &ctx.bmw, vhash, 32 );
bmw256_8way_close( &ctx.bmw, state );
intrlv_16x32( vhash, hash0, hash1, hash2, hash3,
hash4, hash5, hash6, hash7,
hash8, hash9, hash10, hash11,
hash12, hash13, hash14, hash15, 256 );
bmw256_16way_update( &ctx.bmw, vhash, 32 );
bmw256_16way_close( &ctx.bmw, state );
}
#endif
int scanhash_lyra2rev2_16way( struct work *work, const uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t hash[8*16] __attribute__ ((aligned (128)));
uint32_t vdata[20*16] __attribute__ ((aligned (64)));
uint32_t *hashd7 = &hash[7*16];
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce - 16;
uint32_t n = first_nonce;
const uint32_t targ32 = ptarget[7];
__m512i *noncev = (__m512i*)vdata + 19;
const int thr_id = mythr->id;
const bool bench = opt_benchmark;
if ( bench ) ptarget[7] = 0x0000ff;
mm512_bswap32_intrlv80_16x32( vdata, pdata );
*noncev = _mm512_set_epi32( n+15, n+14, n+13, n+12, n+11, n+10, n+ 9, n+ 8,
n+ 7, n+ 6, n+ 5, n+ 4, n+ 3, n+ 2, n+ 1, n );
blake256_16way_init( &l2v2_16way_ctx.blake );
blake256_16way_update( &l2v2_16way_ctx.blake, vdata, 64 );
#if defined (LYRA2REV2_8WAY)
do
{
lyra2rev2_16way_hash( hash, vdata );
for ( int lane = 0; lane < 16; lane++ )
if ( unlikely( hashd7[lane] <= targ32 ) )
{
extr_lane_16x32( lane_hash, hash, lane, 256 );
if ( likely( valid_hash( lane_hash, ptarget ) && !bench ) )
{
pdata[19] = bswap_32( n + lane );
submit_lane_solution( work, lane_hash, mythr, lane );
}
}
*noncev = _mm512_add_epi32( *noncev, m512_const1_32( 16 ) );
n += 16;
} while ( likely( (n < last_nonce) && !work_restart[thr_id].restart ) );
pdata[19] = n;
*hashes_done = n - first_nonce;
return 0;
}
#elif defined (LYRA2REV2_8WAY)
typedef struct {
blake256_8way_context blake;
keccak256_8way_context keccak;
keccak256_4way_context keccak;
cubehashParam cube;
skein256_8way_context skein;
bmw256_8way_context bmw;
skein256_4way_context skein;
bmw256_8way_context bmw;
} lyra2v2_8way_ctx_holder __attribute__ ((aligned (64)));
static lyra2v2_8way_ctx_holder l2v2_8way_ctx;
bool init_lyra2rev2_8way_ctx()
{
keccak256_8way_init( &l2v2_8way_ctx.keccak );
keccak256_4way_init( &l2v2_8way_ctx.keccak );
cubehashInit( &l2v2_8way_ctx.cube, 256, 16, 32 );
skein256_8way_init( &l2v2_8way_ctx.skein );
skein256_4way_init( &l2v2_8way_ctx.skein );
bmw256_8way_init( &l2v2_8way_ctx.bmw );
return true;
}
@@ -126,7 +235,6 @@ bool init_lyra2rev2_8way_ctx()
void lyra2rev2_8way_hash( void *state, const void *input )
{
uint32_t vhash[8*8] __attribute__ ((aligned (128)));
uint32_t vhashA[8*8] __attribute__ ((aligned (64)));
uint32_t hash0[8] __attribute__ ((aligned (64)));
uint32_t hash1[8] __attribute__ ((aligned (64)));
uint32_t hash2[8] __attribute__ ((aligned (64)));
@@ -141,14 +249,19 @@ void lyra2rev2_8way_hash( void *state, const void *input )
blake256_8way_update( &ctx.blake, input + (64<<3), 16 );
blake256_8way_close( &ctx.blake, vhash );
rintrlv_8x32_8x64( vhashA, vhash, 256 );
keccak256_8way_update( &ctx.keccak, vhashA, 32 );
keccak256_8way_close( &ctx.keccak, vhash );
dintrlv_8x64( hash0, hash1, hash2, hash3,
dintrlv_8x32( hash0, hash1, hash2, hash3,
hash4, hash5, hash6, hash7, vhash, 256 );
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 256 );
keccak256_4way_update( &ctx.keccak, vhash, 32 );
keccak256_4way_close( &ctx.keccak, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 256 );
intrlv_4x64( vhash, hash4, hash5, hash6, hash7, 256 );
keccak256_4way_init( &ctx.keccak );
keccak256_4way_update( &ctx.keccak, vhash, 32 );
keccak256_4way_close( &ctx.keccak, vhash );
dintrlv_4x64( hash4, hash5, hash6, hash7, vhash, 256 );
cubehash_full( &ctx.cube, (byte*) hash0, 256, (const byte*) hash0, 32 );
cubehash_full( &ctx.cube, (byte*) hash1, 256, (const byte*) hash1, 32 );
cubehash_full( &ctx.cube, (byte*) hash2, 256, (const byte*) hash2, 32 );
@@ -158,27 +271,25 @@ void lyra2rev2_8way_hash( void *state, const void *input )
cubehash_full( &ctx.cube, (byte*) hash6, 256, (const byte*) hash6, 32 );
cubehash_full( &ctx.cube, (byte*) hash7, 256, (const byte*) hash7, 32 );
intrlv_2x256( vhash, hash0, hash1, 256 );
LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 );
dintrlv_2x256( hash0, hash1, vhash, 256 );
intrlv_2x256( vhash, hash2, hash3, 256 );
LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 );
dintrlv_2x256( hash2, hash3, vhash, 256 );
intrlv_2x256( vhash, hash4, hash5, 256 );
LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 );
dintrlv_2x256( hash4, hash5, vhash, 256 );
intrlv_2x256( vhash, hash6, hash7, 256 );
LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 );
dintrlv_2x256( hash6, hash7, vhash, 256 );
intrlv_8x64( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
hash7, 256 );
skein256_8way_update( &ctx.skein, vhash, 32 );
skein256_8way_close( &ctx.skein, vhash );
dintrlv_8x64( hash0, hash1, hash2, hash3,
hash4, hash5, hash6, hash7, vhash, 256 );
LYRA2REV2( l2v2_wholeMatrix, hash0, 32, hash0, 32, hash0, 32, 1, 4, 4 );
LYRA2REV2( l2v2_wholeMatrix, hash1, 32, hash1, 32, hash1, 32, 1, 4, 4 );
LYRA2REV2( l2v2_wholeMatrix, hash2, 32, hash2, 32, hash2, 32, 1, 4, 4 );
LYRA2REV2( l2v2_wholeMatrix, hash3, 32, hash3, 32, hash3, 32, 1, 4, 4 );
LYRA2REV2( l2v2_wholeMatrix, hash4, 32, hash4, 32, hash4, 32, 1, 4, 4 );
LYRA2REV2( l2v2_wholeMatrix, hash5, 32, hash5, 32, hash5, 32, 1, 4, 4 );
LYRA2REV2( l2v2_wholeMatrix, hash6, 32, hash6, 32, hash6, 32, 1, 4, 4 );
LYRA2REV2( l2v2_wholeMatrix, hash7, 32, hash7, 32, hash7, 32, 1, 4, 4 );
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 256 );
skein256_4way_update( &ctx.skein, vhash, 32 );
skein256_4way_close( &ctx.skein, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 256 );
intrlv_4x64( vhash, hash4, hash5, hash6, hash7, 256 );
skein256_4way_init( &ctx.skein );
skein256_4way_update( &ctx.skein, vhash, 32 );
skein256_4way_close( &ctx.skein, vhash );
dintrlv_4x64( hash4, hash5, hash6, hash7, vhash, 256 );
cubehash_full( &ctx.cube, (byte*) hash0, 256, (const byte*) hash0, 32 );
cubehash_full( &ctx.cube, (byte*) hash1, 256, (const byte*) hash1, 32 );
@@ -189,8 +300,8 @@ void lyra2rev2_8way_hash( void *state, const void *input )
cubehash_full( &ctx.cube, (byte*) hash6, 256, (const byte*) hash6, 32 );
cubehash_full( &ctx.cube, (byte*) hash7, 256, (const byte*) hash7, 32 );
intrlv_8x32( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
hash7, 256 );
intrlv_8x32( vhash, hash0, hash1, hash2, hash3,
hash4, hash5, hash6, hash7, 256 );
bmw256_8way_update( &ctx.bmw, vhash, 32 );
bmw256_8way_close( &ctx.bmw, state );
@@ -223,7 +334,6 @@ int scanhash_lyra2rev2_8way( struct work *work, const uint32_t max_nonce,
do
{
lyra2rev2_8way_hash( hash, vdata );
pdata[19] = n;
for ( int lane = 0; lane < 8; lane++ )
if ( unlikely( hashd7[lane] <= targ32 ) )
@@ -243,6 +353,9 @@ int scanhash_lyra2rev2_8way( struct work *work, const uint32_t max_nonce,
return 0;
}
#endif
/*
#elif defined (LYRA2REV2_4WAY)
typedef struct {
@@ -367,3 +480,4 @@ int scanhash_lyra2rev2_4way( struct work *work, uint32_t max_nonce,
}
#endif
*/

View File

@@ -1,18 +1,241 @@
#include "cpuminer-config.h"
#include "anime-gate.h"
#if defined (ANIME_4WAY)
#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include "algo/blake/blake-hash-4way.h"
#include "algo/bmw/bmw-hash-4way.h"
#include "algo/skein/skein-hash-4way.h"
#include "algo/jh/jh-hash-4way.h"
#include "algo/keccak/keccak-hash-4way.h"
#include "algo/groestl/aes_ni/hash-groestl.h"
#if defined(__VAES__)
#include "algo/groestl/groestl512-hash-4way.h"
#endif
#if defined (ANIME_8WAY)
typedef struct {
blake512_8way_context blake;
bmw512_8way_context bmw;
#if defined(__VAES__)
groestl512_4way_context groestl;
#else
hashState_groestl groestl;
#endif
jh512_8way_context jh;
skein512_8way_context skein;
keccak512_8way_context keccak;
} anime_8way_ctx_holder;
anime_8way_ctx_holder anime_8way_ctx __attribute__ ((aligned (64)));
void init_anime_8way_ctx()
{
blake512_8way_init( &anime_8way_ctx.blake );
bmw512_8way_init( &anime_8way_ctx.bmw );
#if defined(__VAES__)
groestl512_4way_init( &anime_8way_ctx.groestl, 64 );
#else
init_groestl( &anime_8way_ctx.groestl, 64 );
#endif
skein512_8way_init( &anime_8way_ctx.skein );
jh512_8way_init( &anime_8way_ctx.jh );
keccak512_8way_init( &anime_8way_ctx.keccak );
}
void anime_8way_hash( void *state, const void *input )
{
uint64_t vhash[8*8] __attribute__ ((aligned (128)));
uint64_t vhashA[8*8] __attribute__ ((aligned (64)));
uint64_t vhashB[8*8] __attribute__ ((aligned (64)));
uint64_t vhashC[8*8] __attribute__ ((aligned (64)));
#if !defined(__VAES__)
uint64_t hash0[8] __attribute__ ((aligned (64)));
uint64_t hash1[8] __attribute__ ((aligned (64)));
uint64_t hash2[8] __attribute__ ((aligned (64)));
uint64_t hash3[8] __attribute__ ((aligned (64)));
uint64_t hash4[8] __attribute__ ((aligned (64)));
uint64_t hash5[8] __attribute__ ((aligned (64)));
uint64_t hash6[8] __attribute__ ((aligned (64)));
uint64_t hash7[8] __attribute__ ((aligned (64)));
#endif
__m512i* vh = (__m512i*)vhash;
__m512i* vhA = (__m512i*)vhashA;
__m512i* vhB = (__m512i*)vhashB;
__m512i* vhC = (__m512i*)vhashC;
const __m512i bit3_mask = m512_const1_64( 8 );
const __m512i zero = _mm512_setzero_si512();
__mmask8 vh_mask;
anime_8way_ctx_holder ctx;
memcpy( &ctx, &anime_8way_ctx, sizeof(anime_8way_ctx) );
bmw512_8way_full( &ctx.bmw, vhash, input, 80 );
blake512_8way_full( &ctx.blake, vhash, vhash, 64 );
vh_mask = _mm512_cmpeq_epi64_mask( _mm512_and_si512( vh[0], bit3_mask ),
zero );
#if defined(__VAES__)
rintrlv_8x64_4x128( vhashA, vhashB, vhash, 512 );
if ( ( vh_mask & 0x0f ) != 0x0f )
groestl512_4way_full( &ctx.groestl, vhashA, vhashA, 64 );
if ( ( vh_mask & 0xf0 ) != 0xf0 )
groestl512_4way_full( &ctx.groestl, vhashB, vhashB, 64 );
rintrlv_4x128_8x64( vhashC, vhashA, vhashB, 512 );
#else
dintrlv_8x64_512( hash0, hash1, hash2, hash3,
hash4, hash5, hash6, hash7, vhash );
if ( hash0[0] & 8 )
groestl512_full( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
if ( hash1[0] & 8 )
groestl512_full( &ctx.groestl, (char*)hash1, (char*)hash1, 512 );
if ( hash2[0] & 8)
groestl512_full( &ctx.groestl, (char*)hash2, (char*)hash2, 512 );
if ( hash3[0] & 8 )
groestl512_full( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
if ( hash4[0] & 8 )
groestl512_full( &ctx.groestl, (char*)hash4, (char*)hash4, 512 );
if ( hash5[0] & 8 )
groestl512_full( &ctx.groestl, (char*)hash5, (char*)hash5, 512 );
if ( hash6[0] & 8 )
groestl512_full( &ctx.groestl, (char*)hash6, (char*)hash6, 512 );
if ( hash7[0] & 8 )
groestl512_full( &ctx.groestl, (char*)hash7, (char*)hash7, 512 );
intrlv_8x64_512( vhashC, hash0, hash1, hash2, hash3,
hash4, hash5, hash6, hash7 );
#endif
if ( vh_mask & 0xff )
skein512_8way_full( &ctx.skein, vhashB, vhash, 64 );
mm512_blend_hash_8x64( vh, vhC, vhB, vh_mask );
#if defined(__VAES__)
rintrlv_8x64_4x128( vhashA, vhashB, vhash, 512 );
groestl512_4way_full( &ctx.groestl, vhashA, vhashA, 64 );
groestl512_4way_full( &ctx.groestl, vhashB, vhashB, 64 );
rintrlv_4x128_8x64( vhash, vhashA, vhashB, 512 );
#else
dintrlv_8x64_512( hash0, hash1, hash2, hash3,
hash4, hash5, hash6, hash7, vhash );
groestl512_full( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
groestl512_full( &ctx.groestl, (char*)hash1, (char*)hash1, 512 );
groestl512_full( &ctx.groestl, (char*)hash2, (char*)hash2, 512 );
groestl512_full( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
groestl512_full( &ctx.groestl, (char*)hash4, (char*)hash4, 512 );
groestl512_full( &ctx.groestl, (char*)hash5, (char*)hash5, 512 );
groestl512_full( &ctx.groestl, (char*)hash6, (char*)hash6, 512 );
groestl512_full( &ctx.groestl, (char*)hash7, (char*)hash7, 512 );
intrlv_8x64_512( vhash, hash0, hash1, hash2, hash3,
hash4, hash5, hash6, hash7 );
#endif
jh512_8way_init( &ctx.jh );
jh512_8way_update( &ctx.jh, vhash, 64 );
jh512_8way_close( &ctx.jh, vhash );
vh_mask = _mm512_cmpeq_epi64_mask( _mm512_and_si512( vh[0], bit3_mask ),
zero );
if ( ( vh_mask & 0xff ) != 0xff )
blake512_8way_full( &ctx.blake, vhashA, vhash, 64 );
if ( vh_mask & 0xff )
bmw512_8way_full( &ctx.bmw, vhashB, vhash, 64 );
mm512_blend_hash_8x64( vh, vhA, vhB, vh_mask );
keccak512_8way_init( &ctx.keccak );
keccak512_8way_update( &ctx.keccak, vhash, 64 );
keccak512_8way_close( &ctx.keccak, vhash );
skein512_8way_full( &ctx.skein, vhash, vhash, 64 );
vh_mask = _mm512_cmpeq_epi64_mask( _mm512_and_si512( vh[0], bit3_mask ),
zero );
if ( ( vh_mask & 0xff ) != 0xff )
{
keccak512_8way_init( &ctx.keccak );
keccak512_8way_update( &ctx.keccak, vhash, 64 );
keccak512_8way_close( &ctx.keccak, vhashA );
}
if ( vh_mask & 0xff )
{
jh512_8way_init( &ctx.jh );
jh512_8way_update( &ctx.jh, vhash, 64 );
jh512_8way_close( &ctx.jh, vhashB );
}
casti_m512i( state,0 ) = _mm512_mask_blend_epi64( vh_mask, vhA[0], vhB[0] );
casti_m512i( state,1 ) = _mm512_mask_blend_epi64( vh_mask, vhA[1], vhB[1] );
casti_m512i( state,2 ) = _mm512_mask_blend_epi64( vh_mask, vhA[2], vhB[2] );
casti_m512i( state,3 ) = _mm512_mask_blend_epi64( vh_mask, vhA[3], vhB[3] );
}
// Scan a nonce range for the "anime" algorithm using 8-way AVX-512
// vectorization: hashes 8 consecutive nonces per iteration and submits any
// lane whose hash meets the target. Always returns 0; solutions are
// reported through submit_lane_solution().
int scanhash_anime_8way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
// 256 bits of hash per lane, 8 lanes, stored 8x64-bit interleaved.
uint64_t hash64[4*8] __attribute__ ((aligned (64)));
// 80-byte block header, byte-swapped and interleaved for 8 lanes.
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
// Scratch buffer for one de-interleaved lane's hash.
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
// Points at the interleaved group holding each lane's 4th 64-bit word (q3),
// used below as a cheap pre-filter before the full target comparison.
uint64_t *hash64_q3 = &(hash64[3*8]);
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
// Most-significant quadword of the target for the quick lane pre-test.
const uint64_t targ64_q3 = ((uint64_t*)ptarget)[3];
uint32_t n = pdata[19];   // nonce lives in header word 19
const uint32_t first_nonce = pdata[19];
// Stop 8 early so the final batch of 8 nonces never exceeds max_nonce.
const uint32_t last_nonce = max_nonce - 8;
// Nonce words sit in the 10th 512-bit lane group of the interleaved header.
__m512i *noncev = (__m512i*)vdata + 9;
const int thr_id = mythr->id;
const bool bench = opt_benchmark;
mm512_bswap32_intrlv80_8x64( vdata, pdata );
// Seed the 8 lane nonces n..n+7 into the interleaved data.
*noncev = mm512_intrlv_blend_32(
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
n+3, 0, n+2, 0, n+1, 0, n , 0 ), *noncev );
do
{
anime_8way_hash( hash64, vdata );
for ( int lane = 0; lane < 8; lane++ )
// Quick check on the high quadword only; full test done after extraction.
if ( unlikely( hash64_q3[ lane ] <= targ64_q3 && !bench ) )
{
extr_lane_8x64( lane_hash, hash64, lane, 256 );
if ( valid_hash( lane_hash, ptarget ) )
{
// Nonce is stored big-endian in the work data.
pdata[19] = bswap_32( n + lane );
submit_lane_solution( work, lane_hash, mythr, lane );
}
}
// Advance all 8 lane nonces by 8 (nonces occupy the high 32 bits of
// each interleaved 64-bit element).
*noncev = _mm512_add_epi32( *noncev,
m512_const1_64( 0x0000000800000000 ) );
n += 8;
} while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) );
pdata[19] = n;
*hashes_done = n - first_nonce;
return 0;
}
#elif defined (ANIME_4WAY)
typedef struct {
blake512_4way_context blake;
@@ -23,18 +246,6 @@ typedef struct {
keccak512_4way_context keccak;
} anime_4way_ctx_holder;
anime_4way_ctx_holder anime_4way_ctx __attribute__ ((aligned (64)));
// One-time initialization of the shared 4-way anime context template.
// anime_4way_hash() memcpy's this pre-initialized state at the start of
// each hash instead of re-running the per-algorithm init functions.
void init_anime_4way_ctx()
{
blake512_4way_init( &anime_4way_ctx.blake );
bmw512_4way_init( &anime_4way_ctx.bmw );
init_groestl( &anime_4way_ctx.groestl, 64 );   // 64-byte (512-bit) output
skein512_4way_init( &anime_4way_ctx.skein );
jh512_4way_init( &anime_4way_ctx.jh );
keccak512_4way_init( &anime_4way_ctx.keccak );
}
void anime_4way_hash( void *state, const void *input )
{
uint64_t hash0[8] __attribute__ ((aligned (64)));
@@ -48,81 +259,61 @@ void anime_4way_hash( void *state, const void *input )
__m256i* vhA = (__m256i*)vhashA;
__m256i* vhB = (__m256i*)vhashB;
__m256i vh_mask;
const uint32_t mask = 8;
int h_mask;
const __m256i bit3_mask = m256_const1_64( 8 );
const __m256i zero = _mm256_setzero_si256();
anime_4way_ctx_holder ctx;
memcpy( &ctx, &anime_4way_ctx, sizeof(anime_4way_ctx) );
bmw512_4way_init( &ctx.bmw );
bmw512_4way_update( &ctx.bmw, input, 80 );
bmw512_4way_close( &ctx.bmw, vhash );
blake512_4way_update( &ctx.blake, vhash, 64 );
blake512_4way_close( &ctx.blake, vhash );
blake512_4way_full( &ctx.blake, vhash, vhash, 64 );
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ), zero );
h_mask = _mm256_movemask_epi8( vh_mask );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
if ( hash0[0] & mask )
{
update_and_final_groestl( &ctx.groestl, (char*)hash0,
(char*)hash0, 512 );
}
if ( hash1[0] & mask )
{
reinit_groestl( &ctx.groestl );
update_and_final_groestl( &ctx.groestl, (char*)hash1,
(char*)hash1, 512 );
}
if ( hash2[0] & mask )
{
reinit_groestl( &ctx.groestl );
update_and_final_groestl( &ctx.groestl, (char*)hash2,
(char*)hash2, 512 );
}
if ( hash3[0] & mask )
{
reinit_groestl( &ctx.groestl );
update_and_final_groestl( &ctx.groestl, (char*)hash3,
(char*)hash3, 512 );
}
// A
if ( hash0[0] & 8 )
groestl512_full( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
if ( hash1[0] & 8 )
groestl512_full( &ctx.groestl, (char*)hash1, (char*)hash1, 512 );
if ( hash2[0] & 8)
groestl512_full( &ctx.groestl, (char*)hash2, (char*)hash2, 512 );
if ( hash3[0] & 8 )
groestl512_full( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
intrlv_4x64( vhashA, hash0, hash1, hash2, hash3, 512 );
if ( mm256_anybits0( vh_mask ) )
{
skein512_4way_update( &ctx.skein, vhash, 64 );
skein512_4way_close( &ctx.skein, vhashB );
}
// B
if ( h_mask & 0xffffffff )
skein512_4way_full( &ctx.skein, vhashB, vhash, 64 );
mm256_blend_hash_4x64( vh, vhA, vhB, vh_mask );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
reinit_groestl( &ctx.groestl );
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
reinit_groestl( &ctx.groestl );
update_and_final_groestl( &ctx.groestl, (char*)hash1, (char*)hash1, 512 );
reinit_groestl( &ctx.groestl );
update_and_final_groestl( &ctx.groestl, (char*)hash2, (char*)hash2, 512 );
reinit_groestl( &ctx.groestl );
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
groestl512_full( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
groestl512_full( &ctx.groestl, (char*)hash1, (char*)hash1, 512 );
groestl512_full( &ctx.groestl, (char*)hash2, (char*)hash2, 512 );
groestl512_full( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
jh512_4way_init( &ctx.jh );
jh512_4way_update( &ctx.jh, vhash, 64 );
jh512_4way_close( &ctx.jh, vhash );
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ), zero );
h_mask = _mm256_movemask_epi8( vh_mask );
if ( mm256_anybits1( vh_mask ) )
{
blake512_4way_init( &ctx.blake );
blake512_4way_update( &ctx.blake, vhash, 64 );
blake512_4way_close( &ctx.blake, vhashA );
}
if ( mm256_anybits0( vh_mask ) )
// A
if ( ( h_mask & 0xffffffff ) != 0xffffffff )
blake512_4way_full( &ctx.blake, vhashA, vhash, 64 );
// B
if ( h_mask & 0xffffffff )
{
bmw512_4way_init( &ctx.bmw );
bmw512_4way_update( &ctx.bmw, vhash, 64 );
@@ -131,64 +322,74 @@ void anime_4way_hash( void *state, const void *input )
mm256_blend_hash_4x64( vh, vhA, vhB, vh_mask );
keccak512_4way_init( &ctx.keccak );
keccak512_4way_update( &ctx.keccak, vhash, 64 );
keccak512_4way_close( &ctx.keccak, vhash );
skein512_4way_init( &ctx.skein );
skein512_4way_update( &ctx.skein, vhash, 64 );
skein512_4way_close( &ctx.skein, vhash );
skein512_4way_full( &ctx.skein, vhash, vhash, 64 );
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ), zero );
h_mask = _mm256_movemask_epi8( vh_mask );
if ( mm256_anybits1( vh_mask ) )
// A
if ( ( h_mask & 0xffffffff ) != 0xffffffff )
{
keccak512_4way_init( &ctx.keccak );
keccak512_4way_update( &ctx.keccak, vhash, 64 );
keccak512_4way_close( &ctx.keccak, vhashA );
}
if ( mm256_anybits0( vh_mask ) )
// B
if ( h_mask & 0xffffffff )
{
jh512_4way_init( &ctx.jh );
jh512_4way_update( &ctx.jh, vhash, 64 );
jh512_4way_close( &ctx.jh, vhashB );
}
mm256_blend_hash_4x64( vh, vhA, vhB, vh_mask );
dintrlv_4x64( state, state+32, state+64, state+96, vhash, 256 );
casti_m256i( state, 0 ) = _mm256_blendv_epi8( vhA[0], vhB[0], vh_mask );
casti_m256i( state, 1 ) = _mm256_blendv_epi8( vhA[1], vhB[1], vh_mask );
casti_m256i( state, 2 ) = _mm256_blendv_epi8( vhA[2], vhB[2], vh_mask );
casti_m256i( state, 3 ) = _mm256_blendv_epi8( vhA[3], vhB[3], vh_mask );
}
int scanhash_anime_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t hash[4*8] __attribute__ ((aligned (64)));
uint64_t hash64[4*4] __attribute__ ((aligned (64)));
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
uint64_t *hash64_q3 = &(hash64[3*4]);
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint64_t targ64_q3 = ((uint64_t*)ptarget)[3];
uint32_t n = pdata[19];
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce - 4;
__m256i *noncev = (__m256i*)vdata + 9; // aligned
__m256i *noncev = (__m256i*)vdata + 9;
const int thr_id = mythr->id;
const bool bench = opt_benchmark;
mm256_bswap32_intrlv80_4x64( vdata, pdata );
*noncev = mm256_intrlv_blend_32(
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
do
{
anime_4way_hash( hash, vdata );
anime_4way_hash( hash64, vdata );
for ( int i = 0; i < 4; i++ )
if ( valid_hash( hash+(i<<3), ptarget ) && !opt_benchmark )
for ( int lane = 0; lane < 4; lane++ )
if ( unlikely( hash64_q3[ lane ] <= targ64_q3 && !bench ) )
{
pdata[19] = bswap_32( n+i );
submit_solution( work, hash+(i<<3), mythr );
extr_lane_4x64( lane_hash, hash64, lane, 256 );
if ( valid_hash( lane_hash, ptarget ) )
{
pdata[19] = bswap_32( n + lane );
submit_lane_solution( work, lane_hash, mythr, lane );
}
}
*noncev = _mm256_add_epi32( *noncev,
m256_const1_64( 0x0000000400000000 ) );
n += 4;
} while ( ( n < last_nonce ) && !work_restart[thr_id].restart );
} while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) );
pdata[19] = n;
*hashes_done = n - first_nonce;
return 0;

View File

@@ -2,8 +2,10 @@
bool register_anime_algo( algo_gate_t* gate )
{
#if defined (ANIME_4WAY)
init_anime_4way_ctx();
#if defined (ANIME_8WAY)
gate->scanhash = (void*)&scanhash_anime_8way;
gate->hash = (void*)&anime_8way_hash;
#elif defined (ANIME_4WAY)
gate->scanhash = (void*)&scanhash_anime_4way;
gate->hash = (void*)&anime_4way_hash;
#else
@@ -11,7 +13,7 @@ bool register_anime_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_anime;
gate->hash = (void*)&anime_hash;
#endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT;
return true;
};

View File

@@ -4,18 +4,25 @@
#include "algo-gate-api.h"
#include <stdint.h>
#if defined(__AVX2__) && defined(__AES__)
#define ANIME_4WAY
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
#define ANIME_8WAY 1
#elif defined(__AVX2__) && defined(__AES__)
#define ANIME_4WAY 1
#endif
bool register_anime_algo( algo_gate_t* gate );
#if defined(ANIME_4WAY)
#if defined(ANIME_8WAY)
void anime_8way_hash( void *state, const void *input );
int scanhash_anime_8way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
#elif defined(ANIME_4WAY)
void anime_4way_hash( void *state, const void *input );
int scanhash_anime_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
void init_anime_4way_ctx();
#endif

File diff suppressed because it is too large Load Diff

View File

@@ -72,12 +72,10 @@ void quark_8way_hash( void *state, const void *input )
memcpy( &ctx, &quark_8way_ctx, sizeof(quark_8way_ctx) );
blake512_8way_update( &ctx.blake, input, 80 );
blake512_8way_close( &ctx.blake, vhash );
bmw512_8way_update( &ctx.bmw, vhash, 64 );
bmw512_8way_close( &ctx.bmw, vhash );
blake512_8way_full( &ctx.blake, vhash, input, 80 );
bmw512_8way_full( &ctx.bmw, vhash, vhash, 64 );
vh_mask = _mm512_cmpeq_epi64_mask( _mm512_and_si512( vh[0], bit3_mask ),
zero );
@@ -86,70 +84,34 @@ void quark_8way_hash( void *state, const void *input )
rintrlv_8x64_4x128( vhashA, vhashB, vhash, 512 );
if ( ( vh_mask & 0x0f ) != 0x0f )
{
groestl512_4way_init( &ctx.groestl, 64 );
groestl512_4way_update_close( &ctx.groestl, vhashA, vhashA, 512 );
}
if ( ( vh_mask & 0xf0 ) != 0xf0 )
{
groestl512_4way_init( &ctx.groestl, 64 );
groestl512_4way_update_close( &ctx.groestl, vhashB, vhashB, 512 );
}
rintrlv_4x128_8x64( vhashC, vhashA, vhashB, 512 );
if ( ( vh_mask & 0x0f ) != 0x0f )
groestl512_4way_full( &ctx.groestl, vhashA, vhashA, 64 );
if ( ( vh_mask & 0xf0 ) != 0xf0 )
groestl512_4way_full( &ctx.groestl, vhashB, vhashB, 64 );
rintrlv_4x128_8x64( vhashC, vhashA, vhashB, 512 );
#else
dintrlv_8x64( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
vhash, 512 );
if ( hash0[0] & mask )
{
update_and_final_groestl( &ctx.groestl, (char*)hash0,
(char*)hash0, 512 );
}
if ( hash1[0] & mask )
{
reinit_groestl( &ctx.groestl );
update_and_final_groestl( &ctx.groestl, (char*)hash1,
(char*)hash1, 512 );
}
if ( hash2[0] & mask )
{
reinit_groestl( &ctx.groestl );
update_and_final_groestl( &ctx.groestl, (char*)hash2,
(char*)hash2, 512 );
}
if ( hash3[0] & mask )
{
reinit_groestl( &ctx.groestl );
update_and_final_groestl( &ctx.groestl, (char*)hash3,
(char*)hash3, 512 );
}
if ( hash4[0] & mask )
{
reinit_groestl( &ctx.groestl );
update_and_final_groestl( &ctx.groestl, (char*)hash4,
(char*)hash4, 512 );
}
if ( hash5[0] & mask )
{
reinit_groestl( &ctx.groestl );
update_and_final_groestl( &ctx.groestl, (char*)hash5,
(char*)hash5, 512 );
}
if ( hash6[0] & mask )
{
reinit_groestl( &ctx.groestl );
update_and_final_groestl( &ctx.groestl, (char*)hash6,
(char*)hash6, 512 );
}
if ( hash7[0] & mask )
{
reinit_groestl( &ctx.groestl );
update_and_final_groestl( &ctx.groestl, (char*)hash7,
(char*)hash7, 512 );
}
if ( hash0[0] & 8 )
groestl512_full( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
if ( hash1[0] & 8 )
groestl512_full( &ctx.groestl, (char*)hash1, (char*)hash1, 512 );
if ( hash2[0] & 8)
groestl512_full( &ctx.groestl, (char*)hash2, (char*)hash2, 512 );
if ( hash3[0] & 8 )
groestl512_full( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
if ( hash4[0] & 8 )
groestl512_full( &ctx.groestl, (char*)hash4, (char*)hash4, 512 );
if ( hash5[0] & 8 )
groestl512_full( &ctx.groestl, (char*)hash5, (char*)hash5, 512 );
if ( hash6[0] & 8 )
groestl512_full( &ctx.groestl, (char*)hash6, (char*)hash6, 512 );
if ( hash7[0] & 8 )
groestl512_full( &ctx.groestl, (char*)hash7, (char*)hash7, 512 );
intrlv_8x64( vhashC, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
hash7, 512 );
@@ -157,10 +119,7 @@ void quark_8way_hash( void *state, const void *input )
#endif
if ( vh_mask & 0xff )
{
skein512_8way_update( &ctx.skein, vhash, 64 );
skein512_8way_close( &ctx.skein, vhashB );
}
skein512_8way_full( &ctx.skein, vhashB, vhash, 64 );
mm512_blend_hash_8x64( vh, vhC, vhB, vh_mask );
@@ -168,10 +127,10 @@ void quark_8way_hash( void *state, const void *input )
rintrlv_8x64_4x128( vhashA, vhashB, vhash, 512 );
groestl512_4way_init( &ctx.groestl, 64 );
groestl512_4way_update_close( &ctx.groestl, vhashA, vhashA, 512 );
groestl512_4way_init( &ctx.groestl, 64 );
groestl512_4way_update_close( &ctx.groestl, vhashB, vhashB, 512 );
if ( ( vh_mask & 0x0f ) != 0x0f )
groestl512_4way_full( &ctx.groestl, vhashA, vhashA, 64 );
if ( ( vh_mask & 0xf0 ) != 0xf0 )
groestl512_4way_full( &ctx.groestl, vhashB, vhashB, 64 );
rintrlv_4x128_8x64( vhash, vhashA, vhashB, 512 );
@@ -180,22 +139,22 @@ void quark_8way_hash( void *state, const void *input )
dintrlv_8x64( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
vhash, 512 );
reinit_groestl( &ctx.groestl );
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
reinit_groestl( &ctx.groestl );
update_and_final_groestl( &ctx.groestl, (char*)hash1, (char*)hash1, 512 );
reinit_groestl( &ctx.groestl );
update_and_final_groestl( &ctx.groestl, (char*)hash2, (char*)hash2, 512 );
reinit_groestl( &ctx.groestl );
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
reinit_groestl( &ctx.groestl );
update_and_final_groestl( &ctx.groestl, (char*)hash4, (char*)hash4, 512 );
reinit_groestl( &ctx.groestl );
update_and_final_groestl( &ctx.groestl, (char*)hash5, (char*)hash5, 512 );
reinit_groestl( &ctx.groestl );
update_and_final_groestl( &ctx.groestl, (char*)hash6, (char*)hash6, 512 );
reinit_groestl( &ctx.groestl );
update_and_final_groestl( &ctx.groestl, (char*)hash7, (char*)hash7, 512 );
if ( hash0[0] & 8 )
groestl512_full( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
if ( hash1[0] & 8 )
groestl512_full( &ctx.groestl, (char*)hash1, (char*)hash1, 512 );
if ( hash2[0] & 8)
groestl512_full( &ctx.groestl, (char*)hash2, (char*)hash2, 512 );
if ( hash3[0] & 8 )
groestl512_full( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
if ( hash4[0] & 8 )
groestl512_full( &ctx.groestl, (char*)hash4, (char*)hash4, 512 );
if ( hash5[0] & 8 )
groestl512_full( &ctx.groestl, (char*)hash5, (char*)hash5, 512 );
if ( hash6[0] & 8 )
groestl512_full( &ctx.groestl, (char*)hash6, (char*)hash6, 512 );
if ( hash7[0] & 8 )
groestl512_full( &ctx.groestl, (char*)hash7, (char*)hash7, 512 );
intrlv_8x64( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
512 );
@@ -209,27 +168,16 @@ void quark_8way_hash( void *state, const void *input )
zero );
if ( ( vh_mask & 0xff ) != 0xff )
{
blake512_8way_init( &ctx.blake );
blake512_8way_update( &ctx.blake, vhash, 64 );
blake512_8way_close( &ctx.blake, vhashA );
}
blake512_8way_full( &ctx.blake, vhashA, vhash, 64 );
if ( vh_mask & 0xff )
{
bmw512_8way_init( &ctx.bmw );
bmw512_8way_update( &ctx.bmw, vhash, 64 );
bmw512_8way_close( &ctx.bmw, vhashB );
}
bmw512_8way_full( &ctx.bmw, vhashB, vhash, 64 );
mm512_blend_hash_8x64( vh, vhA, vhB, vh_mask );
keccak512_8way_update( &ctx.keccak, vhash, 64 );
keccak512_8way_close( &ctx.keccak, vhash );
skein512_8way_init( &ctx.skein );
skein512_8way_update( &ctx.skein, vhash, 64 );
skein512_8way_close( &ctx.skein, vhash );
skein512_8way_full( &ctx.skein, vhash, vhash, 64 );
vh_mask = _mm512_cmpeq_epi64_mask( _mm512_and_si512( vh[0], bit3_mask ),
zero );
@@ -258,41 +206,44 @@ void quark_8way_hash( void *state, const void *input )
int scanhash_quark_8way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t hash[8*8] __attribute__ ((aligned (128)));
uint32_t vdata[24*8] __attribute__ ((aligned (64)));
uint64_t hash64[4*8] __attribute__ ((aligned (128)));
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
uint32_t *hash7 = &(hash[49]);
uint32_t *pdata = work->data;
uint64_t *hash64_q3 = &(hash64[3*8]);
uint32_t *ptarget = work->target;
const uint64_t targ64_q3 = ((uint64_t*)ptarget)[3];
uint32_t *pdata = work->data;
uint32_t n = pdata[19];
const uint32_t first_nonce = pdata[19];
__m512i *noncev = (__m512i*)vdata + 9; // aligned
int thr_id = mythr->id;
const uint32_t Htarg = ptarget[7];
const uint32_t last_nonce = max_nonce - 8;
__m512i *noncev = (__m512i*)vdata + 9;
const int thr_id = mythr->id;
const bool bench = opt_benchmark;
mm512_bswap32_intrlv80_8x64( vdata, pdata );
*noncev = mm512_intrlv_blend_32(
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
n+3, 0, n+2, 0, n+1, 0, n , 0 ), *noncev );
do
{
*noncev = mm512_intrlv_blend_32( mm512_bswap_32(
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
n+3, 0, n+2, 0, n+1, 0, n , 0 ) ), *noncev );
quark_8way_hash( hash64, vdata );
quark_8way_hash( hash, vdata );
pdata[19] = n;
for ( int i = 0; i < 8; i++ )
if ( unlikely( hash7[ i<<1 ] <= Htarg ) )
for ( int lane = 0; lane < 8; lane++ )
if ( unlikely( hash64_q3[ lane ] <= targ64_q3 && !bench ) )
{
extr_lane_8x64( lane_hash, hash, i, 256 );
if ( likely( fulltest( lane_hash, ptarget ) && !opt_benchmark ) )
extr_lane_8x64( lane_hash, hash64, lane, 256 );
if ( valid_hash( lane_hash, ptarget ) )
{
pdata[19] = n+i;
submit_lane_solution( work, lane_hash, mythr, i );
pdata[19] = bswap_32( n + lane );
submit_lane_solution( work, lane_hash, mythr, lane );
}
}
*noncev = _mm512_add_epi32( *noncev,
m512_const1_64( 0x0000000800000000 ) );
n += 8;
} while ( ( n < max_nonce-8 ) && !work_restart[thr_id].restart );
} while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) );
pdata[19] = n;
*hashes_done = n - first_nonce;
return 0;
}
@@ -333,67 +284,47 @@ void quark_4way_hash( void *state, const void *input )
__m256i* vhA = (__m256i*)vhashA;
__m256i* vhB = (__m256i*)vhashB;
__m256i vh_mask;
int h_mask;
quark_4way_ctx_holder ctx;
const __m256i bit3_mask = m256_const1_64( 8 );
const uint32_t mask = 8;
const __m256i zero = _mm256_setzero_si256();
memcpy( &ctx, &quark_4way_ctx, sizeof(quark_4way_ctx) );
blake512_4way_update( &ctx.blake, input, 80 );
blake512_4way_close( &ctx.blake, vhash );
blake512_4way_full( &ctx.blake, vhash, input, 80 );
bmw512_4way_update( &ctx.bmw, vhash, 64 );
bmw512_4way_close( &ctx.bmw, vhash );
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ), zero );
h_mask = _mm256_movemask_epi8( vh_mask );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
if ( hash0[0] & mask )
{
update_and_final_groestl( &ctx.groestl, (char*)hash0,
(char*)hash0, 512 );
}
if ( hash1[0] & mask )
{
reinit_groestl( &ctx.groestl );
update_and_final_groestl( &ctx.groestl, (char*)hash1,
(char*)hash1, 512 );
}
if ( hash2[0] & mask )
{
reinit_groestl( &ctx.groestl );
update_and_final_groestl( &ctx.groestl, (char*)hash2,
(char*)hash2, 512 );
}
if ( hash3[0] & mask )
{
reinit_groestl( &ctx.groestl );
update_and_final_groestl( &ctx.groestl, (char*)hash3,
(char*)hash3, 512 );
}
// A
if ( hash0[0] & 8 )
groestl512_full( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
if ( hash1[0] & 8 )
groestl512_full( &ctx.groestl, (char*)hash1, (char*)hash1, 512 );
if ( hash2[0] & 8)
groestl512_full( &ctx.groestl, (char*)hash2, (char*)hash2, 512 );
if ( hash3[0] & 8 )
groestl512_full( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
intrlv_4x64( vhashA, hash0, hash1, hash2, hash3, 512 );
if ( mm256_anybits1( vh_mask ) )
{
skein512_4way_update( &ctx.skein, vhash, 64 );
skein512_4way_close( &ctx.skein, vhashB );
}
// B
if ( likely( h_mask & 0xffffffff ) )
skein512_4way_full( &ctx.skein, vhashB, vhash, 64 );
mm256_blend_hash_4x64( vh, vhA, vhB, vh_mask );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
reinit_groestl( &ctx.groestl );
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
reinit_groestl( &ctx.groestl );
update_and_final_groestl( &ctx.groestl, (char*)hash1, (char*)hash1, 512 );
reinit_groestl( &ctx.groestl );
update_and_final_groestl( &ctx.groestl, (char*)hash2, (char*)hash2, 512 );
reinit_groestl( &ctx.groestl );
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
groestl512_full( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
groestl512_full( &ctx.groestl, (char*)hash1, (char*)hash1, 512 );
groestl512_full( &ctx.groestl, (char*)hash2, (char*)hash2, 512 );
groestl512_full( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
@@ -401,15 +332,13 @@ void quark_4way_hash( void *state, const void *input )
jh512_4way_close( &ctx.jh, vhash );
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ), zero );
h_mask = _mm256_movemask_epi8( vh_mask );
if ( mm256_anybits1( vh_mask ) )
{
blake512_4way_init( &ctx.blake );
blake512_4way_update( &ctx.blake, vhash, 64 );
blake512_4way_close( &ctx.blake, vhashA );
}
if ( mm256_anybits0( vh_mask ) )
// A
if ( likely( ( h_mask & 0xffffffff ) != 0xffffffff ) )
blake512_4way_full( &ctx.blake, vhashA, vhash, 64 );
// B
if ( likely( h_mask & 0xffffffff ) )
{
bmw512_4way_init( &ctx.bmw );
bmw512_4way_update( &ctx.bmw, vhash, 64 );
@@ -421,20 +350,20 @@ void quark_4way_hash( void *state, const void *input )
keccak512_4way_update( &ctx.keccak, vhash, 64 );
keccak512_4way_close( &ctx.keccak, vhash );
skein512_4way_init( &ctx.skein );
skein512_4way_update( &ctx.skein, vhash, 64 );
skein512_4way_close( &ctx.skein, vhash );
skein512_4way_full( &ctx.skein, vhash, vhash, 64 );
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ), zero );
h_mask = _mm256_movemask_epi8( vh_mask );
if ( mm256_anybits1( vh_mask ) )
// A
if ( likely( ( h_mask & 0xffffffff ) != 0xffffffff ) )
{
keccak512_4way_init( &ctx.keccak );
keccak512_4way_update( &ctx.keccak, vhash, 64 );
keccak512_4way_close( &ctx.keccak, vhashA );
}
if ( mm256_anybits0( vh_mask ) )
// B
if ( likely( h_mask & 0xffffffff ) )
{
jh512_4way_init( &ctx.jh );
jh512_4way_update( &ctx.jh, vhash, 64 );
@@ -451,41 +380,44 @@ void quark_4way_hash( void *state, const void *input )
int scanhash_quark_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t hash[4*8] __attribute__ ((aligned (64)));
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
uint64_t hash64[4*4] __attribute__ ((aligned (64)));
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
uint32_t *hash7 = &(hash[25]);
uint64_t *hash64_q3 = &(hash64[3*4]);
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint64_t targ64_q3 = ((uint64_t*)ptarget)[3];
uint32_t n = pdata[19];
const uint32_t first_nonce = pdata[19];
__m256i *noncev = (__m256i*)vdata + 9; // aligned
int thr_id = mythr->id;
const uint32_t Htarg = ptarget[7];
const uint32_t last_nonce = max_nonce - 4;
__m256i *noncev = (__m256i*)vdata + 9;
const int thr_id = mythr->id;
const bool bench = opt_benchmark;
mm256_bswap32_intrlv80_4x64( vdata, pdata );
*noncev = mm256_intrlv_blend_32(
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
do
{
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
quark_4way_hash( hash64, vdata );
quark_4way_hash( hash, vdata );
pdata[19] = n;
for ( int i = 0; i < 4; i++ )
if ( unlikely( hash7[ i<<1 ] <= Htarg ) )
for ( int lane = 0; lane < 4; lane++ )
if ( hash64_q3[ lane ] <= targ64_q3 && !bench )
{
extr_lane_4x64( lane_hash, hash, i, 256 );
if ( likely( fulltest( lane_hash, ptarget ) && !opt_benchmark ) )
extr_lane_4x64( lane_hash, hash64, lane, 256 );
if ( valid_hash( lane_hash, ptarget ) )
{
pdata[19] = n+i;
submit_lane_solution( work, lane_hash, mythr, i );
pdata[19] = bswap_32( n + lane );
submit_lane_solution( work, lane_hash, mythr, lane );
}
}
*noncev = _mm256_add_epi32( *noncev,
m256_const1_64( 0x0000000400000000 ) );
n += 4;
} while ( ( n < max_nonce ) && !work_restart[thr_id].restart );
} while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) );
*hashes_done = n - first_nonce + 1;
pdata[19] = n;
*hashes_done = n - first_nonce;
return 0;
}

View File

@@ -707,6 +707,7 @@ extern int scanhash_scrypt( struct work *work, uint32_t max_nonce,
int thr_id = mythr->id; // thr_id arg is deprecated
int throughput = scrypt_best_throughput();
int i;
volatile uint8_t *restart = &(work_restart[thr_id].restart);
#ifdef HAVE_SHA256_4WAY
if (sha256_use_4way())
@@ -757,7 +758,7 @@ extern int scanhash_scrypt( struct work *work, uint32_t max_nonce,
submit_solution( work, hash, mythr );
}
}
} while (likely(n < max_nonce && !work_restart[thr_id].restart));
} while ( likely( n < max_nonce && !(*restart) ) );
*hashes_done = n - pdata[19] + 1;
pdata[19] = n;

View File

@@ -33,7 +33,7 @@
#include <stddef.h>
#include <string.h>
#ifdef __AES__
#if defined(__AES__)
#include "sph_shavite.h"
#include "simd-utils.h"

View File

@@ -35,6 +35,8 @@
#include "sph_shavite.h"
#if !defined(__AES__)
#ifdef __cplusplus
extern "C"{
#endif
@@ -1762,3 +1764,6 @@ sph_shavite512_sw_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst
#ifdef __cplusplus
}
#endif
#endif // !AES

View File

@@ -262,15 +262,9 @@ void sph_shavite384_close(void *cc, void *dst);
void sph_shavite384_addbits_and_close(
void *cc, unsigned ub, unsigned n, void *dst);
// Always define sw but only define aesni when available
// Define fptrs for aesni or sw, not both.
void sph_shavite512_sw_init(void *cc);
void sph_shavite512_sw(void *cc, const void *data, size_t len);
void sph_shavite512_sw_close(void *cc, void *dst);
void sph_shavite512_sw_addbits_and_close(
void *cc, unsigned ub, unsigned n, void *dst);
//Don't call these directly from application code, use the macros below.
#ifdef __AES__
void sph_shavite512_aesni_init(void *cc);
void sph_shavite512_aesni(void *cc, const void *data, size_t len);
void sph_shavite512_aesni_close(void *cc, void *dst);
@@ -285,6 +279,13 @@ void sph_shavite512_aesni_addbits_and_close(
#else
void sph_shavite512_sw_init(void *cc);
void sph_shavite512_sw(void *cc, const void *data, size_t len);
void sph_shavite512_sw_close(void *cc, void *dst);
void sph_shavite512_sw_addbits_and_close(
void *cc, unsigned ub, unsigned n, void *dst);
#define sph_shavite512_init sph_shavite512_sw_init
#define sph_shavite512 sph_shavite512_sw
#define sph_shavite512_close sph_shavite512_sw_close
@@ -293,6 +294,20 @@ void sph_shavite512_aesni_addbits_and_close(
#endif
// Use these macros from application code.
#define shavite512_context sph_shavite512_context
#define shavite512_init sph_shavite512_init
#define shavite512_update sph_shavite512
#define shavite512_close sph_shavite512_close
#define shavite512_full( cc, dst, data, len ) \
do{ \
shavite512_init( cc ); \
shavite512_update( cc, data, len ); \
shavite512_close( cc, dst ); \
}while(0)
#ifdef __cplusplus
}
#endif

View File

@@ -24,11 +24,7 @@ void skeinhash_8way( void *state, const void *input )
uint32_t vhash32[16*8] __attribute__ ((aligned (128)));
sha256_8way_context ctx_sha256;
skein512_8way_full( &ctx_skein, vhash64, input, 80 );
// skein512_8way_update( &ctx_skein, input + (64*8), 16 );
// skein512_8way_close( &ctx_skein, vhash64 );
skein512_8way_final16( &ctx_skein, vhash64, input + (64*8) );
rintrlv_8x64_8x32( vhash32, vhash64, 512 );
sha256_8way_init( &ctx_sha256 );
@@ -57,8 +53,7 @@ int scanhash_skein_8way( struct work *work, uint32_t max_nonce,
*noncev = mm512_intrlv_blend_32(
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
n+3, 0, n+2, 0, n+1, 0, n , 0 ), *noncev );
// skein512_8way_init( &skein512_8way_ctx );
// skein512_8way_update( &skein512_8way_ctx, vdata, 64 );
skein512_8way_prehash64( &skein512_8way_ctx, vdata );
do
{
skeinhash_8way( hash, vdata );
@@ -85,14 +80,14 @@ int scanhash_skein_8way( struct work *work, uint32_t max_nonce,
#elif defined (SKEIN_4WAY)
//static __thread skein512_4way_context skein512_4way_ctx
// __attribute__ ((aligned (64)));
static __thread skein512_4way_context skein512_4way_ctx
__attribute__ ((aligned (64)));
void skeinhash_4way( void *state, const void *input )
{
uint64_t vhash64[8*4] __attribute__ ((aligned (128)));
skein512_4way_context ctx_skein;
// memcpy( &ctx_skein, &skein512_4way_ctx, sizeof( ctx_skein ) );
memcpy( &ctx_skein, &skein512_4way_ctx, sizeof( ctx_skein ) );
#if defined(__SHA__)
uint32_t hash0[16] __attribute__ ((aligned (64)));
uint32_t hash1[16] __attribute__ ((aligned (64)));
@@ -104,10 +99,7 @@ void skeinhash_4way( void *state, const void *input )
sha256_4way_context ctx_sha256;
#endif
skein512_4way_full( &ctx_skein, vhash64, input, 80 );
// skein512_4way_update( &ctx_skein, input + (64*4), 16 );
// skein512_4way_close( &ctx_skein, vhash64 );
skein512_4way_final16( &ctx_skein, vhash64, input + (64*4) );
#if defined(__SHA__)
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash64, 512 );
@@ -156,8 +148,7 @@ int scanhash_skein_4way( struct work *work, uint32_t max_nonce,
const bool bench = opt_benchmark;
mm256_bswap32_intrlv80_4x64( vdata, pdata );
// skein512_4way_init( &skein512_4way_ctx );
// skein512_4way_update( &skein512_4way_ctx, vdata, 64 );
skein512_4way_prehash64( &skein512_4way_ctx, vdata );
*noncev = mm256_intrlv_blend_32(
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );

View File

@@ -728,6 +728,86 @@ void skein512_8way_full( skein512_8way_context *sc, void *out, const void *data,
casti_m512i( out, 7 ) = h7;
}
// Absorb the first 64 bytes of an 80-byte header (8 interleaved lanes) and
// save the resulting chain state in the context. Callers hashing many
// variants of the same header (e.g. nonce scanning) do this once, then use
// skein512_8way_final16() per nonce for only the last 16 bytes.
//
// NOTE: h0..h7, bcount and buf are referenced by name inside UBI_BIG_8WAY,
// so these local names must not change.
void
skein512_8way_prehash64( skein512_8way_context *sc, const void *data )
{
   // Fixed: was (__m512*), the float-vector type; data is integer vectors.
   // The 4-way version below correctly casts to (__m256i*).
   __m512i *vdata = (__m512i*)data;
   __m512i *buf = sc->buf;

   // Buffer the first 64-byte message block for all 8 lanes.
   buf[0] = vdata[0];
   buf[1] = vdata[1];
   buf[2] = vdata[2];
   buf[3] = vdata[3];
   buf[4] = vdata[4];
   buf[5] = vdata[5];
   buf[6] = vdata[6];
   buf[7] = vdata[7];

   // Skein-512 initial chaining values, broadcast across lanes.
   register __m512i h0 = m512_const1_64( 0x4903ADFF749C51CE );
   register __m512i h1 = m512_const1_64( 0x0D95DE399746DF03 );
   register __m512i h2 = m512_const1_64( 0x8FD1934127C79BCE );
   register __m512i h3 = m512_const1_64( 0x9A255629FF352CB1 );
   register __m512i h4 = m512_const1_64( 0x5DB62599DF6CA7B0 );
   register __m512i h5 = m512_const1_64( 0xEABE394CA9D5C3F4 );
   register __m512i h6 = m512_const1_64( 0x991112C71A75B523 );
   register __m512i h7 = m512_const1_64( 0xAE18A40B660FCC33 );

   uint64_t bcount = 1;
   // Run UBI over the buffered block; updates h0..h7 in place.
   UBI_BIG_8WAY( 224, 0 );

   // Save the midstate for skein512_8way_final16().
   sc->h0 = h0;
   sc->h1 = h1;
   sc->h2 = h2;
   sc->h3 = h3;
   sc->h4 = h4;
   sc->h5 = h5;
   sc->h6 = h6;
   sc->h7 = h7;
}
// Finish a Skein-512 hash whose first 64 bytes were already absorbed by
// skein512_8way_prehash64(): process the final 16 message bytes (8 lanes,
// interleaved), then run the output transform and write the 64-byte digest
// for each lane to output.
//
// NOTE: h0..h7, bcount and buf are referenced by name inside UBI_BIG_8WAY,
// so these local names must not change.
void
skein512_8way_final16( skein512_8way_context *sc, void *output,
const void *data )
{
__m512i *in = (__m512i*)data;
__m512i *buf = sc->buf;
__m512i *out = (__m512i*)output;
// Restore the midstate saved by the prehash.
register __m512i h0 = sc->h0;
register __m512i h1 = sc->h1;
register __m512i h2 = sc->h2;
register __m512i h3 = sc->h3;
register __m512i h4 = sc->h4;
register __m512i h5 = sc->h5;
register __m512i h6 = sc->h6;
register __m512i h7 = sc->h7;
const __m512i zero = m512_zero;
// Final message block: 16 real bytes per lane, zero-padded to 64.
buf[0] = in[0];
buf[1] = in[1];
buf[2] = zero;
buf[3] = zero;
buf[4] = zero;
buf[5] = zero;
buf[6] = zero;
buf[7] = zero;
uint64_t bcount = 1;
// Absorb the padded final message block (presumably tweak type 352 marks
// it as the last message block with 16 valid bytes — see UBI_BIG_8WAY).
UBI_BIG_8WAY( 352, 16 );
// Output transform: UBI over a zero block (tweak 510 per the macro).
buf[0] = zero;
buf[1] = zero;
bcount = 0;
UBI_BIG_8WAY( 510, 8 );
// The chaining state is the digest; one 512-bit vector per hash word,
// 8 lanes interleaved.
out[0] = h0;
out[1] = h1;
out[2] = h2;
out[3] = h3;
out[4] = h4;
out[5] = h5;
out[6] = h6;
out[7] = h7;
}
void
skein256_8way_update(void *cc, const void *data, size_t len)
{
@@ -942,6 +1022,83 @@ skein512_4way_full( skein512_4way_context *sc, void *out, const void *data,
casti_m256i( out, 7 ) = h7;
}
void
skein512_4way_prehash64( skein512_4way_context *sc, const void *data )
{
__m256i *vdata = (__m256i*)data;
__m256i *buf = sc->buf;
buf[0] = vdata[0];
buf[1] = vdata[1];
buf[2] = vdata[2];
buf[3] = vdata[3];
buf[4] = vdata[4];
buf[5] = vdata[5];
buf[6] = vdata[6];
buf[7] = vdata[7];
register __m256i h0 = m256_const1_64( 0x4903ADFF749C51CE );
register __m256i h1 = m256_const1_64( 0x0D95DE399746DF03 );
register __m256i h2 = m256_const1_64( 0x8FD1934127C79BCE );
register __m256i h3 = m256_const1_64( 0x9A255629FF352CB1 );
register __m256i h4 = m256_const1_64( 0x5DB62599DF6CA7B0 );
register __m256i h5 = m256_const1_64( 0xEABE394CA9D5C3F4 );
register __m256i h6 = m256_const1_64( 0x991112C71A75B523 );
register __m256i h7 = m256_const1_64( 0xAE18A40B660FCC33 );
uint64_t bcount = 1;
UBI_BIG_4WAY( 224, 0 );
sc->h0 = h0;
sc->h1 = h1;
sc->h2 = h2;
sc->h3 = h3;
sc->h4 = h4;
sc->h5 = h5;
sc->h6 = h6;
sc->h7 = h7;
}
void
skein512_4way_final16( skein512_4way_context *sc, void *out, const void *data )
{
__m256i *vdata = (__m256i*)data;
__m256i *buf = sc->buf;
register __m256i h0 = sc->h0;
register __m256i h1 = sc->h1;
register __m256i h2 = sc->h2;
register __m256i h3 = sc->h3;
register __m256i h4 = sc->h4;
register __m256i h5 = sc->h5;
register __m256i h6 = sc->h6;
register __m256i h7 = sc->h7;
const __m256i zero = m256_zero;
buf[0] = vdata[0];
buf[1] = vdata[1];
buf[2] = zero;
buf[3] = zero;
buf[4] = zero;
buf[5] = zero;
buf[6] = zero;
buf[7] = zero;
uint64_t bcount = 1;
UBI_BIG_4WAY( 352, 16 );
buf[0] = zero;
buf[1] = zero;
bcount = 0;
UBI_BIG_4WAY( 510, 8 );
casti_m256i( out, 0 ) = h0;
casti_m256i( out, 1 ) = h1;
casti_m256i( out, 2 ) = h2;
casti_m256i( out, 3 ) = h3;
casti_m256i( out, 4 ) = h4;
casti_m256i( out, 5 ) = h5;
casti_m256i( out, 6 ) = h6;
casti_m256i( out, 7 ) = h7;
}
void
skein256_4way_update(void *cc, const void *data, size_t len)
{

View File

@@ -69,6 +69,10 @@ void skein512_8way_init( skein512_8way_context *sc );
void skein512_8way_update( void *cc, const void *data, size_t len );
void skein512_8way_close( void *cc, void *dst );
void skein512_8way_prehash64( skein512_8way_context *sc, const void *data );
void skein512_8way_final16( skein512_8way_context *sc, void *out,
const void *data );
void skein256_8way_init( skein256_8way_context *sc );
void skein256_8way_update( void *cc, const void *data, size_t len );
void skein256_8way_close( void *cc, void *dst );
@@ -96,6 +100,10 @@ void skein256_4way_init( skein256_4way_context *sc );
void skein256_4way_update( void *cc, const void *data, size_t len );
void skein256_4way_close( void *cc, void *dst );
void skein512_4way_prehash64( skein512_4way_context *sc, const void *data );
void skein512_4way_final16( skein512_4way_context *sc, void *out,
const void *data );
#ifdef __cplusplus
}
#endif

View File

@@ -5,20 +5,16 @@
#if defined(SKEIN_8WAY)
// static __thread skein512_8way_context skein512_8way_ctx
// __attribute__ ((aligned (64)));
static __thread skein512_8way_context skein512_8way_ctx
__attribute__ ((aligned (64)));
void skein2hash_8way( void *output, const void *input )
{
uint64_t hash[16*8] __attribute__ ((aligned (128)));
skein512_8way_context ctx;
// memcpy( &ctx, &skein512_8way_ctx, sizeof( ctx ) );
skein512_8way_full( &ctx, hash, input, 80 );
// skein512_8way_update( &ctx, input + (64*8), 16 );
// skein512_8way_close( &ctx, hash );
memcpy( &ctx, &skein512_8way_ctx, sizeof( ctx ) );
skein512_8way_final16( &ctx, hash, input + (64*8) );
skein512_8way_full( &ctx, output, hash, 64 );
}
@@ -38,16 +34,17 @@ int scanhash_skein2_8way( struct work *work, uint32_t max_nonce,
__m512i *noncev = (__m512i*)vdata + 9;
const int thr_id = mythr->id;
const bool bench = opt_benchmark;
skein512_8way_context ctx;
mm512_bswap32_intrlv80_8x64( vdata, pdata );
*noncev = mm512_intrlv_blend_32(
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
n+3, 0, n+2, 0, n+1, 0, n , 0 ), *noncev );
// skein512_8way_init( &skein512_8way_ctx );
// skein512_8way_update( &skein512_8way_ctx, vdata, 64 );
skein512_8way_prehash64( &ctx, vdata );
do
{
skein2hash_8way( hash, vdata );
skein512_8way_final16( &ctx, hash, vdata + (16*8) );
skein512_8way_full( &ctx, hash, hash, 64 );
for ( int lane = 0; lane < 8; lane++ )
if ( unlikely( hashq3[ lane ] <= targq3 && !bench ) )
@@ -71,19 +68,16 @@ int scanhash_skein2_8way( struct work *work, uint32_t max_nonce,
#elif defined(SKEIN_4WAY)
//static __thread skein512_4way_context skein512_4way_ctx
// __attribute__ ((aligned (64)));
static __thread skein512_4way_context skein512_4way_ctx
__attribute__ ((aligned (64)));
void skein2hash_4way( void *output, const void *input )
{
skein512_4way_context ctx;
// memcpy( &ctx, &skein512_4way_ctx, sizeof( ctx ) );
memcpy( &ctx, &skein512_4way_ctx, sizeof( ctx ) );
uint64_t hash[16*4] __attribute__ ((aligned (64)));
// skein512_4way_update( &ctx, input + (64*4), 16 );
// skein512_4way_close( &ctx, hash );
skein512_4way_full( &ctx, hash, input, 80 );
skein512_4way_final16( &ctx, hash, input + (64*4) );
skein512_4way_full( &ctx, output, hash, 64 );
}
@@ -103,15 +97,16 @@ int scanhash_skein2_4way( struct work *work, uint32_t max_nonce,
__m256i *noncev = (__m256i*)vdata + 9;
const int thr_id = mythr->id;
const bool bench = opt_benchmark;
skein512_4way_context ctx;
mm256_bswap32_intrlv80_4x64( vdata, pdata );
// skein512_4way_init( &skein512_4way_ctx );
// skein512_4way_update( &skein512_4way_ctx, vdata, 64 );
skein512_4way_prehash64( &ctx, vdata );
*noncev = mm256_intrlv_blend_32(
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
do
{
skein2hash_4way( hash, vdata );
skein512_4way_final16( &ctx, hash, vdata + (16*4) );
skein512_4way_full( &ctx, hash, hash, 64 );
for ( int lane = 0; lane < 4; lane++ )
if ( hash_q3[ lane ] <= targ_q3 )

View File

@@ -120,6 +120,13 @@ void sph_whirlpool(void *cc, const void *data, size_t len);
*/
void sph_whirlpool_close(void *cc, void *dst);
#define sph_whirlpool512_full( cc, dst, data, len ) \
do{ \
sph_whirlpool_init( cc ); \
sph_whirlpool( cc, data, len ); \
sph_whirlpool_close( cc, dst ); \
}while(0)
/**
* WHIRLPOOL-0 uses the same structure than plain WHIRLPOOL.
*/

View File

@@ -35,8 +35,7 @@ void skunk_8way_hash( void *output, const void *input )
skunk_8way_ctx_holder ctx __attribute__ ((aligned (64)));
memcpy( &ctx, &skunk_8way_ctx, sizeof(skunk_8way_ctx) );
skein512_8way_update( &ctx.skein, input, 80 );
skein512_8way_close( &ctx.skein, vhash );
skein512_8way_final16( &ctx.skein, vhash, input );
dintrlv_8x64( hash0, hash1, hash2, hash3, hash4, hash5, hash6,
hash7, vhash, 512 );
@@ -104,35 +103,35 @@ int scanhash_skunk_8way( struct work *work, uint32_t max_nonce,
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce - 8;
uint32_t n = first_nonce;
__m512i *noncev = (__m512i*)vdata + 9; // aligned
const uint32_t Htarg = ptarget[7];
int thr_id = mythr->id;
__m512i *noncev = (__m512i*)vdata + 9;
const int thr_id = mythr->id;
volatile uint8_t *restart = &(work_restart[thr_id].restart);
const bool bench = opt_benchmark;
if ( opt_benchmark )
((uint32_t*)ptarget)[7] = 0x0cff;
if ( bench ) ptarget[7] = 0x0fff;
mm512_bswap32_intrlv80_8x64( vdata, pdata );
skein512_8way_prehash64( &skunk_8way_ctx.skein, vdata );
*noncev = mm512_intrlv_blend_32(
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
n+3, 0, n+2, 0, n+1, 0, n , 0 ), *noncev );
do
{
*noncev = mm512_intrlv_blend_32( mm512_bswap_32(
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
n+3, 0, n+2, 0, n+1, 0, n , 0 ) ), *noncev );
skunk_8way_hash( hash, vdata );
pdata[19] = n;
for ( int i = 0; i < 8; i++ )
if ( unlikely( (hash+(i<<3))[7] <= Htarg ) )
if ( likely( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark ) )
if ( unlikely( valid_hash( hash+(i<<3), ptarget ) && !bench ) )
{
pdata[19] = n+i;
pdata[19] = bswap_32( n+i );
submit_lane_solution( work, hash+(i<<3), mythr, i );
}
*noncev = _mm512_add_epi32( *noncev,
m512_const1_64( 0x0000000800000000 ) );
n +=8;
} while ( likely( ( n < max_nonce-8 ) && !(*restart) ) );
} while ( likely( ( n < last_nonce ) && !( *restart ) ) );
pdata[19] = n;
*hashes_done = n - first_nonce;
return 0;
}
@@ -159,17 +158,16 @@ static __thread skunk_4way_ctx_holder skunk_4way_ctx;
void skunk_4way_hash( void *output, const void *input )
{
uint64_t vhash[8*4] __attribute__ ((aligned (128)));
uint64_t hash0[8] __attribute__ ((aligned (64)));
uint64_t hash1[8] __attribute__ ((aligned (64)));
uint64_t hash2[8] __attribute__ ((aligned (64)));
uint64_t hash3[8] __attribute__ ((aligned (64)));
uint64_t vhash[8*4] __attribute__ ((aligned (64)));
skunk_4way_ctx_holder ctx __attribute__ ((aligned (64)));
memcpy( &ctx, &skunk_4way_ctx, sizeof(skunk_4way_ctx) );
skein512_4way_update( &ctx.skein, input, 80 );
skein512_4way_close( &ctx.skein, vhash );
skein512_4way_final16( &ctx.skein, vhash, input + (64*4) );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
cubehashUpdateDigest( &ctx.cube, (byte*) hash0, (const byte*)hash0, 64 );
@@ -213,40 +211,40 @@ void skunk_4way_hash( void *output, const void *input )
int scanhash_skunk_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t hash[4*8] __attribute__ ((aligned (64)));
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
uint32_t hash[4*8] __attribute__ ((aligned (128)));
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce - 4;
uint32_t n = first_nonce;
__m256i *noncev = (__m256i*)vdata + 9; // aligned
const uint32_t Htarg = ptarget[7];
int thr_id = mythr->id; // thr_id arg is deprecated
volatile uint8_t *restart = &(work_restart[thr_id].restart);
__m256i *noncev = (__m256i*)vdata + 9;
const int thr_id = mythr->id;
volatile uint8_t *restart = &( work_restart[ thr_id ].restart );
const bool bench = opt_benchmark;
if ( opt_benchmark )
((uint32_t*)ptarget)[7] = 0x0cff;
if ( bench ) ptarget[7] = 0x0fff;
mm256_bswap32_intrlv80_4x64( vdata, pdata );
skein512_4way_prehash64( &skunk_4way_ctx.skein, vdata );
*noncev = mm256_intrlv_blend_32(
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
do
{
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
skunk_4way_hash( hash, vdata );
pdata[19] = n;
for ( int i = 0; i < 4; i++ )
if ( (hash+(i<<3))[7] <= Htarg )
if ( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
if ( unlikely( valid_hash( hash+(i<<3), ptarget ) && !bench ) )
{
pdata[19] = n+i;
pdata[19] = bswap_32( n + i );
submit_lane_solution( work, hash+(i<<3), mythr, i );
}
*noncev = _mm256_add_epi32( *noncev,
m256_const1_64( 0x0000000400000000 ) );
n +=4;
} while ( ( n < max_nonce ) && !(*restart) );
*hashes_done = n - first_nonce + 1;
} while ( likely( ( n < last_nonce ) && !( *restart ) ) );
pdata[19] = n;
*hashes_done = n - first_nonce;
return 0;
}

View File

@@ -47,6 +47,7 @@ static void hex_getAlgoString(const uint32_t* prevblock, char *output)
*sptr = '\0';
}
/*
union _hex_context_overlay
{
#if defined(__AES__)
@@ -63,7 +64,7 @@ union _hex_context_overlay
sph_keccak512_context keccak;
hashState_luffa luffa;
cubehashParam cube;
sph_shavite512_context shavite;
shavite512_context shavite;
hashState_sd simd;
sph_hamsi512_context hamsi;
sph_fugue512_context fugue;
@@ -72,13 +73,14 @@ union _hex_context_overlay
SHA512_CTX sha512;
};
typedef union _hex_context_overlay hex_context_overlay;
*/
static __thread hex_context_overlay hex_ctx;
static __thread x16r_context_overlay hex_ctx;
void hex_hash( void* output, const void* input )
{
uint32_t _ALIGN(128) hash[16];
hex_context_overlay ctx;
x16r_context_overlay ctx;
memcpy( &ctx, &hex_ctx, sizeof(ctx) );
void *in = (void*) input;
int size = 80;
@@ -157,9 +159,7 @@ void hex_hash( void* output, const void* input )
}
break;
case SHAVITE:
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, in, size );
sph_shavite512_close( &ctx.shavite, hash );
shavite512_full( &ctx.shavite, hash, in, size );
break;
case SIMD:
init_sd( &ctx.simd, 512 );
@@ -187,9 +187,7 @@ void hex_hash( void* output, const void* input )
sph_hamsi512_close( &ctx.hamsi, hash );
break;
case FUGUE:
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, in, size );
sph_fugue512_close( &ctx.fugue, hash );
sph_fugue512_full( &ctx.fugue, hash, in, size );
break;
case SHABAL:
if ( i == 0 )
@@ -203,13 +201,12 @@ void hex_hash( void* output, const void* input )
break;
case WHIRLPOOL:
if ( i == 0 )
sph_whirlpool( &ctx.whirlpool, in+64, 16 );
else
{
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, in, size );
sph_whirlpool( &ctx.whirlpool, in+64, 16 );
sph_whirlpool_close( &ctx.whirlpool, hash );
}
sph_whirlpool_close( &ctx.whirlpool, hash );
else
sph_whirlpool512_full( &ctx.whirlpool, hash, in, size );
break;
case SHA_512:
SHA512_Init( &ctx.sha512 );

View File

@@ -287,30 +287,14 @@ void x16r_8way_hash_generic( void* output, const void* input )
shavite512_4way_full( &ctx.shavite, vhash, vhash, size );
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash );
#else
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, in0, size );
sph_shavite512_close( &ctx.shavite, hash0 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, in1, size );
sph_shavite512_close( &ctx.shavite, hash1 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, in2, size );
sph_shavite512_close( &ctx.shavite, hash2 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, in3, size );
sph_shavite512_close( &ctx.shavite, hash3 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, in4, size );
sph_shavite512_close( &ctx.shavite, hash4 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, in5, size );
sph_shavite512_close( &ctx.shavite, hash5 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, in6, size );
sph_shavite512_close( &ctx.shavite, hash6 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, in7, size );
sph_shavite512_close( &ctx.shavite, hash7 );
shavite512_full( &ctx.shavite, hash0, in0, size );
shavite512_full( &ctx.shavite, hash1, in1, size );
shavite512_full( &ctx.shavite, hash2, in2, size );
shavite512_full( &ctx.shavite, hash3, in3, size );
shavite512_full( &ctx.shavite, hash4, in4, size );
shavite512_full( &ctx.shavite, hash5, in5, size );
shavite512_full( &ctx.shavite, hash6, in6, size );
shavite512_full( &ctx.shavite, hash7, in7, size );
#endif
break;
case SIMD:
@@ -363,30 +347,14 @@ void x16r_8way_hash_generic( void* output, const void* input )
hash7, vhash );
break;
case FUGUE:
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, in0, size );
sph_fugue512_close( &ctx.fugue, hash0 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, in1, size );
sph_fugue512_close( &ctx.fugue, hash1 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, in2, size );
sph_fugue512_close( &ctx.fugue, hash2 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, in3, size );
sph_fugue512_close( &ctx.fugue, hash3 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, in4, size );
sph_fugue512_close( &ctx.fugue, hash4 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, in5, size );
sph_fugue512_close( &ctx.fugue, hash5 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, in6, size );
sph_fugue512_close( &ctx.fugue, hash6 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, in7, size );
sph_fugue512_close( &ctx.fugue, hash7 );
sph_fugue512_full( &ctx.fugue, hash0, in0, size );
sph_fugue512_full( &ctx.fugue, hash1, in1, size );
sph_fugue512_full( &ctx.fugue, hash2, in2, size );
sph_fugue512_full( &ctx.fugue, hash3, in3, size );
sph_fugue512_full( &ctx.fugue, hash4, in4, size );
sph_fugue512_full( &ctx.fugue, hash5, in5, size );
sph_fugue512_full( &ctx.fugue, hash6, in6, size );
sph_fugue512_full( &ctx.fugue, hash7, in7, size );
break;
case SHABAL:
intrlv_8x32( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
@@ -431,30 +399,14 @@ void x16r_8way_hash_generic( void* output, const void* input )
}
else
{
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, in0, size );
sph_whirlpool_close( &ctx.whirlpool, hash0 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, in1, size );
sph_whirlpool_close( &ctx.whirlpool, hash1 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, in2, size );
sph_whirlpool_close( &ctx.whirlpool, hash2 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, in3, size );
sph_whirlpool_close( &ctx.whirlpool, hash3 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, in4, size );
sph_whirlpool_close( &ctx.whirlpool, hash4 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, in5, size );
sph_whirlpool_close( &ctx.whirlpool, hash5 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, in6, size );
sph_whirlpool_close( &ctx.whirlpool, hash6 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, in7, size );
sph_whirlpool_close( &ctx.whirlpool, hash7 );
sph_whirlpool512_full( &ctx.whirlpool, hash0, in0, size );
sph_whirlpool512_full( &ctx.whirlpool, hash1, in1, size );
sph_whirlpool512_full( &ctx.whirlpool, hash2, in2, size );
sph_whirlpool512_full( &ctx.whirlpool, hash3, in3, size );
sph_whirlpool512_full( &ctx.whirlpool, hash4, in4, size );
sph_whirlpool512_full( &ctx.whirlpool, hash5, in5, size );
sph_whirlpool512_full( &ctx.whirlpool, hash6, in6, size );
sph_whirlpool512_full( &ctx.whirlpool, hash7, in7, size );
}
break;
case SHA_512:
@@ -576,8 +528,7 @@ void x16r_4way_prehash( void *vdata, void *pdata )
break;
case SKEIN:
mm256_bswap32_intrlv80_4x64( vdata, pdata );
skein512_4way_init( &x16r_ctx.skein );
skein512_4way_update( &x16r_ctx.skein, vdata, 64 );
skein512_4way_prehash64( &x16r_ctx.skein, vdata );
break;
case LUFFA:
mm128_bswap32_80( edata, pdata );
@@ -692,10 +643,7 @@ void x16r_4way_hash_generic( void* output, const void* input )
break;
case SKEIN:
if ( i == 0 )
{
skein512_4way_update( &ctx.skein, input + (64<<2), 16 );
skein512_4way_close( &ctx.skein, vhash );
}
skein512_4way_final16( &ctx.skein, vhash, input + (64*4) );
else
{
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
@@ -756,18 +704,10 @@ void x16r_4way_hash_generic( void* output, const void* input )
}
break;
case SHAVITE:
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, in0, size );
sph_shavite512_close( &ctx.shavite, hash0 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, in1, size );
sph_shavite512_close( &ctx.shavite, hash1 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, in2, size );
sph_shavite512_close( &ctx.shavite, hash2 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, in3, size );
sph_shavite512_close( &ctx.shavite, hash3 );
shavite512_full( &ctx.shavite, hash0, in0, size );
shavite512_full( &ctx.shavite, hash1, in1, size );
shavite512_full( &ctx.shavite, hash2, in2, size );
shavite512_full( &ctx.shavite, hash3, in3, size );
break;
case SIMD:
intrlv_2x128( vhash, in0, in1, size<<3 );
@@ -800,18 +740,10 @@ void x16r_4way_hash_generic( void* output, const void* input )
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
break;
case FUGUE:
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, in0, size );
sph_fugue512_close( &ctx.fugue, hash0 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, in1, size );
sph_fugue512_close( &ctx.fugue, hash1 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, in2, size );
sph_fugue512_close( &ctx.fugue, hash2 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, in3, size );
sph_fugue512_close( &ctx.fugue, hash3 );
sph_fugue512_full( &ctx.fugue, hash0, in0, size );
sph_fugue512_full( &ctx.fugue, hash1, in1, size );
sph_fugue512_full( &ctx.fugue, hash2, in2, size );
sph_fugue512_full( &ctx.fugue, hash3, in3, size );
break;
case SHABAL:
intrlv_4x32( vhash, in0, in1, in2, in3, size<<3 );
@@ -842,18 +774,10 @@ void x16r_4way_hash_generic( void* output, const void* input )
}
else
{
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, in0, size );
sph_whirlpool_close( &ctx.whirlpool, hash0 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, in1, size );
sph_whirlpool_close( &ctx.whirlpool, hash1 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, in2, size );
sph_whirlpool_close( &ctx.whirlpool, hash2 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, in3, size );
sph_whirlpool_close( &ctx.whirlpool, hash3 );
sph_whirlpool512_full( &ctx.whirlpool, hash0, in0, size );
sph_whirlpool512_full( &ctx.whirlpool, hash1, in1, size );
sph_whirlpool512_full( &ctx.whirlpool, hash2, in2, size );
sph_whirlpool512_full( &ctx.whirlpool, hash3, in3, size );
}
break;
case SHA_512:

View File

@@ -121,7 +121,7 @@ union _x16r_8way_context_overlay
echo_4way_context echo;
#else
hashState_groestl groestl;
sph_shavite512_context shavite;
shavite512_context shavite;
hashState_echo echo;
#endif
} __attribute__ ((aligned (64)));
@@ -152,7 +152,7 @@ union _x16r_4way_context_overlay
luffa_2way_context luffa;
hashState_luffa luffa1;
cubehashParam cube;
sph_shavite512_context shavite;
shavite512_context shavite;
simd_2way_context simd;
hamsi512_4way_context hamsi;
sph_fugue512_context fugue;
@@ -191,7 +191,7 @@ union _x16r_context_overlay
sph_keccak512_context keccak;
hashState_luffa luffa;
cubehashParam cube;
sph_shavite512_context shavite;
shavite512_context shavite;
hashState_sd simd;
sph_hamsi512_context hamsi;
sph_fugue512_context fugue;

View File

@@ -124,9 +124,7 @@ void x16r_hash_generic( void* output, const void* input )
(byte*)in, size );
break;
case SHAVITE:
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, in, size );
sph_shavite512_close( &ctx.shavite, hash );
shavite512_full( &ctx.shavite, hash, in, size );
break;
case SIMD:
simd_full( &ctx.simd, (BitSequence *)hash,
@@ -153,9 +151,7 @@ void x16r_hash_generic( void* output, const void* input )
sph_hamsi512_close( &ctx.hamsi, hash );
break;
case FUGUE:
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, in, size );
sph_fugue512_close( &ctx.fugue, hash );
sph_fugue512_full( &ctx.fugue, hash, in, size );
break;
case SHABAL:
if ( i == 0 )
@@ -169,13 +165,12 @@ void x16r_hash_generic( void* output, const void* input )
break;
case WHIRLPOOL:
if ( i == 0 )
sph_whirlpool( &ctx.whirlpool, in+64, 16 );
else
{
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, in, size );
sph_whirlpool( &ctx.whirlpool, in+64, 16 );
sph_whirlpool_close( &ctx.whirlpool, hash );
}
sph_whirlpool_close( &ctx.whirlpool, hash );
else
sph_whirlpool512_full( &ctx.whirlpool, hash, in, size );
break;
case SHA_512:
SHA512_Init( &ctx.sha512 );

View File

@@ -57,7 +57,7 @@ union _x16rv2_8way_context_overlay
echo_4way_context echo;
#else
hashState_groestl groestl;
sph_shavite512_context shavite;
shavite512_context shavite;
hashState_echo echo;
#endif
} __attribute__ ((aligned (64)));
@@ -371,30 +371,14 @@ void x16rv2_8way_hash( void* output, const void* input )
shavite512_4way_full( &ctx.shavite, vhash, vhash, size );
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash );
#else
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, in0, size );
sph_shavite512_close( &ctx.shavite, hash0 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, in1, size );
sph_shavite512_close( &ctx.shavite, hash1 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, in2, size );
sph_shavite512_close( &ctx.shavite, hash2 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, in3, size );
sph_shavite512_close( &ctx.shavite, hash3 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, in4, size );
sph_shavite512_close( &ctx.shavite, hash4 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, in5, size );
sph_shavite512_close( &ctx.shavite, hash5 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, in6, size );
sph_shavite512_close( &ctx.shavite, hash6 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, in7, size );
sph_shavite512_close( &ctx.shavite, hash7 );
shavite512_full( &ctx.shavite, hash0, in0, size );
shavite512_full( &ctx.shavite, hash1, in1, size );
shavite512_full( &ctx.shavite, hash2, in2, size );
shavite512_full( &ctx.shavite, hash3, in3, size );
shavite512_full( &ctx.shavite, hash4, in4, size );
shavite512_full( &ctx.shavite, hash5, in5, size );
shavite512_full( &ctx.shavite, hash6, in6, size );
shavite512_full( &ctx.shavite, hash7, in7, size );
#endif
break;
case SIMD:
@@ -448,30 +432,14 @@ void x16rv2_8way_hash( void* output, const void* input )
hash7, vhash );
break;
case FUGUE:
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, in0, size );
sph_fugue512_close( &ctx.fugue, hash0 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, in1, size );
sph_fugue512_close( &ctx.fugue, hash1 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, in2, size );
sph_fugue512_close( &ctx.fugue, hash2 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, in3, size );
sph_fugue512_close( &ctx.fugue, hash3 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, in4, size );
sph_fugue512_close( &ctx.fugue, hash4 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, in5, size );
sph_fugue512_close( &ctx.fugue, hash5 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, in6, size );
sph_fugue512_close( &ctx.fugue, hash6 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, in7, size );
sph_fugue512_close( &ctx.fugue, hash7 );
sph_fugue512_full( &ctx.fugue, hash0, in0, size );
sph_fugue512_full( &ctx.fugue, hash1, in1, size );
sph_fugue512_full( &ctx.fugue, hash2, in2, size );
sph_fugue512_full( &ctx.fugue, hash3, in3, size );
sph_fugue512_full( &ctx.fugue, hash4, in4, size );
sph_fugue512_full( &ctx.fugue, hash5, in5, size );
sph_fugue512_full( &ctx.fugue, hash6, in6, size );
sph_fugue512_full( &ctx.fugue, hash7, in7, size );
break;
case SHABAL:
intrlv_8x32( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
@@ -516,30 +484,14 @@ void x16rv2_8way_hash( void* output, const void* input )
}
else
{
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, in0, size );
sph_whirlpool_close( &ctx.whirlpool, hash0 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, in1, size );
sph_whirlpool_close( &ctx.whirlpool, hash1 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, in2, size );
sph_whirlpool_close( &ctx.whirlpool, hash2 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, in3, size );
sph_whirlpool_close( &ctx.whirlpool, hash3 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, in4, size );
sph_whirlpool_close( &ctx.whirlpool, hash4 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, in5, size );
sph_whirlpool_close( &ctx.whirlpool, hash5 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, in6, size );
sph_whirlpool_close( &ctx.whirlpool, hash6 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, in7, size );
sph_whirlpool_close( &ctx.whirlpool, hash7 );
sph_whirlpool512_full( &ctx.whirlpool, hash0, in0, size );
sph_whirlpool512_full( &ctx.whirlpool, hash1, in1, size );
sph_whirlpool512_full( &ctx.whirlpool, hash2, in2, size );
sph_whirlpool512_full( &ctx.whirlpool, hash3, in3, size );
sph_whirlpool512_full( &ctx.whirlpool, hash4, in4, size );
sph_whirlpool512_full( &ctx.whirlpool, hash5, in5, size );
sph_whirlpool512_full( &ctx.whirlpool, hash6, in6, size );
sph_whirlpool512_full( &ctx.whirlpool, hash7, in7, size );
}
break;
case SHA_512:
@@ -747,7 +699,7 @@ union _x16rv2_4way_context_overlay
keccak512_4way_context keccak;
luffa_2way_context luffa;
cubehashParam cube;
sph_shavite512_context shavite;
shavite512_context shavite;
simd_2way_context simd;
hamsi512_4way_context hamsi;
sph_fugue512_context fugue;
@@ -831,47 +783,47 @@ void x16rv2_4way_hash( void* output, const void* input )
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
break;
case KECCAK:
if ( i == 0 )
{
sph_tiger( &ctx.tiger, in0 + 64, 16 );
sph_tiger_close( &ctx.tiger, hash0 );
memcpy( &ctx, &x16rv2_ctx, sizeof(ctx) );
sph_tiger( &ctx.tiger, in1 + 64, 16 );
sph_tiger_close( &ctx.tiger, hash1 );
memcpy( &ctx, &x16rv2_ctx, sizeof(ctx) );
sph_tiger( &ctx.tiger, in2 + 64, 16 );
sph_tiger_close( &ctx.tiger, hash2 );
memcpy( &ctx, &x16rv2_ctx, sizeof(ctx) );
sph_tiger( &ctx.tiger, in3 + 64, 16 );
sph_tiger_close( &ctx.tiger, hash3 );
}
else
{
sph_tiger_init( &ctx.tiger );
sph_tiger( &ctx.tiger, in0, size );
sph_tiger_close( &ctx.tiger, hash0 );
sph_tiger_init( &ctx.tiger );
sph_tiger( &ctx.tiger, in1, size );
sph_tiger_close( &ctx.tiger, hash1 );
sph_tiger_init( &ctx.tiger );
sph_tiger( &ctx.tiger, in2, size );
sph_tiger_close( &ctx.tiger, hash2 );
sph_tiger_init( &ctx.tiger );
sph_tiger( &ctx.tiger, in3, size );
sph_tiger_close( &ctx.tiger, hash3 );
}
for ( int i = (24/4); i < (64/4); i++ )
if ( i == 0 )
{
sph_tiger( &ctx.tiger, in0 + 64, 16 );
sph_tiger_close( &ctx.tiger, hash0 );
memcpy( &ctx, &x16rv2_ctx, sizeof(ctx) );
sph_tiger( &ctx.tiger, in1 + 64, 16 );
sph_tiger_close( &ctx.tiger, hash1 );
memcpy( &ctx, &x16rv2_ctx, sizeof(ctx) );
sph_tiger( &ctx.tiger, in2 + 64, 16 );
sph_tiger_close( &ctx.tiger, hash2 );
memcpy( &ctx, &x16rv2_ctx, sizeof(ctx) );
sph_tiger( &ctx.tiger, in3 + 64, 16 );
sph_tiger_close( &ctx.tiger, hash3 );
}
else
{
sph_tiger_init( &ctx.tiger );
sph_tiger( &ctx.tiger, in0, size );
sph_tiger_close( &ctx.tiger, hash0 );
sph_tiger_init( &ctx.tiger );
sph_tiger( &ctx.tiger, in1, size );
sph_tiger_close( &ctx.tiger, hash1 );
sph_tiger_init( &ctx.tiger );
sph_tiger( &ctx.tiger, in2, size );
sph_tiger_close( &ctx.tiger, hash2 );
sph_tiger_init( &ctx.tiger );
sph_tiger( &ctx.tiger, in3, size );
sph_tiger_close( &ctx.tiger, hash3 );
}
for ( int i = (24/4); i < (64/4); i++ )
hash0[i] = hash1[i] = hash2[i] = hash3[i] = 0;
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
keccak512_4way_init( &ctx.keccak );
keccak512_4way_update( &ctx.keccak, vhash, 64 );
keccak512_4way_close( &ctx.keccak, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
keccak512_4way_init( &ctx.keccak );
keccak512_4way_update( &ctx.keccak, vhash, 64 );
keccak512_4way_close( &ctx.keccak, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
break;
case SKEIN:
if ( i == 0 )
skein512_4way_update( &ctx.skein, input + (64<<2), 16 );
skein512_4way_final16( &ctx.skein, vhash, input + (64*4) );
else
{
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
@@ -882,46 +834,46 @@ void x16rv2_4way_hash( void* output, const void* input )
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
break;
case LUFFA:
if ( i == 0 )
{
sph_tiger( &ctx.tiger, in0 + 64, 16 );
sph_tiger_close( &ctx.tiger, hash0 );
memcpy( &ctx, &x16rv2_ctx, sizeof(ctx) );
sph_tiger( &ctx.tiger, in1 + 64, 16 );
sph_tiger_close( &ctx.tiger, hash1 );
memcpy( &ctx, &x16rv2_ctx, sizeof(ctx) );
sph_tiger( &ctx.tiger, in2 + 64, 16 );
sph_tiger_close( &ctx.tiger, hash2 );
memcpy( &ctx, &x16rv2_ctx, sizeof(ctx) );
sph_tiger( &ctx.tiger, in3 + 64, 16 );
sph_tiger_close( &ctx.tiger, hash3 );
}
else
{
sph_tiger_init( &ctx.tiger );
sph_tiger( &ctx.tiger, in0, size );
sph_tiger_close( &ctx.tiger, hash0 );
sph_tiger_init( &ctx.tiger );
sph_tiger( &ctx.tiger, in1, size );
sph_tiger_close( &ctx.tiger, hash1 );
sph_tiger_init( &ctx.tiger );
sph_tiger( &ctx.tiger, in2, size );
sph_tiger_close( &ctx.tiger, hash2 );
sph_tiger_init( &ctx.tiger );
sph_tiger( &ctx.tiger, in3, size );
sph_tiger_close( &ctx.tiger, hash3 );
}
for ( int i = (24/4); i < (64/4); i++ )
if ( i == 0 )
{
sph_tiger( &ctx.tiger, in0 + 64, 16 );
sph_tiger_close( &ctx.tiger, hash0 );
memcpy( &ctx, &x16rv2_ctx, sizeof(ctx) );
sph_tiger( &ctx.tiger, in1 + 64, 16 );
sph_tiger_close( &ctx.tiger, hash1 );
memcpy( &ctx, &x16rv2_ctx, sizeof(ctx) );
sph_tiger( &ctx.tiger, in2 + 64, 16 );
sph_tiger_close( &ctx.tiger, hash2 );
memcpy( &ctx, &x16rv2_ctx, sizeof(ctx) );
sph_tiger( &ctx.tiger, in3 + 64, 16 );
sph_tiger_close( &ctx.tiger, hash3 );
}
else
{
sph_tiger_init( &ctx.tiger );
sph_tiger( &ctx.tiger, in0, size );
sph_tiger_close( &ctx.tiger, hash0 );
sph_tiger_init( &ctx.tiger );
sph_tiger( &ctx.tiger, in1, size );
sph_tiger_close( &ctx.tiger, hash1 );
sph_tiger_init( &ctx.tiger );
sph_tiger( &ctx.tiger, in2, size );
sph_tiger_close( &ctx.tiger, hash2 );
sph_tiger_init( &ctx.tiger );
sph_tiger( &ctx.tiger, in3, size );
sph_tiger_close( &ctx.tiger, hash3 );
}
for ( int i = (24/4); i < (64/4); i++ )
hash0[i] = hash1[i] = hash2[i] = hash3[i] = 0;
intrlv_2x128( vhash, hash0, hash1, 512 );
luffa_2way_init( &ctx.luffa, 512 );
luffa_2way_update_close( &ctx.luffa, vhash, vhash, 64 );
dintrlv_2x128( hash0, hash1, vhash, 512 );
intrlv_2x128( vhash, hash2, hash3, 512 );
luffa_2way_init( &ctx.luffa, 512 );
luffa_2way_update_close( &ctx.luffa, vhash, vhash, 64 );
dintrlv_2x128( hash2, hash3, vhash, 512 );
intrlv_2x128( vhash, hash0, hash1, 512 );
luffa_2way_init( &ctx.luffa, 512 );
luffa_2way_update_close( &ctx.luffa, vhash, vhash, 64 );
dintrlv_2x128( hash0, hash1, vhash, 512 );
intrlv_2x128( vhash, hash2, hash3, 512 );
luffa_2way_init( &ctx.luffa, 512 );
luffa_2way_update_close( &ctx.luffa, vhash, vhash, 64 );
dintrlv_2x128( hash2, hash3, vhash, 512 );
break;
case CUBEHASH:
if ( i == 0 )
@@ -955,18 +907,10 @@ void x16rv2_4way_hash( void* output, const void* input )
}
break;
case SHAVITE:
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, in0, size );
sph_shavite512_close( &ctx.shavite, hash0 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, in1, size );
sph_shavite512_close( &ctx.shavite, hash1 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, in2, size );
sph_shavite512_close( &ctx.shavite, hash2 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, in3, size );
sph_shavite512_close( &ctx.shavite, hash3 );
shavite512_full( &ctx.shavite, hash0, in0, size );
shavite512_full( &ctx.shavite, hash1, in1, size );
shavite512_full( &ctx.shavite, hash2, in2, size );
shavite512_full( &ctx.shavite, hash3, in3, size );
break;
case SIMD:
intrlv_2x128( vhash, in0, in1, size<<3 );
@@ -999,18 +943,10 @@ void x16rv2_4way_hash( void* output, const void* input )
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
break;
case FUGUE:
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, in0, size );
sph_fugue512_close( &ctx.fugue, hash0 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, in1, size );
sph_fugue512_close( &ctx.fugue, hash1 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, in2, size );
sph_fugue512_close( &ctx.fugue, hash2 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, in3, size );
sph_fugue512_close( &ctx.fugue, hash3 );
sph_fugue512_full( &ctx.fugue, hash0, in0, size );
sph_fugue512_full( &ctx.fugue, hash1, in1, size );
sph_fugue512_full( &ctx.fugue, hash2, in2, size );
sph_fugue512_full( &ctx.fugue, hash3, in3, size );
break;
case SHABAL:
intrlv_4x32( vhash, in0, in1, in2, in3, size<<3 );
@@ -1041,18 +977,10 @@ void x16rv2_4way_hash( void* output, const void* input )
}
else
{
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, in0, size );
sph_whirlpool_close( &ctx.whirlpool, hash0 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, in1, size );
sph_whirlpool_close( &ctx.whirlpool, hash1 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, in2, size );
sph_whirlpool_close( &ctx.whirlpool, hash2 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, in3, size );
sph_whirlpool_close( &ctx.whirlpool, hash3 );
sph_whirlpool512_full( &ctx.whirlpool, hash0, in0, size );
sph_whirlpool512_full( &ctx.whirlpool, hash1, in1, size );
sph_whirlpool512_full( &ctx.whirlpool, hash2, in2, size );
sph_whirlpool512_full( &ctx.whirlpool, hash3, in3, size );
}
break;
case SHA_512:
@@ -1117,7 +1045,7 @@ int scanhash_x16rv2_4way( struct work *work, uint32_t max_nonce,
const uint32_t last_nonce = max_nonce - 4;
uint32_t n = first_nonce;
const int thr_id = mythr->id;
__m256i *noncev = (__m256i*)vdata + 9; // aligned
__m256i *noncev = (__m256i*)vdata + 9;
volatile uint8_t *restart = &(work_restart[thr_id].restart);
const bool bench = opt_benchmark;
@@ -1134,7 +1062,7 @@ int scanhash_x16rv2_4way( struct work *work, uint32_t max_nonce,
x16_r_s_getAlgoString( (const uint8_t*)bedata1, x16r_hash_order );
s_ntime = ntime;
if ( opt_debug && !thr_id )
applog( LOG_DEBUG, "hash order %s (%08x)", x16r_hash_order, ntime );
applog( LOG_INFO, "hash order %s (%08x)", x16r_hash_order, ntime );
}
// Do midstate prehash on hash functions with block size <= 64 bytes.
@@ -1157,8 +1085,7 @@ int scanhash_x16rv2_4way( struct work *work, uint32_t max_nonce,
break;
case SKEIN:
mm256_bswap32_intrlv80_4x64( vdata, pdata );
skein512_4way_init( &x16rv2_ctx.skein );
skein512_4way_update( &x16rv2_ctx.skein, vdata, 64 );
skein512_4way_prehash64( &x16r_ctx.skein, vdata );
break;
case CUBEHASH:
mm128_bswap32_80( edata, pdata );

View File

@@ -51,7 +51,7 @@ union _x16rv2_context_overlay
sph_keccak512_context keccak;
hashState_luffa luffa;
cubehashParam cube;
sph_shavite512_context shavite;
shavite512_context shavite;
hashState_sd simd;
sph_hamsi512_context hamsi;
sph_fugue512_context fugue;
@@ -136,9 +136,7 @@ void x16rv2_hash( void* output, const void* input )
(const byte*)in, size );
break;
case SHAVITE:
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, in, size );
sph_shavite512_close( &ctx.shavite, hash );
shavite512_full( &ctx.shavite, hash, in, size );
break;
case SIMD:
init_sd( &ctx.simd, 512 );
@@ -162,9 +160,7 @@ void x16rv2_hash( void* output, const void* input )
sph_hamsi512_close( &ctx.hamsi, hash );
break;
case FUGUE:
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, in, size );
sph_fugue512_close( &ctx.fugue, hash );
sph_fugue512_full( &ctx.fugue, hash, in, size );
break;
case SHABAL:
sph_shabal512_init( &ctx.shabal );
@@ -172,9 +168,7 @@ void x16rv2_hash( void* output, const void* input )
sph_shabal512_close( &ctx.shabal, hash );
break;
case WHIRLPOOL:
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, in, size );
sph_whirlpool_close( &ctx.whirlpool, hash );
sph_whirlpool512_full( &ctx.whirlpool, hash, in, size );
break;
case SHA_512:
sph_tiger_init( &ctx.tiger );

View File

@@ -127,40 +127,22 @@ void sonoa_8way_hash( void *state, const void *input )
#if defined(__VAES__)
shavite512_4way_init( &ctx.shavite );
shavite512_4way_update_close( &ctx.shavite, vhashA, vhashA, 64 );
shavite512_4way_init( &ctx.shavite );
shavite512_4way_update_close( &ctx.shavite, vhashB, vhashB, 64 );
shavite512_4way_full( &ctx.shavite, vhashA, vhashA, 64 );
shavite512_4way_full( &ctx.shavite, vhashB, vhashB, 64 );
#else
dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhashA );
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhashB );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash0, 64 );
sph_shavite512_close( &ctx.shavite, hash0 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash1, 64 );
sph_shavite512_close( &ctx.shavite, hash1 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash2, 64 );
sph_shavite512_close( &ctx.shavite, hash2 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash3, 64 );
sph_shavite512_close( &ctx.shavite, hash3 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash4, 64 );
sph_shavite512_close( &ctx.shavite, hash4 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash5, 64 );
sph_shavite512_close( &ctx.shavite, hash5 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash6, 64 );
sph_shavite512_close( &ctx.shavite, hash6 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash7, 64 );
sph_shavite512_close( &ctx.shavite, hash7 );
shavite512_full( &ctx.shavite, hash0, hash0, 64 );
shavite512_full( &ctx.shavite, hash1, hash1, 64 );
shavite512_full( &ctx.shavite, hash2, hash2, 64 );
shavite512_full( &ctx.shavite, hash3, hash3, 64 );
shavite512_full( &ctx.shavite, hash4, hash4, 64 );
shavite512_full( &ctx.shavite, hash5, hash5, 64 );
shavite512_full( &ctx.shavite, hash6, hash6, 64 );
shavite512_full( &ctx.shavite, hash7, hash7, 64 );
intrlv_4x128_512( vhashA, hash0, hash1, hash2, hash3 );
intrlv_4x128_512( vhashB, hash4, hash5, hash6, hash7 );
@@ -236,9 +218,7 @@ void sonoa_8way_hash( void *state, const void *input )
#endif
skein512_8way_init( &ctx.skein );
skein512_8way_update( &ctx.skein, vhash, 64 );
skein512_8way_close( &ctx.skein, vhash );
skein512_8way_full( &ctx.skein, vhash, vhash, 64 );
jh512_8way_init( &ctx.jh );
jh512_8way_update( &ctx.jh, vhash, 64 );
@@ -258,40 +238,22 @@ void sonoa_8way_hash( void *state, const void *input )
#if defined(__VAES__)
shavite512_4way_init( &ctx.shavite );
shavite512_4way_update_close( &ctx.shavite, vhashA, vhashA, 64 );
shavite512_4way_init( &ctx.shavite );
shavite512_4way_update_close( &ctx.shavite, vhashB, vhashB, 64 );
shavite512_4way_full( &ctx.shavite, vhashA, vhashA, 64 );
shavite512_4way_full( &ctx.shavite, vhashB, vhashB, 64 );
#else
dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhashA );
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhashB );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash0, 64 );
sph_shavite512_close( &ctx.shavite, hash0 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash1, 64 );
sph_shavite512_close( &ctx.shavite, hash1 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash2, 64 );
sph_shavite512_close( &ctx.shavite, hash2 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash3, 64 );
sph_shavite512_close( &ctx.shavite, hash3 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash4, 64 );
sph_shavite512_close( &ctx.shavite, hash4 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash5, 64 );
sph_shavite512_close( &ctx.shavite, hash5 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash6, 64 );
sph_shavite512_close( &ctx.shavite, hash6 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash7, 64 );
sph_shavite512_close( &ctx.shavite, hash7 );
shavite512_full( &ctx.shavite, hash0, hash0, 64 );
shavite512_full( &ctx.shavite, hash1, hash1, 64 );
shavite512_full( &ctx.shavite, hash2, hash2, 64 );
shavite512_full( &ctx.shavite, hash3, hash3, 64 );
shavite512_full( &ctx.shavite, hash4, hash4, 64 );
shavite512_full( &ctx.shavite, hash5, hash5, 64 );
shavite512_full( &ctx.shavite, hash6, hash6, 64 );
shavite512_full( &ctx.shavite, hash7, hash7, 64 );
intrlv_4x128_512( vhashA, hash0, hash1, hash2, hash3 );
intrlv_4x128_512( vhashB, hash4, hash5, hash6, hash7 );
@@ -393,40 +355,22 @@ void sonoa_8way_hash( void *state, const void *input )
#if defined(__VAES__)
shavite512_4way_init( &ctx.shavite );
shavite512_4way_update_close( &ctx.shavite, vhashA, vhashA, 64 );
shavite512_4way_init( &ctx.shavite );
shavite512_4way_update_close( &ctx.shavite, vhashB, vhashB, 64 );
shavite512_4way_full( &ctx.shavite, vhashA, vhashA, 64 );
shavite512_4way_full( &ctx.shavite, vhashB, vhashB, 64 );
#else
dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhashA );
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhashB );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash0, 64 );
sph_shavite512_close( &ctx.shavite, hash0 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash1, 64 );
sph_shavite512_close( &ctx.shavite, hash1 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash2, 64 );
sph_shavite512_close( &ctx.shavite, hash2 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash3, 64 );
sph_shavite512_close( &ctx.shavite, hash3 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash4, 64 );
sph_shavite512_close( &ctx.shavite, hash4 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash5, 64 );
sph_shavite512_close( &ctx.shavite, hash5 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash6, 64 );
sph_shavite512_close( &ctx.shavite, hash6 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash7, 64 );
sph_shavite512_close( &ctx.shavite, hash7 );
shavite512_full( &ctx.shavite, hash0, hash0, 64 );
shavite512_full( &ctx.shavite, hash1, hash1, 64 );
shavite512_full( &ctx.shavite, hash2, hash2, 64 );
shavite512_full( &ctx.shavite, hash3, hash3, 64 );
shavite512_full( &ctx.shavite, hash4, hash4, 64 );
shavite512_full( &ctx.shavite, hash5, hash5, 64 );
shavite512_full( &ctx.shavite, hash6, hash6, 64 );
shavite512_full( &ctx.shavite, hash7, hash7, 64 );
intrlv_4x128_512( vhashA, hash0, hash1, hash2, hash3 );
intrlv_4x128_512( vhashB, hash4, hash5, hash6, hash7 );
@@ -477,30 +421,14 @@ void sonoa_8way_hash( void *state, const void *input )
dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
vhash );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash0, 64 );
sph_fugue512_close( &ctx.fugue, hash0 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash1, 64 );
sph_fugue512_close( &ctx.fugue, hash1 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash2, 64 );
sph_fugue512_close( &ctx.fugue, hash2 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash3, 64 );
sph_fugue512_close( &ctx.fugue, hash3 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash4, 64 );
sph_fugue512_close( &ctx.fugue, hash4 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash5, 64 );
sph_fugue512_close( &ctx.fugue, hash5 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash6, 64 );
sph_fugue512_close( &ctx.fugue, hash6 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash7, 64 );
sph_fugue512_close( &ctx.fugue, hash7 );
sph_fugue512_full( &ctx.fugue, hash0, hash0, 64 );
sph_fugue512_full( &ctx.fugue, hash1, hash1, 64 );
sph_fugue512_full( &ctx.fugue, hash2, hash2, 64 );
sph_fugue512_full( &ctx.fugue, hash3, hash3, 64 );
sph_fugue512_full( &ctx.fugue, hash4, hash4, 64 );
sph_fugue512_full( &ctx.fugue, hash5, hash5, 64 );
sph_fugue512_full( &ctx.fugue, hash6, hash6, 64 );
sph_fugue512_full( &ctx.fugue, hash7, hash7, 64 );
// 4
@@ -537,9 +465,7 @@ void sonoa_8way_hash( void *state, const void *input )
#endif
skein512_8way_init( &ctx.skein );
skein512_8way_update( &ctx.skein, vhash, 64 );
skein512_8way_close( &ctx.skein, vhash );
skein512_8way_full( &ctx.skein, vhash, vhash, 64 );
jh512_8way_init( &ctx.jh );
jh512_8way_update( &ctx.jh, vhash, 64 );
@@ -559,40 +485,22 @@ void sonoa_8way_hash( void *state, const void *input )
#if defined(__VAES__)
shavite512_4way_init( &ctx.shavite );
shavite512_4way_update_close( &ctx.shavite, vhashA, vhashA, 64 );
shavite512_4way_init( &ctx.shavite );
shavite512_4way_update_close( &ctx.shavite, vhashB, vhashB, 64 );
shavite512_4way_full( &ctx.shavite, vhashA, vhashA, 64 );
shavite512_4way_full( &ctx.shavite, vhashB, vhashB, 64 );
#else
dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhashA );
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhashB );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash0, 64 );
sph_shavite512_close( &ctx.shavite, hash0 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash1, 64 );
sph_shavite512_close( &ctx.shavite, hash1 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash2, 64 );
sph_shavite512_close( &ctx.shavite, hash2 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash3, 64 );
sph_shavite512_close( &ctx.shavite, hash3 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash4, 64 );
sph_shavite512_close( &ctx.shavite, hash4 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash5, 64 );
sph_shavite512_close( &ctx.shavite, hash5 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash6, 64 );
sph_shavite512_close( &ctx.shavite, hash6 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash7, 64 );
sph_shavite512_close( &ctx.shavite, hash7 );
shavite512_full( &ctx.shavite, hash0, hash0, 64 );
shavite512_full( &ctx.shavite, hash1, hash1, 64 );
shavite512_full( &ctx.shavite, hash2, hash2, 64 );
shavite512_full( &ctx.shavite, hash3, hash3, 64 );
shavite512_full( &ctx.shavite, hash4, hash4, 64 );
shavite512_full( &ctx.shavite, hash5, hash5, 64 );
shavite512_full( &ctx.shavite, hash6, hash6, 64 );
shavite512_full( &ctx.shavite, hash7, hash7, 64 );
intrlv_4x128_512( vhashA, hash0, hash1, hash2, hash3 );
intrlv_4x128_512( vhashB, hash4, hash5, hash6, hash7 );
@@ -643,30 +551,14 @@ void sonoa_8way_hash( void *state, const void *input )
dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
vhash );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash0, 64 );
sph_fugue512_close( &ctx.fugue, hash0 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash1, 64 );
sph_fugue512_close( &ctx.fugue, hash1 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash2, 64 );
sph_fugue512_close( &ctx.fugue, hash2 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash3, 64 );
sph_fugue512_close( &ctx.fugue, hash3 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash4, 64 );
sph_fugue512_close( &ctx.fugue, hash4 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash5, 64 );
sph_fugue512_close( &ctx.fugue, hash5 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash6, 64 );
sph_fugue512_close( &ctx.fugue, hash6 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash7, 64 );
sph_fugue512_close( &ctx.fugue, hash7 );
sph_fugue512_full( &ctx.fugue, hash0, hash0, 64 );
sph_fugue512_full( &ctx.fugue, hash1, hash1, 64 );
sph_fugue512_full( &ctx.fugue, hash2, hash2, 64 );
sph_fugue512_full( &ctx.fugue, hash3, hash3, 64 );
sph_fugue512_full( &ctx.fugue, hash4, hash4, 64 );
sph_fugue512_full( &ctx.fugue, hash5, hash5, 64 );
sph_fugue512_full( &ctx.fugue, hash6, hash6, 64 );
sph_fugue512_full( &ctx.fugue, hash7, hash7, 64 );
intrlv_8x32_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
hash7 );
@@ -714,39 +606,21 @@ void sonoa_8way_hash( void *state, const void *input )
#if defined(__VAES__)
shavite512_4way_init( &ctx.shavite );
shavite512_4way_update_close( &ctx.shavite, vhashA, vhashA, 64 );
shavite512_4way_init( &ctx.shavite );
shavite512_4way_update_close( &ctx.shavite, vhashB, vhashB, 64 );
shavite512_4way_full( &ctx.shavite, vhashA, vhashA, 64 );
shavite512_4way_full( &ctx.shavite, vhashB, vhashB, 64 );
rintrlv_4x128_8x64( vhash, vhashA, vhashB, 512 );
#else
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash0, 64 );
sph_shavite512_close( &ctx.shavite, hash0 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash1, 64 );
sph_shavite512_close( &ctx.shavite, hash1 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash2, 64 );
sph_shavite512_close( &ctx.shavite, hash2 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash3, 64 );
sph_shavite512_close( &ctx.shavite, hash3 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash4, 64 );
sph_shavite512_close( &ctx.shavite, hash4 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash5, 64 );
sph_shavite512_close( &ctx.shavite, hash5 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash6, 64 );
sph_shavite512_close( &ctx.shavite, hash6 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash7, 64 );
sph_shavite512_close( &ctx.shavite, hash7 );
shavite512_full( &ctx.shavite, hash0, hash0, 64 );
shavite512_full( &ctx.shavite, hash1, hash1, 64 );
shavite512_full( &ctx.shavite, hash2, hash2, 64 );
shavite512_full( &ctx.shavite, hash3, hash3, 64 );
shavite512_full( &ctx.shavite, hash4, hash4, 64 );
shavite512_full( &ctx.shavite, hash5, hash5, 64 );
shavite512_full( &ctx.shavite, hash6, hash6, 64 );
shavite512_full( &ctx.shavite, hash7, hash7, 64 );
intrlv_8x64_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
hash7 );
@@ -791,9 +665,7 @@ void sonoa_8way_hash( void *state, const void *input )
#endif
skein512_8way_init( &ctx.skein );
skein512_8way_update( &ctx.skein, vhash, 64 );
skein512_8way_close( &ctx.skein, vhash );
skein512_8way_full( &ctx.skein, vhash, vhash, 64 );
jh512_8way_init( &ctx.jh );
jh512_8way_update( &ctx.jh, vhash, 64 );
@@ -813,40 +685,22 @@ void sonoa_8way_hash( void *state, const void *input )
#if defined(__VAES__)
shavite512_4way_init( &ctx.shavite );
shavite512_4way_update_close( &ctx.shavite, vhashA, vhashA, 64 );
shavite512_4way_init( &ctx.shavite );
shavite512_4way_update_close( &ctx.shavite, vhashB, vhashB, 64 );
shavite512_4way_full( &ctx.shavite, vhashA, vhashA, 64 );
shavite512_4way_full( &ctx.shavite, vhashB, vhashB, 64 );
#else
dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhashA );
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhashB );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash0, 64 );
sph_shavite512_close( &ctx.shavite, hash0 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash1, 64 );
sph_shavite512_close( &ctx.shavite, hash1 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash2, 64 );
sph_shavite512_close( &ctx.shavite, hash2 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash3, 64 );
sph_shavite512_close( &ctx.shavite, hash3 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash4, 64 );
sph_shavite512_close( &ctx.shavite, hash4 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash5, 64 );
sph_shavite512_close( &ctx.shavite, hash5 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash6, 64 );
sph_shavite512_close( &ctx.shavite, hash6 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash7, 64 );
sph_shavite512_close( &ctx.shavite, hash7 );
shavite512_full( &ctx.shavite, hash0, hash0, 64 );
shavite512_full( &ctx.shavite, hash1, hash1, 64 );
shavite512_full( &ctx.shavite, hash2, hash2, 64 );
shavite512_full( &ctx.shavite, hash3, hash3, 64 );
shavite512_full( &ctx.shavite, hash4, hash4, 64 );
shavite512_full( &ctx.shavite, hash5, hash5, 64 );
shavite512_full( &ctx.shavite, hash6, hash6, 64 );
shavite512_full( &ctx.shavite, hash7, hash7, 64 );
intrlv_4x128_512( vhashA, hash0, hash1, hash2, hash3 );
intrlv_4x128_512( vhashB, hash4, hash5, hash6, hash7 );
@@ -897,30 +751,14 @@ void sonoa_8way_hash( void *state, const void *input )
dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
vhash );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash0, 64 );
sph_fugue512_close( &ctx.fugue, hash0 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash1, 64 );
sph_fugue512_close( &ctx.fugue, hash1 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash2, 64 );
sph_fugue512_close( &ctx.fugue, hash2 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash3, 64 );
sph_fugue512_close( &ctx.fugue, hash3 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash4, 64 );
sph_fugue512_close( &ctx.fugue, hash4 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash5, 64 );
sph_fugue512_close( &ctx.fugue, hash5 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash6, 64 );
sph_fugue512_close( &ctx.fugue, hash6 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash7, 64 );
sph_fugue512_close( &ctx.fugue, hash7 );
sph_fugue512_full( &ctx.fugue, hash0, hash0, 64 );
sph_fugue512_full( &ctx.fugue, hash1, hash1, 64 );
sph_fugue512_full( &ctx.fugue, hash2, hash2, 64 );
sph_fugue512_full( &ctx.fugue, hash3, hash3, 64 );
sph_fugue512_full( &ctx.fugue, hash4, hash4, 64 );
sph_fugue512_full( &ctx.fugue, hash5, hash5, 64 );
sph_fugue512_full( &ctx.fugue, hash6, hash6, 64 );
sph_fugue512_full( &ctx.fugue, hash7, hash7, 64 );
intrlv_8x32_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
hash7 );
@@ -932,30 +770,14 @@ void sonoa_8way_hash( void *state, const void *input )
dintrlv_8x32_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
vhash );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash0, 64 );
sph_whirlpool_close( &ctx.whirlpool, hash0 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash1, 64 );
sph_whirlpool_close( &ctx.whirlpool, hash1 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash2, 64 );
sph_whirlpool_close( &ctx.whirlpool, hash2 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash3, 64 );
sph_whirlpool_close( &ctx.whirlpool, hash3 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash4, 64 );
sph_whirlpool_close( &ctx.whirlpool, hash4 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash5, 64 );
sph_whirlpool_close( &ctx.whirlpool, hash5 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash6, 64 );
sph_whirlpool_close( &ctx.whirlpool, hash6 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash7, 64 );
sph_whirlpool_close( &ctx.whirlpool, hash7 );
sph_whirlpool512_full( &ctx.whirlpool, hash0, hash0, 64 );
sph_whirlpool512_full( &ctx.whirlpool, hash1, hash1, 64 );
sph_whirlpool512_full( &ctx.whirlpool, hash2, hash2, 64 );
sph_whirlpool512_full( &ctx.whirlpool, hash3, hash3, 64 );
sph_whirlpool512_full( &ctx.whirlpool, hash4, hash4, 64 );
sph_whirlpool512_full( &ctx.whirlpool, hash5, hash5, 64 );
sph_whirlpool512_full( &ctx.whirlpool, hash6, hash6, 64 );
sph_whirlpool512_full( &ctx.whirlpool, hash7, hash7, 64 );
// 6
@@ -992,9 +814,7 @@ void sonoa_8way_hash( void *state, const void *input )
#endif
skein512_8way_init( &ctx.skein );
skein512_8way_update( &ctx.skein, vhash, 64 );
skein512_8way_close( &ctx.skein, vhash );
skein512_8way_full( &ctx.skein, vhash, vhash, 64 );
jh512_8way_init( &ctx.jh );
jh512_8way_update( &ctx.jh, vhash, 64 );
@@ -1014,40 +834,22 @@ void sonoa_8way_hash( void *state, const void *input )
#if defined(__VAES__)
shavite512_4way_init( &ctx.shavite );
shavite512_4way_update_close( &ctx.shavite, vhashA, vhashA, 64 );
shavite512_4way_init( &ctx.shavite );
shavite512_4way_update_close( &ctx.shavite, vhashB, vhashB, 64 );
shavite512_4way_full( &ctx.shavite, vhashA, vhashA, 64 );
shavite512_4way_full( &ctx.shavite, vhashB, vhashB, 64 );
#else
dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhashA );
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhashB );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash0, 64 );
sph_shavite512_close( &ctx.shavite, hash0 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash1, 64 );
sph_shavite512_close( &ctx.shavite, hash1 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash2, 64 );
sph_shavite512_close( &ctx.shavite, hash2 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash3, 64 );
sph_shavite512_close( &ctx.shavite, hash3 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash4, 64 );
sph_shavite512_close( &ctx.shavite, hash4 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash5, 64 );
sph_shavite512_close( &ctx.shavite, hash5 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash6, 64 );
sph_shavite512_close( &ctx.shavite, hash6 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash7, 64 );
sph_shavite512_close( &ctx.shavite, hash7 );
shavite512_full( &ctx.shavite, hash0, hash0, 64 );
shavite512_full( &ctx.shavite, hash1, hash1, 64 );
shavite512_full( &ctx.shavite, hash2, hash2, 64 );
shavite512_full( &ctx.shavite, hash3, hash3, 64 );
shavite512_full( &ctx.shavite, hash4, hash4, 64 );
shavite512_full( &ctx.shavite, hash5, hash5, 64 );
shavite512_full( &ctx.shavite, hash6, hash6, 64 );
shavite512_full( &ctx.shavite, hash7, hash7, 64 );
intrlv_4x128_512( vhashA, hash0, hash1, hash2, hash3 );
intrlv_4x128_512( vhashB, hash4, hash5, hash6, hash7 );
@@ -1098,30 +900,14 @@ void sonoa_8way_hash( void *state, const void *input )
dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
vhash );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash0, 64 );
sph_fugue512_close( &ctx.fugue, hash0 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash1, 64 );
sph_fugue512_close( &ctx.fugue, hash1 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash2, 64 );
sph_fugue512_close( &ctx.fugue, hash2 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash3, 64 );
sph_fugue512_close( &ctx.fugue, hash3 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash4, 64 );
sph_fugue512_close( &ctx.fugue, hash4 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash5, 64 );
sph_fugue512_close( &ctx.fugue, hash5 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash6, 64 );
sph_fugue512_close( &ctx.fugue, hash6 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash7, 64 );
sph_fugue512_close( &ctx.fugue, hash7 );
sph_fugue512_full( &ctx.fugue, hash0, hash0, 64 );
sph_fugue512_full( &ctx.fugue, hash1, hash1, 64 );
sph_fugue512_full( &ctx.fugue, hash2, hash2, 64 );
sph_fugue512_full( &ctx.fugue, hash3, hash3, 64 );
sph_fugue512_full( &ctx.fugue, hash4, hash4, 64 );
sph_fugue512_full( &ctx.fugue, hash5, hash5, 64 );
sph_fugue512_full( &ctx.fugue, hash6, hash6, 64 );
sph_fugue512_full( &ctx.fugue, hash7, hash7, 64 );
intrlv_8x32_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
hash7 );
@@ -1133,30 +919,14 @@ void sonoa_8way_hash( void *state, const void *input )
dintrlv_8x32_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
vhash );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash0, 64 );
sph_whirlpool_close( &ctx.whirlpool, hash0 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash1, 64 );
sph_whirlpool_close( &ctx.whirlpool, hash1 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash2, 64 );
sph_whirlpool_close( &ctx.whirlpool, hash2 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash3, 64 );
sph_whirlpool_close( &ctx.whirlpool, hash3 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash4, 64 );
sph_whirlpool_close( &ctx.whirlpool, hash4 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash5, 64 );
sph_whirlpool_close( &ctx.whirlpool, hash5 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash6, 64 );
sph_whirlpool_close( &ctx.whirlpool, hash6 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash7, 64 );
sph_whirlpool_close( &ctx.whirlpool, hash7 );
sph_whirlpool512_full( &ctx.whirlpool, hash0, hash0, 64 );
sph_whirlpool512_full( &ctx.whirlpool, hash1, hash1, 64 );
sph_whirlpool512_full( &ctx.whirlpool, hash2, hash2, 64 );
sph_whirlpool512_full( &ctx.whirlpool, hash3, hash3, 64 );
sph_whirlpool512_full( &ctx.whirlpool, hash4, hash4, 64 );
sph_whirlpool512_full( &ctx.whirlpool, hash5, hash5, 64 );
sph_whirlpool512_full( &ctx.whirlpool, hash6, hash6, 64 );
sph_whirlpool512_full( &ctx.whirlpool, hash7, hash7, 64 );
intrlv_8x64_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
hash7 );
@@ -1168,30 +938,14 @@ void sonoa_8way_hash( void *state, const void *input )
dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
vhash );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash0, 64 );
sph_whirlpool_close( &ctx.whirlpool, hash0 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash1, 64 );
sph_whirlpool_close( &ctx.whirlpool, hash1 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash2, 64 );
sph_whirlpool_close( &ctx.whirlpool, hash2 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash3, 64 );
sph_whirlpool_close( &ctx.whirlpool, hash3 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash4, 64 );
sph_whirlpool_close( &ctx.whirlpool, hash4 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash5, 64 );
sph_whirlpool_close( &ctx.whirlpool, hash5 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash6, 64 );
sph_whirlpool_close( &ctx.whirlpool, hash6 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash7, 64 );
sph_whirlpool_close( &ctx.whirlpool, hash7 );
sph_whirlpool512_full( &ctx.whirlpool, hash0, hash0, 64 );
sph_whirlpool512_full( &ctx.whirlpool, hash1, hash1, 64 );
sph_whirlpool512_full( &ctx.whirlpool, hash2, hash2, 64 );
sph_whirlpool512_full( &ctx.whirlpool, hash3, hash3, 64 );
sph_whirlpool512_full( &ctx.whirlpool, hash4, hash4, 64 );
sph_whirlpool512_full( &ctx.whirlpool, hash5, hash5, 64 );
sph_whirlpool512_full( &ctx.whirlpool, hash6, hash6, 64 );
sph_whirlpool512_full( &ctx.whirlpool, hash7, hash7, 64 );
// 7
@@ -1248,40 +1002,22 @@ void sonoa_8way_hash( void *state, const void *input )
#if defined(__VAES__)
shavite512_4way_init( &ctx.shavite );
shavite512_4way_update_close( &ctx.shavite, vhashA, vhashA, 64 );
shavite512_4way_init( &ctx.shavite );
shavite512_4way_update_close( &ctx.shavite, vhashB, vhashB, 64 );
shavite512_4way_full( &ctx.shavite, vhashA, vhashA, 64 );
shavite512_4way_full( &ctx.shavite, vhashB, vhashB, 64 );
#else
dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhashA );
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhashB );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash0, 64 );
sph_shavite512_close( &ctx.shavite, hash0 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash1, 64 );
sph_shavite512_close( &ctx.shavite, hash1 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash2, 64 );
sph_shavite512_close( &ctx.shavite, hash2 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash3, 64 );
sph_shavite512_close( &ctx.shavite, hash3 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash4, 64 );
sph_shavite512_close( &ctx.shavite, hash4 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash5, 64 );
sph_shavite512_close( &ctx.shavite, hash5 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash6, 64 );
sph_shavite512_close( &ctx.shavite, hash6 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash7, 64 );
sph_shavite512_close( &ctx.shavite, hash7 );
shavite512_full( &ctx.shavite, hash0, hash0, 64 );
shavite512_full( &ctx.shavite, hash1, hash1, 64 );
shavite512_full( &ctx.shavite, hash2, hash2, 64 );
shavite512_full( &ctx.shavite, hash3, hash3, 64 );
shavite512_full( &ctx.shavite, hash4, hash4, 64 );
shavite512_full( &ctx.shavite, hash5, hash5, 64 );
shavite512_full( &ctx.shavite, hash6, hash6, 64 );
shavite512_full( &ctx.shavite, hash7, hash7, 64 );
intrlv_4x128_512( vhashA, hash0, hash1, hash2, hash3 );
intrlv_4x128_512( vhashB, hash4, hash5, hash6, hash7 );
@@ -1332,30 +1068,14 @@ void sonoa_8way_hash( void *state, const void *input )
dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
vhash );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash0, 64 );
sph_fugue512_close( &ctx.fugue, hash0 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash1, 64 );
sph_fugue512_close( &ctx.fugue, hash1 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash2, 64 );
sph_fugue512_close( &ctx.fugue, hash2 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash3, 64 );
sph_fugue512_close( &ctx.fugue, hash3 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash4, 64 );
sph_fugue512_close( &ctx.fugue, hash4 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash5, 64 );
sph_fugue512_close( &ctx.fugue, hash5 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash6, 64 );
sph_fugue512_close( &ctx.fugue, hash6 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash7, 64 );
sph_fugue512_close( &ctx.fugue, hash7 );
sph_fugue512_full( &ctx.fugue, hash0, hash0, 64 );
sph_fugue512_full( &ctx.fugue, hash1, hash1, 64 );
sph_fugue512_full( &ctx.fugue, hash2, hash2, 64 );
sph_fugue512_full( &ctx.fugue, hash3, hash3, 64 );
sph_fugue512_full( &ctx.fugue, hash4, hash4, 64 );
sph_fugue512_full( &ctx.fugue, hash5, hash5, 64 );
sph_fugue512_full( &ctx.fugue, hash6, hash6, 64 );
sph_fugue512_full( &ctx.fugue, hash7, hash7, 64 );
intrlv_8x32_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
hash7 );
@@ -1367,30 +1087,14 @@ void sonoa_8way_hash( void *state, const void *input )
dintrlv_8x32_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
vhash );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash0, 64 );
sph_whirlpool_close( &ctx.whirlpool, hash0 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash1, 64 );
sph_whirlpool_close( &ctx.whirlpool, hash1 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash2, 64 );
sph_whirlpool_close( &ctx.whirlpool, hash2 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash3, 64 );
sph_whirlpool_close( &ctx.whirlpool, hash3 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash4, 64 );
sph_whirlpool_close( &ctx.whirlpool, hash4 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash5, 64 );
sph_whirlpool_close( &ctx.whirlpool, hash5 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash6, 64 );
sph_whirlpool_close( &ctx.whirlpool, hash6 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash7, 64 );
sph_whirlpool_close( &ctx.whirlpool, hash7 );
sph_whirlpool512_full( &ctx.whirlpool, hash0, hash0, 64 );
sph_whirlpool512_full( &ctx.whirlpool, hash1, hash1, 64 );
sph_whirlpool512_full( &ctx.whirlpool, hash2, hash2, 64 );
sph_whirlpool512_full( &ctx.whirlpool, hash3, hash3, 64 );
sph_whirlpool512_full( &ctx.whirlpool, hash4, hash4, 64 );
sph_whirlpool512_full( &ctx.whirlpool, hash5, hash5, 64 );
sph_whirlpool512_full( &ctx.whirlpool, hash6, hash6, 64 );
sph_whirlpool512_full( &ctx.whirlpool, hash7, hash7, 64 );
intrlv_8x64_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
hash7 );
@@ -1657,18 +1361,10 @@ void sonoa_4way_hash( void *state, const void *input )
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash0, 64 );
sph_fugue512_close( &ctx.fugue, hash0 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash1, 64 );
sph_fugue512_close( &ctx.fugue, hash1 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash2, 64 );
sph_fugue512_close( &ctx.fugue, hash2 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash3, 64 );
sph_fugue512_close( &ctx.fugue, hash3 );
sph_fugue512_full( &ctx.fugue, hash0, hash0, 64 );
sph_fugue512_full( &ctx.fugue, hash1, hash1, 64 );
sph_fugue512_full( &ctx.fugue, hash2, hash2, 64 );
sph_fugue512_full( &ctx.fugue, hash3, hash3, 64 );
// 4
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
@@ -1730,18 +1426,10 @@ void sonoa_4way_hash( void *state, const void *input )
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash0, 64 );
sph_fugue512_close( &ctx.fugue, hash0 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash1, 64 );
sph_fugue512_close( &ctx.fugue, hash1 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash2, 64 );
sph_fugue512_close( &ctx.fugue, hash2 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash3, 64 );
sph_fugue512_close( &ctx.fugue, hash3 );
sph_fugue512_full( &ctx.fugue, hash0, hash0, 64 );
sph_fugue512_full( &ctx.fugue, hash1, hash1, 64 );
sph_fugue512_full( &ctx.fugue, hash2, hash2, 64 );
sph_fugue512_full( &ctx.fugue, hash3, hash3, 64 );
intrlv_4x32_512( vhash, hash0, hash1, hash2, hash3 );
@@ -1840,18 +1528,10 @@ void sonoa_4way_hash( void *state, const void *input )
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash0, 64 );
sph_fugue512_close( &ctx.fugue, hash0 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash1, 64 );
sph_fugue512_close( &ctx.fugue, hash1 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash2, 64 );
sph_fugue512_close( &ctx.fugue, hash2 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash3, 64 );
sph_fugue512_close( &ctx.fugue, hash3 );
sph_fugue512_full( &ctx.fugue, hash0, hash0, 64 );
sph_fugue512_full( &ctx.fugue, hash1, hash1, 64 );
sph_fugue512_full( &ctx.fugue, hash2, hash2, 64 );
sph_fugue512_full( &ctx.fugue, hash3, hash3, 64 );
intrlv_4x32_512( vhash, hash0, hash1, hash2, hash3 );
@@ -1861,18 +1541,10 @@ void sonoa_4way_hash( void *state, const void *input )
dintrlv_4x32_512( hash0, hash1, hash2, hash3, vhash );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash0, 64 );
sph_whirlpool_close( &ctx.whirlpool, hash0 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash1, 64 );
sph_whirlpool_close( &ctx.whirlpool, hash1 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash2, 64 );
sph_whirlpool_close( &ctx.whirlpool, hash2 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash3, 64 );
sph_whirlpool_close( &ctx.whirlpool, hash3 );
sph_whirlpool512_full( &ctx.whirlpool, hash0, hash0, 64 );
sph_whirlpool512_full( &ctx.whirlpool, hash1, hash1, 64 );
sph_whirlpool512_full( &ctx.whirlpool, hash2, hash2, 64 );
sph_whirlpool512_full( &ctx.whirlpool, hash3, hash3, 64 );
// 6
@@ -1935,18 +1607,10 @@ void sonoa_4way_hash( void *state, const void *input )
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash0, 64 );
sph_fugue512_close( &ctx.fugue, hash0 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash1, 64 );
sph_fugue512_close( &ctx.fugue, hash1 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash2, 64 );
sph_fugue512_close( &ctx.fugue, hash2 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash3, 64 );
sph_fugue512_close( &ctx.fugue, hash3 );
sph_fugue512_full( &ctx.fugue, hash0, hash0, 64 );
sph_fugue512_full( &ctx.fugue, hash1, hash1, 64 );
sph_fugue512_full( &ctx.fugue, hash2, hash2, 64 );
sph_fugue512_full( &ctx.fugue, hash3, hash3, 64 );
intrlv_4x32_512( vhash, hash0, hash1, hash2, hash3 );
@@ -1956,18 +1620,10 @@ void sonoa_4way_hash( void *state, const void *input )
dintrlv_4x32_512( hash0, hash1, hash2, hash3, vhash );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash0, 64 );
sph_whirlpool_close( &ctx.whirlpool, hash0 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash1, 64 );
sph_whirlpool_close( &ctx.whirlpool, hash1 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash2, 64 );
sph_whirlpool_close( &ctx.whirlpool, hash2 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash3, 64 );
sph_whirlpool_close( &ctx.whirlpool, hash3 );
sph_whirlpool512_full( &ctx.whirlpool, hash0, hash0, 64 );
sph_whirlpool512_full( &ctx.whirlpool, hash1, hash1, 64 );
sph_whirlpool512_full( &ctx.whirlpool, hash2, hash2, 64 );
sph_whirlpool512_full( &ctx.whirlpool, hash3, hash3, 64 );
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
@@ -1977,18 +1633,10 @@ void sonoa_4way_hash( void *state, const void *input )
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash0, 64 );
sph_whirlpool_close( &ctx.whirlpool, hash0 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash1, 64 );
sph_whirlpool_close( &ctx.whirlpool, hash1 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash2, 64 );
sph_whirlpool_close( &ctx.whirlpool, hash2 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash3, 64 );
sph_whirlpool_close( &ctx.whirlpool, hash3 );
sph_whirlpool512_full( &ctx.whirlpool, hash0, hash0, 64 );
sph_whirlpool512_full( &ctx.whirlpool, hash1, hash1, 64 );
sph_whirlpool512_full( &ctx.whirlpool, hash2, hash2, 64 );
sph_whirlpool512_full( &ctx.whirlpool, hash3, hash3, 64 );
// 7
@@ -2051,18 +1699,10 @@ void sonoa_4way_hash( void *state, const void *input )
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash0, 64 );
sph_fugue512_close( &ctx.fugue, hash0 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash1, 64 );
sph_fugue512_close( &ctx.fugue, hash1 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash2, 64 );
sph_fugue512_close( &ctx.fugue, hash2 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash3, 64 );
sph_fugue512_close( &ctx.fugue, hash3 );
sph_fugue512_full( &ctx.fugue, hash0, hash0, 64 );
sph_fugue512_full( &ctx.fugue, hash1, hash1, 64 );
sph_fugue512_full( &ctx.fugue, hash2, hash2, 64 );
sph_fugue512_full( &ctx.fugue, hash3, hash3, 64 );
intrlv_4x32_512( vhash, hash0, hash1, hash2, hash3 );
@@ -2072,18 +1712,10 @@ void sonoa_4way_hash( void *state, const void *input )
dintrlv_4x32_512( hash0, hash1, hash2, hash3, vhash );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash0, 64 );
sph_whirlpool_close( &ctx.whirlpool, hash0 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash1, 64 );
sph_whirlpool_close( &ctx.whirlpool, hash1 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash2, 64 );
sph_whirlpool_close( &ctx.whirlpool, hash2 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash3, 64 );
sph_whirlpool_close( &ctx.whirlpool, hash3 );
sph_whirlpool512_full( &ctx.whirlpool, hash0, hash0, 64 );
sph_whirlpool512_full( &ctx.whirlpool, hash1, hash1, 64 );
sph_whirlpool512_full( &ctx.whirlpool, hash2, hash2, 64 );
sph_whirlpool512_full( &ctx.whirlpool, hash3, hash3, 64 );
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );

View File

@@ -132,30 +132,14 @@ void x17_8way_hash( void *state, const void *input )
dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhashA );
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhashB );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash0, 64 );
sph_shavite512_close( &ctx.shavite, hash0 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash1, 64 );
sph_shavite512_close( &ctx.shavite, hash1 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash2, 64 );
sph_shavite512_close( &ctx.shavite, hash2 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash3, 64 );
sph_shavite512_close( &ctx.shavite, hash3 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash4, 64 );
sph_shavite512_close( &ctx.shavite, hash4 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash5, 64 );
sph_shavite512_close( &ctx.shavite, hash5 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash6, 64 );
sph_shavite512_close( &ctx.shavite, hash6 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash7, 64 );
sph_shavite512_close( &ctx.shavite, hash7 );
shavite512_full( &ctx.shavite, hash0, hash0, 64 );
shavite512_full( &ctx.shavite, hash1, hash1, 64 );
shavite512_full( &ctx.shavite, hash2, hash2, 64 );
shavite512_full( &ctx.shavite, hash3, hash3, 64 );
shavite512_full( &ctx.shavite, hash4, hash4, 64 );
shavite512_full( &ctx.shavite, hash5, hash5, 64 );
shavite512_full( &ctx.shavite, hash6, hash6, 64 );
shavite512_full( &ctx.shavite, hash7, hash7, 64 );
intrlv_4x128_512( vhashA, hash0, hash1, hash2, hash3 );
intrlv_4x128_512( vhashB, hash4, hash5, hash6, hash7 );
@@ -206,30 +190,14 @@ void x17_8way_hash( void *state, const void *input )
dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
vhash );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash0, 64 );
sph_fugue512_close( &ctx.fugue, hash0 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash1, 64 );
sph_fugue512_close( &ctx.fugue, hash1 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash2, 64 );
sph_fugue512_close( &ctx.fugue, hash2 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash3, 64 );
sph_fugue512_close( &ctx.fugue, hash3 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash4, 64 );
sph_fugue512_close( &ctx.fugue, hash4 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash5, 64 );
sph_fugue512_close( &ctx.fugue, hash5 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash6, 64 );
sph_fugue512_close( &ctx.fugue, hash6 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash7, 64 );
sph_fugue512_close( &ctx.fugue, hash7 );
sph_fugue512_full( &ctx.fugue, hash0, hash0, 64 );
sph_fugue512_full( &ctx.fugue, hash1, hash1, 64 );
sph_fugue512_full( &ctx.fugue, hash2, hash2, 64 );
sph_fugue512_full( &ctx.fugue, hash3, hash3, 64 );
sph_fugue512_full( &ctx.fugue, hash4, hash4, 64 );
sph_fugue512_full( &ctx.fugue, hash5, hash5, 64 );
sph_fugue512_full( &ctx.fugue, hash6, hash6, 64 );
sph_fugue512_full( &ctx.fugue, hash7, hash7, 64 );
intrlv_8x32_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
hash7 );
@@ -241,30 +209,14 @@ void x17_8way_hash( void *state, const void *input )
dintrlv_8x32_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
vhash );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash0, 64 );
sph_whirlpool_close( &ctx.whirlpool, hash0 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash1, 64 );
sph_whirlpool_close( &ctx.whirlpool, hash1 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash2, 64 );
sph_whirlpool_close( &ctx.whirlpool, hash2 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash3, 64 );
sph_whirlpool_close( &ctx.whirlpool, hash3 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash4, 64 );
sph_whirlpool_close( &ctx.whirlpool, hash4 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash5, 64 );
sph_whirlpool_close( &ctx.whirlpool, hash5 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash6, 64 );
sph_whirlpool_close( &ctx.whirlpool, hash6 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash7, 64 );
sph_whirlpool_close( &ctx.whirlpool, hash7 );
sph_whirlpool512_full( &ctx.whirlpool, hash0, hash0, 64 );
sph_whirlpool512_full( &ctx.whirlpool, hash1, hash1, 64 );
sph_whirlpool512_full( &ctx.whirlpool, hash2, hash2, 64 );
sph_whirlpool512_full( &ctx.whirlpool, hash3, hash3, 64 );
sph_whirlpool512_full( &ctx.whirlpool, hash4, hash4, 64 );
sph_whirlpool512_full( &ctx.whirlpool, hash5, hash5, 64 );
sph_whirlpool512_full( &ctx.whirlpool, hash6, hash6, 64 );
sph_whirlpool512_full( &ctx.whirlpool, hash7, hash7, 64 );
intrlv_8x64_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
hash7 );
@@ -283,10 +235,10 @@ void x17_8way_hash( void *state, const void *input )
int scanhash_x17_8way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t hash[8*8] __attribute__ ((aligned (128)));
uint32_t hash32[8*8] __attribute__ ((aligned (128)));
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
uint32_t *hashd7 = &(hash[7*8]);
uint32_t *hash32_d7 = &(hash32[7*8]);
uint32_t *pdata = work->data;
const uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
@@ -294,7 +246,7 @@ int scanhash_x17_8way( struct work *work, uint32_t max_nonce,
__m512i *noncev = (__m512i*)vdata + 9;
uint32_t n = first_nonce;
const int thr_id = mythr->id;
const uint32_t targ32 = ptarget[7];
const uint32_t targ32_d7 = ptarget[7];
const bool bench = opt_benchmark;
mm512_bswap32_intrlv80_8x64( vdata, pdata );
@@ -303,12 +255,12 @@ int scanhash_x17_8way( struct work *work, uint32_t max_nonce,
n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
do
{
x17_8way_hash( hash, vdata );
x17_8way_hash( hash32, vdata );
for ( int lane = 0; lane < 8; lane++ )
if ( unlikely( ( hashd7[ lane ] <= targ32 ) && !bench ) )
if ( unlikely( ( hash32_d7[ lane ] <= targ32_d7 ) && !bench ) )
{
extr_lane_8x32( lane_hash, hash, lane, 256 );
extr_lane_8x32( lane_hash, hash32, lane, 256 );
if ( likely( valid_hash( lane_hash, ptarget ) ) )
{
pdata[19] = bswap_32( n + lane );
@@ -418,18 +370,10 @@ void x17_4way_hash( void *state, const void *input )
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash0, 64 );
sph_fugue512_close( &ctx.fugue, hash0 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash1, 64 );
sph_fugue512_close( &ctx.fugue, hash1 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash2, 64 );
sph_fugue512_close( &ctx.fugue, hash2 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash3, 64 );
sph_fugue512_close( &ctx.fugue, hash3 );
sph_fugue512_full( &ctx.fugue, hash0, hash0, 64 );
sph_fugue512_full( &ctx.fugue, hash1, hash1, 64 );
sph_fugue512_full( &ctx.fugue, hash2, hash2, 64 );
sph_fugue512_full( &ctx.fugue, hash3, hash3, 64 );
intrlv_4x32_512( vhash, hash0, hash1, hash2, hash3 );
@@ -439,18 +383,10 @@ void x17_4way_hash( void *state, const void *input )
dintrlv_4x32_512( hash0, hash1, hash2, hash3, vhash );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash0, 64 );
sph_whirlpool_close( &ctx.whirlpool, hash0 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash1, 64 );
sph_whirlpool_close( &ctx.whirlpool, hash1 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash2, 64 );
sph_whirlpool_close( &ctx.whirlpool, hash2 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash3, 64 );
sph_whirlpool_close( &ctx.whirlpool, hash3 );
sph_whirlpool512_full( &ctx.whirlpool, hash0, hash0, 64 );
sph_whirlpool512_full( &ctx.whirlpool, hash1, hash1, 64 );
sph_whirlpool512_full( &ctx.whirlpool, hash2, hash2, 64 );
sph_whirlpool512_full( &ctx.whirlpool, hash3, hash3, 64 );
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
@@ -468,10 +404,10 @@ void x17_4way_hash( void *state, const void *input )
int scanhash_x17_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t hash[8*4] __attribute__ ((aligned (64)));
uint32_t hash32[8*4] __attribute__ ((aligned (64)));
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
uint32_t *hashd7 = &(hash[ 7*4 ]);
uint32_t *hash32_d7 = &(hash32[ 7*4 ]);
uint32_t *pdata = work->data;
const uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
@@ -479,7 +415,7 @@ int scanhash_x17_4way( struct work *work, uint32_t max_nonce,
__m256i *noncev = (__m256i*)vdata + 9;
uint32_t n = first_nonce;
const int thr_id = mythr->id;
const uint32_t targ32 = ptarget[7];
const uint32_t targ32_d7 = ptarget[7];
const bool bench = opt_benchmark;
mm256_bswap32_intrlv80_4x64( vdata, pdata );
@@ -487,12 +423,12 @@ int scanhash_x17_4way( struct work *work, uint32_t max_nonce,
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
do
{
x17_4way_hash( hash, vdata );
x17_4way_hash( hash32, vdata );
for ( int lane = 0; lane < 4; lane++ )
if ( unlikely( hashd7[ lane ] <= targ32 && !bench ) )
if ( unlikely( hash32_d7[ lane ] <= targ32_d7 && !bench ) )
{
extr_lane_4x32( lane_hash, hash, lane, 256 );
extr_lane_4x32( lane_hash, hash32, lane, 256 );
if ( valid_hash( lane_hash, ptarget ) )
{
pdata[19] = bswap_32( n + lane );

View File

@@ -134,30 +134,14 @@ void xevan_8way_hash( void *output, const void *input )
dintrlv_4x128( hash0, hash1, hash2, hash3, vhashA, dataLen<<3 );
dintrlv_4x128( hash4, hash5, hash6, hash7, vhashB, dataLen<<3 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash0, dataLen );
sph_shavite512_close( &ctx.shavite, hash0 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash1, dataLen );
sph_shavite512_close( &ctx.shavite, hash1 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash2, dataLen );
sph_shavite512_close( &ctx.shavite, hash2 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash3, dataLen );
sph_shavite512_close( &ctx.shavite, hash3 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash4, dataLen );
sph_shavite512_close( &ctx.shavite, hash4 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash5, dataLen );
sph_shavite512_close( &ctx.shavite, hash5 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash6, dataLen );
sph_shavite512_close( &ctx.shavite, hash6 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash7, dataLen );
sph_shavite512_close( &ctx.shavite, hash7 );
shavite512_full( &ctx.shavite, hash0, hash0, dataLen );
shavite512_full( &ctx.shavite, hash1, hash1, dataLen );
shavite512_full( &ctx.shavite, hash2, hash2, dataLen );
shavite512_full( &ctx.shavite, hash3, hash3, dataLen );
shavite512_full( &ctx.shavite, hash4, hash4, dataLen );
shavite512_full( &ctx.shavite, hash5, hash5, dataLen );
shavite512_full( &ctx.shavite, hash6, hash6, dataLen );
shavite512_full( &ctx.shavite, hash7, hash7, dataLen );
intrlv_4x128( vhashA, hash0, hash1, hash2, hash3, dataLen<<3 );
intrlv_4x128( vhashB, hash4, hash5, hash6, hash7, dataLen<<3 );
@@ -208,30 +192,14 @@ void xevan_8way_hash( void *output, const void *input )
dintrlv_8x64( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
vhash, dataLen<<3 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash0, dataLen );
sph_fugue512_close( &ctx.fugue, hash0 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash1, dataLen );
sph_fugue512_close( &ctx.fugue, hash1 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash2, dataLen );
sph_fugue512_close( &ctx.fugue, hash2 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash3, dataLen );
sph_fugue512_close( &ctx.fugue, hash3 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash4, dataLen );
sph_fugue512_close( &ctx.fugue, hash4 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash5, dataLen );
sph_fugue512_close( &ctx.fugue, hash5 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash6, dataLen );
sph_fugue512_close( &ctx.fugue, hash6 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash7, dataLen );
sph_fugue512_close( &ctx.fugue, hash7 );
sph_fugue512_full( &ctx.fugue, hash0, hash0, dataLen );
sph_fugue512_full( &ctx.fugue, hash1, hash1, dataLen );
sph_fugue512_full( &ctx.fugue, hash2, hash2, dataLen );
sph_fugue512_full( &ctx.fugue, hash3, hash3, dataLen );
sph_fugue512_full( &ctx.fugue, hash4, hash4, dataLen );
sph_fugue512_full( &ctx.fugue, hash5, hash5, dataLen );
sph_fugue512_full( &ctx.fugue, hash6, hash6, dataLen );
sph_fugue512_full( &ctx.fugue, hash7, hash7, dataLen );
intrlv_8x32( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
hash7, dataLen<<3 );
@@ -243,30 +211,14 @@ void xevan_8way_hash( void *output, const void *input )
dintrlv_8x32( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
vhash, dataLen<<3 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash0, dataLen );
sph_whirlpool_close( &ctx.whirlpool, hash0 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash1, dataLen );
sph_whirlpool_close( &ctx.whirlpool, hash1 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash2, dataLen );
sph_whirlpool_close( &ctx.whirlpool, hash2 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash3, dataLen );
sph_whirlpool_close( &ctx.whirlpool, hash3 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash4, dataLen );
sph_whirlpool_close( &ctx.whirlpool, hash4 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash5, dataLen );
sph_whirlpool_close( &ctx.whirlpool, hash5 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash6, dataLen );
sph_whirlpool_close( &ctx.whirlpool, hash6 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash7, dataLen );
sph_whirlpool_close( &ctx.whirlpool, hash7 );
sph_whirlpool512_full( &ctx.whirlpool, hash0, hash0, dataLen );
sph_whirlpool512_full( &ctx.whirlpool, hash1, hash1, dataLen );
sph_whirlpool512_full( &ctx.whirlpool, hash2, hash2, dataLen );
sph_whirlpool512_full( &ctx.whirlpool, hash3, hash3, dataLen );
sph_whirlpool512_full( &ctx.whirlpool, hash4, hash4, dataLen );
sph_whirlpool512_full( &ctx.whirlpool, hash5, hash5, dataLen );
sph_whirlpool512_full( &ctx.whirlpool, hash6, hash6, dataLen );
sph_whirlpool512_full( &ctx.whirlpool, hash7, hash7, dataLen );
intrlv_8x64( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
hash7, dataLen<<3 );
@@ -345,30 +297,14 @@ void xevan_8way_hash( void *output, const void *input )
dintrlv_4x128( hash0, hash1, hash2, hash3, vhashA, dataLen<<3 );
dintrlv_4x128( hash4, hash5, hash6, hash7, vhashB, dataLen<<3 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash0, dataLen );
sph_shavite512_close( &ctx.shavite, hash0 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash1, dataLen );
sph_shavite512_close( &ctx.shavite, hash1 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash2, dataLen );
sph_shavite512_close( &ctx.shavite, hash2 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash3, dataLen );
sph_shavite512_close( &ctx.shavite, hash3 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash4, dataLen );
sph_shavite512_close( &ctx.shavite, hash4 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash5, dataLen );
sph_shavite512_close( &ctx.shavite, hash5 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash6, dataLen );
sph_shavite512_close( &ctx.shavite, hash6 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash7, dataLen );
sph_shavite512_close( &ctx.shavite, hash7 );
shavite512_full( &ctx.shavite, hash0, hash0, dataLen );
shavite512_full( &ctx.shavite, hash1, hash1, dataLen );
shavite512_full( &ctx.shavite, hash2, hash2, dataLen );
shavite512_full( &ctx.shavite, hash3, hash3, dataLen );
shavite512_full( &ctx.shavite, hash4, hash4, dataLen );
shavite512_full( &ctx.shavite, hash5, hash5, dataLen );
shavite512_full( &ctx.shavite, hash6, hash6, dataLen );
shavite512_full( &ctx.shavite, hash7, hash7, dataLen );
intrlv_4x128( vhashA, hash0, hash1, hash2, hash3, dataLen<<3 );
intrlv_4x128( vhashB, hash4, hash5, hash6, hash7, dataLen<<3 );
@@ -419,30 +355,14 @@ void xevan_8way_hash( void *output, const void *input )
dintrlv_8x64( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
vhash, dataLen<<3 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash0, dataLen );
sph_fugue512_close( &ctx.fugue, hash0 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash1, dataLen );
sph_fugue512_close( &ctx.fugue, hash1 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash2, dataLen );
sph_fugue512_close( &ctx.fugue, hash2 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash3, dataLen );
sph_fugue512_close( &ctx.fugue, hash3 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash4, dataLen );
sph_fugue512_close( &ctx.fugue, hash4 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash5, dataLen );
sph_fugue512_close( &ctx.fugue, hash5 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash6, dataLen );
sph_fugue512_close( &ctx.fugue, hash6 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash7, dataLen );
sph_fugue512_close( &ctx.fugue, hash7 );
sph_fugue512_full( &ctx.fugue, hash0, hash0, dataLen );
sph_fugue512_full( &ctx.fugue, hash1, hash1, dataLen );
sph_fugue512_full( &ctx.fugue, hash2, hash2, dataLen );
sph_fugue512_full( &ctx.fugue, hash3, hash3, dataLen );
sph_fugue512_full( &ctx.fugue, hash4, hash4, dataLen );
sph_fugue512_full( &ctx.fugue, hash5, hash5, dataLen );
sph_fugue512_full( &ctx.fugue, hash6, hash6, dataLen );
sph_fugue512_full( &ctx.fugue, hash7, hash7, dataLen );
intrlv_8x32( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
hash7, dataLen<<3 );
@@ -454,30 +374,14 @@ void xevan_8way_hash( void *output, const void *input )
dintrlv_8x32( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
vhash, dataLen<<3 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash0, dataLen );
sph_whirlpool_close( &ctx.whirlpool, hash0 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash1, dataLen );
sph_whirlpool_close( &ctx.whirlpool, hash1 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash2, dataLen );
sph_whirlpool_close( &ctx.whirlpool, hash2 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash3, dataLen );
sph_whirlpool_close( &ctx.whirlpool, hash3 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash4, dataLen );
sph_whirlpool_close( &ctx.whirlpool, hash4 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash5, dataLen );
sph_whirlpool_close( &ctx.whirlpool, hash5 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash6, dataLen );
sph_whirlpool_close( &ctx.whirlpool, hash6 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash7, dataLen );
sph_whirlpool_close( &ctx.whirlpool, hash7 );
sph_whirlpool512_full( &ctx.whirlpool, hash0, hash0, dataLen );
sph_whirlpool512_full( &ctx.whirlpool, hash1, hash1, dataLen );
sph_whirlpool512_full( &ctx.whirlpool, hash2, hash2, dataLen );
sph_whirlpool512_full( &ctx.whirlpool, hash3, hash3, dataLen );
sph_whirlpool512_full( &ctx.whirlpool, hash4, hash4, dataLen );
sph_whirlpool512_full( &ctx.whirlpool, hash5, hash5, dataLen );
sph_whirlpool512_full( &ctx.whirlpool, hash6, hash6, dataLen );
sph_whirlpool512_full( &ctx.whirlpool, hash7, hash7, dataLen );
intrlv_8x64( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
hash7, dataLen<<3 );
@@ -636,18 +540,10 @@ void xevan_4way_hash( void *output, const void *input )
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, dataLen<<3 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash0, dataLen );
sph_fugue512_close( &ctx.fugue, hash0 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash1, dataLen );
sph_fugue512_close( &ctx.fugue, hash1 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash2, dataLen );
sph_fugue512_close( &ctx.fugue, hash2 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash3, dataLen );
sph_fugue512_close( &ctx.fugue, hash3 );
sph_fugue512_full( &ctx.fugue, hash0, hash0, 64 );
sph_fugue512_full( &ctx.fugue, hash1, hash1, 64 );
sph_fugue512_full( &ctx.fugue, hash2, hash2, 64 );
sph_fugue512_full( &ctx.fugue, hash3, hash3, 64 );
// Parallel 4way 32 bit
intrlv_4x32( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );
@@ -659,18 +555,10 @@ void xevan_4way_hash( void *output, const void *input )
dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, dataLen<<3 );
// Serial
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash0, dataLen );
sph_whirlpool_close( &ctx.whirlpool, hash0 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash1, dataLen );
sph_whirlpool_close( &ctx.whirlpool, hash1 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash2, dataLen );
sph_whirlpool_close( &ctx.whirlpool, hash2 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash3, dataLen );
sph_whirlpool_close( &ctx.whirlpool, hash3 );
sph_whirlpool512_full( &ctx.whirlpool, hash0, hash0, dataLen );
sph_whirlpool512_full( &ctx.whirlpool, hash1, hash1, dataLen );
sph_whirlpool512_full( &ctx.whirlpool, hash2, hash2, dataLen );
sph_whirlpool512_full( &ctx.whirlpool, hash3, hash3, dataLen );
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );
@@ -749,18 +637,10 @@ void xevan_4way_hash( void *output, const void *input )
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, dataLen<<3 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash0, dataLen );
sph_fugue512_close( &ctx.fugue, hash0 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash1, dataLen );
sph_fugue512_close( &ctx.fugue, hash1 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash2, dataLen );
sph_fugue512_close( &ctx.fugue, hash2 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash3, dataLen );
sph_fugue512_close( &ctx.fugue, hash3 );
sph_fugue512_full( &ctx.fugue, hash0, hash0, 64 );
sph_fugue512_full( &ctx.fugue, hash1, hash1, 64 );
sph_fugue512_full( &ctx.fugue, hash2, hash2, 64 );
sph_fugue512_full( &ctx.fugue, hash3, hash3, 64 );
intrlv_4x32( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );
@@ -770,18 +650,10 @@ void xevan_4way_hash( void *output, const void *input )
dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, dataLen<<3 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash0, dataLen );
sph_whirlpool_close( &ctx.whirlpool, hash0 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash1, dataLen );
sph_whirlpool_close( &ctx.whirlpool, hash1 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash2, dataLen );
sph_whirlpool_close( &ctx.whirlpool, hash2 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash3, dataLen );
sph_whirlpool_close( &ctx.whirlpool, hash3 );
sph_whirlpool512_full( &ctx.whirlpool, hash0, hash0, dataLen );
sph_whirlpool512_full( &ctx.whirlpool, hash1, hash1, dataLen );
sph_whirlpool512_full( &ctx.whirlpool, hash2, hash2, dataLen );
sph_whirlpool512_full( &ctx.whirlpool, hash3, hash3, dataLen );
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );

View File

@@ -73,6 +73,7 @@ bool register_yescryptr8g_algo( algo_gate_t* gate )
gate->optimizations = SSE2_OPT | SHA_OPT;
gate->scanhash = (void*)&scanhash_yespower_r8g;
gate->hash = (void*)&yespower_tls;
pk_buffer_size = 26;
opt_sapling = true;
opt_target_factor = 65536.0;
return true;

20
configure vendored
View File

@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.12.2.
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.12.4.2.
#
#
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
@@ -577,8 +577,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='cpuminer-opt'
PACKAGE_TARNAME='cpuminer-opt'
PACKAGE_VERSION='3.12.2'
PACKAGE_STRING='cpuminer-opt 3.12.2'
PACKAGE_VERSION='3.12.4.2'
PACKAGE_STRING='cpuminer-opt 3.12.4.2'
PACKAGE_BUGREPORT=''
PACKAGE_URL=''
@@ -1332,7 +1332,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
\`configure' configures cpuminer-opt 3.12.2 to adapt to many kinds of systems.
\`configure' configures cpuminer-opt 3.12.4.2 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1404,7 +1404,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
short | recursive ) echo "Configuration of cpuminer-opt 3.12.2:";;
short | recursive ) echo "Configuration of cpuminer-opt 3.12.4.2:";;
esac
cat <<\_ACEOF
@@ -1509,7 +1509,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
cpuminer-opt configure 3.12.2
cpuminer-opt configure 3.12.4.2
generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2012,7 +2012,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
It was created by cpuminer-opt $as_me 3.12.2, which was
It was created by cpuminer-opt $as_me 3.12.4.2, which was
generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@
@@ -2993,7 +2993,7 @@ fi
# Define the identity of the package.
PACKAGE='cpuminer-opt'
VERSION='3.12.2'
VERSION='3.12.4.2'
cat >>confdefs.h <<_ACEOF
@@ -6690,7 +6690,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
This file was extended by cpuminer-opt $as_me 3.12.2, which was
This file was extended by cpuminer-opt $as_me 3.12.4.2, which was
generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -6756,7 +6756,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
cpuminer-opt config.status 3.12.2
cpuminer-opt config.status 3.12.4.2
configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\"

View File

@@ -1,4 +1,4 @@
AC_INIT([cpuminer-opt], [3.12.2])
AC_INIT([cpuminer-opt], [3.12.4.2])
AC_PREREQ([2.59c])
AC_CANONICAL_SYSTEM

View File

@@ -129,7 +129,13 @@ char *rpc_url = NULL;;
char *rpc_userpass = NULL;
char *rpc_user, *rpc_pass;
char *short_url = NULL;
static unsigned char pk_script[25] = { 0 };
char *coinbase_address;
// pk_buffer_size is used as a version selector by b58 code, therefore
// it must be set correctly to work.
const int pk_buffer_size_max = 26;
int pk_buffer_size = 25;
static unsigned char pk_script[ 26 ] = { 0 };
static size_t pk_script_size = 0;
static char coinbase_sig[101] = { 0 };
char *opt_cert;
@@ -425,68 +431,71 @@ static bool work_decode( const json_t *val, struct work *work )
static const char *info_req =
"{\"method\": \"getmininginfo\", \"params\": [], \"id\":8}\r\n";
static bool get_mininginfo(CURL *curl, struct work *work)
static bool get_mininginfo( CURL *curl, struct work *work )
{
if (have_stratum || !allow_mininginfo)
if ( have_stratum || !allow_mininginfo )
return false;
int curl_err = 0;
json_t *val = json_rpc_call(curl, rpc_url, rpc_userpass, info_req, &curl_err, 0);
json_t *val = json_rpc_call( curl, rpc_url, rpc_userpass, info_req,
&curl_err, 0 );
if (!val && curl_err == -1) {
if ( !val && curl_err == -1 )
{
allow_mininginfo = false;
if (opt_debug) {
applog(LOG_DEBUG, "getmininginfo not supported");
}
if ( opt_debug )
applog( LOG_DEBUG, "getmininginfo not supported" );
return false;
}
else
{
json_t *res = json_object_get(val, "result");
// "blocks": 491493 (= current work height - 1)
// "difficulty": 0.99607860999999998
// "networkhashps": 56475980
if (res)
{
json_t *key = json_object_get(res, "difficulty");
if (key) {
if (json_is_object(key))
key = json_object_get(key, "proof-of-work");
if (json_is_real(key))
net_diff = json_real_value(key);
}
key = json_object_get(res, "networkhashps");
if (key && json_is_integer(key)) {
net_hashrate = (double) json_integer_value(key);
}
key = json_object_get(res, "blocks");
if (key && json_is_integer(key)) {
net_blocks = json_integer_value(key);
}
if (!work->height)
{
// complete missing data from getwork
work->height = (uint32_t) net_blocks + 1;
if (work->height > g_work.height)
{
restart_threads();
if (!opt_quiet) {
char netinfo[64] = { 0 };
char srate[32] = { 0 };
sprintf(netinfo, "diff %.2f", net_diff);
if (net_hashrate) {
format_hashrate(net_hashrate, srate);
strcat(netinfo, ", net ");
strcat(netinfo, srate);
}
applog(LOG_BLUE, "%s block %d, %s",
algo_names[opt_algo], work->height, netinfo);
}
}
}
json_t *res = json_object_get( val, "result" );
// "blocks": 491493 (= current work height - 1)
// "difficulty": 0.99607860999999998
// "networkhashps": 56475980
if ( res )
{
json_t *key = json_object_get( res, "difficulty" );
if ( key )
{
if ( json_is_object( key ) )
key = json_object_get( key, "proof-of-work" );
if ( json_is_real( key ) )
net_diff = json_real_value( key );
}
key = json_object_get( res, "networkhashps" );
if ( key && json_is_integer( key ) )
net_hashrate = (double) json_integer_value( key );
key = json_object_get( res, "blocks" );
if ( key && json_is_integer( key ) )
net_blocks = json_integer_value( key );
if ( !work->height )
{
// complete missing data from getwork
work->height = (uint32_t) net_blocks + 1;
if ( work->height > g_work.height )
{
restart_threads();
if ( !opt_quiet )
{
char netinfo[64] = { 0 };
char srate[32] = { 0 };
sprintf( netinfo, "diff %.2f", net_diff );
if ( net_hashrate )
{
format_hashrate( net_hashrate, srate );
strcat( netinfo, ", net " );
strcat( netinfo, srate );
}
applog( LOG_BLUE, "%s block %d, %s",
algo_names[opt_algo], work->height, netinfo );
}
}
} // res
}
json_decref(val);
json_decref( val );
return true;
}
@@ -1118,7 +1127,7 @@ static int share_result( int result, struct work *null_work,
sprintf( bres, "B%d", solved_block_count );
if ( stale )
{
sprintf( sres, "Stale job %d", stale_share_count );
sprintf( sres, "Stale %d", stale_share_count );
sprintf( rres, "R%d", rejected_share_count );
}
else
@@ -1146,10 +1155,17 @@ static int share_result( int result, struct work *null_work,
my_stats.share_count, acol, ares, scol, sres, rcol, rres, bcol,
bres, share_time, latency );
if ( have_stratum && !opt_quiet )
applog2( LOG_NOTICE, "Diff %.5g (%.3g%), %sBlock %d, %sJob %s" CL_WHT,
if ( !opt_quiet )
{
if ( have_stratum )
applog2( LOG_NOTICE, "Diff %.5g (%.3g%), %sBlock %d, %sJob %s" CL_WHT,
my_stats.share_diff, share_ratio, bcol, stratum.block_height,
scol, my_stats.job_id );
else
applog2( LOG_NOTICE, "Diff %.5g (%.3g%), %sBlock %d" CL_WHT,
my_stats.share_diff, share_ratio, bcol, stratum.block_height,
scol );
}
if ( unlikely( reason && !result ) )
{
@@ -1471,6 +1487,36 @@ start:
json_decref( val );
// store work height in solo
get_mininginfo(curl, work);
applog( LOG_BLUE, "%s %s block %d, diff %.5g", work->height, net_diff );
if ( !opt_quiet && net_diff && net_hashrate )
{
double miner_hr = 0.;
pthread_mutex_lock( &stats_lock );
for ( int i = 0; i < opt_n_threads; i++ )
miner_hr += thr_hashrates[i];
global_hashrate = miner_hr;
pthread_mutex_unlock( &stats_lock );
if ( miner_hr )
{
char net_hr_units[4] = {0};
char miner_hr_units[4] = {0};
char net_ttf[32];
char miner_ttf[32];
sprintf_et( net_ttf, net_diff * diff_to_hash / net_hashrate );
sprintf_et( miner_ttf, net_diff * diff_to_hash / miner_hr );
scale_hash_for_display ( &miner_hr, miner_hr_units );
scale_hash_for_display ( &net_hashrate, net_hr_units );
applog2(LOG_INFO, "Miner TTF @ %.2f %sh/s %s, net TTF @ %.2f %sh/s %s",
miner_hr, miner_hr_units, miner_ttf,
net_hashrate, net_hr_units, net_ttf );
}
}
return rc;
}
@@ -1517,6 +1563,8 @@ static bool workio_get_work(struct workio_cmd *wc, CURL *curl)
sleep(opt_fail_pause);
}
report_summary_log( false );
/* send work to requesting thread */
if (!tq_push(wc->thr->q, ret_work))
free(ret_work);
@@ -1692,8 +1740,8 @@ void work_set_target_ratio( struct work* work, const void *hash )
share_stats[ s_put_ptr ].net_diff = net_diff;
share_stats[ s_put_ptr ].stratum_diff = stratum_diff;
share_stats[ s_put_ptr ].target_diff = work->targetdiff;
( (uint64_t*)share_stats[ s_put_ptr ].job_id )[3] = 0;
strncpy( share_stats[ s_put_ptr ].job_id, work->job_id, 30 );
if ( have_stratum )
strncpy( share_stats[ s_put_ptr ].job_id, work->job_id, 30 );
s_put_ptr = stats_ptr_incr( s_put_ptr );
pthread_mutex_unlock( &stats_lock );
@@ -1706,24 +1754,31 @@ bool submit_solution( struct work *work, const void *hash,
{
submitted_share_count++;
work_set_target_ratio( work, hash );
if ( !opt_quiet )
applog( LOG_NOTICE, "%d submitted by thread %d, job %s",
submitted_share_count, thr->id, work->job_id );
if ( lowdiff_debug )
{
uint32_t* h = (uint32_t*)hash;
uint32_t* t = (uint32_t*)work->target;
applog(LOG_INFO,"Hash[7:0]: %08x %08x %08x %08x %08x %08x %08x %08x",
h[7],h[6],h[5],h[4],h[3],h[2],h[1],h[0]);
applog(LOG_INFO,"Targ[7:0]: %08x %08x %08x %08x %08x %08x %08x %08x",
t[7],t[6],t[5],t[4],t[3],t[2],t[1],t[0]);
}
return true;
if ( !opt_quiet )
{
if ( have_stratum )
applog( LOG_NOTICE, "%d submitted by thread %d, job %s",
submitted_share_count, thr->id, work->job_id );
else
applog( LOG_NOTICE, "%d submitted by thread %d",
submitted_share_count, thr->id );
}
if ( lowdiff_debug )
{
uint32_t* h = (uint32_t*)hash;
uint32_t* t = (uint32_t*)work->target;
applog(LOG_INFO,"Hash[7:0]: %08x %08x %08x %08x %08x %08x %08x %08x",
h[7],h[6],h[5],h[4],h[3],h[2],h[1],h[0]);
applog(LOG_INFO,"Targ[7:0]: %08x %08x %08x %08x %08x %08x %08x %08x",
t[7],t[6],t[5],t[4],t[3],t[2],t[1],t[0]);
}
return true;
}
else
applog( LOG_WARNING, "%d failed to submit share.",
submitted_share_count );
applog( LOG_WARNING, "%d failed to submit share thread %d.",
submitted_share_count, thr->id );
return false;
}
@@ -1734,26 +1789,31 @@ bool submit_lane_solution( struct work *work, const void *hash,
{
submitted_share_count++;
work_set_target_ratio( work, hash );
if ( !opt_quiet )
applog( LOG_NOTICE, "%d submitted by thread %d, lane %d, job %s",
submitted_share_count, thr->id, lane, work->job_id );
{
if ( have_stratum )
applog( LOG_NOTICE, "%d submitted by thread %d, lane %d, job %s",
submitted_share_count, thr->id, lane, work->job_id );
else
applog( LOG_NOTICE, "%d submitted by thread %d, lane %d",
submitted_share_count, thr->id, lane );
}
if ( lowdiff_debug )
{
uint32_t* h = (uint32_t*)hash;
uint32_t* t = (uint32_t*)work->target;
applog(LOG_INFO,"Hash[7:0]: %08x %08x %08x %08x %08x %08x %08x %08x",
h[7],h[6],h[5],h[4],h[3],h[2],h[1],h[0]);
applog(LOG_INFO,"Targ[7:0]: %08x %08x %08x %08x %08x %08x %08x %08x",
t[7],t[6],t[5],t[4],t[3],t[2],t[1],t[0]);
}
return true;
if ( lowdiff_debug )
{
uint32_t* h = (uint32_t*)hash;
uint32_t* t = (uint32_t*)work->target;
applog(LOG_INFO,"Hash[7:0]: %08x %08x %08x %08x %08x %08x %08x %08x",
h[7],h[6],h[5],h[4],h[3],h[2],h[1],h[0]);
applog(LOG_INFO,"Targ[7:0]: %08x %08x %08x %08x %08x %08x %08x %08x",
t[7],t[6],t[5],t[4],t[3],t[2],t[1],t[0]);
}
return true;
}
else
applog( LOG_WARNING, "%d failed to submit share.",
submitted_share_count );
applog( LOG_WARNING, "%d failed to submit share, thread %d, lane %d.",
submitted_share_count, thr->id, lane );
return false;
}
@@ -1847,10 +1907,15 @@ void std_get_new_work( struct work* work, struct work* g_work, int thr_id,
uint32_t *end_nonce_ptr )
{
uint32_t *nonceptr = work->data + algo_gate.nonce_index;
bool force_new_work;
bool force_new_work = work->job_id ? strtoul( work->job_id, NULL, 16 ) !=
strtoul( g_work->job_id, NULL, 16 )
: true;
if ( have_stratum )
force_new_work = work->job_id ? strtoul( work->job_id, NULL, 16 )
!= strtoul( g_work->job_id, NULL, 16 )
: true;
else
force_new_work = memcmp( work->data, g_work->data,
algo_gate.work_cmp_size );
if ( force_new_work || *nonceptr >= *end_nonce_ptr )
{
@@ -2030,12 +2095,28 @@ static void *miner_thread( void *userdata )
sleep(5);
continue;
}
// adjust max_nonce to meet target scan time
// LP_SCANTIME overrides opt_scantime option, is this right?
// adjust max_nonce to meet target scan time. Startum and longpoll
// can go longer because they can rely on restart_threads to signal
// an early abort. get_work on the other hand can't rely on
// restart_threads so need a much shorter scantime
if ( have_stratum )
max64 = 60 * thr_hashrates[thr_id];
else if ( have_longpoll )
max64 = LP_SCANTIME * thr_hashrates[thr_id];
else // getwork inline
max64 = opt_scantime * thr_hashrates[thr_id];
/*
if ( have_stratum )
max64 = LP_SCANTIME;
else
max64 = g_work_time + ( have_longpoll ? LP_SCANTIME : opt_scantime )
- time(NULL);
*/
// time limit
if ( unlikely( opt_time_limit && firstwork_time ) )
{
@@ -2062,17 +2143,20 @@ static void *miner_thread( void *userdata )
}
if ( remain < max64 ) max64 = remain;
}
// Select nonce range for approx 1 min duration based
// on hashrate, initial value arbitrarilly set to 1000 just to get
// Select nonce range based on max64, the estimated number of hashes
// to meet the desired scan time.
// Initial value arbitrarilly set to 1000 just to get
// a sample hashrate for the next time.
uint32_t work_nonce = *nonceptr;
max64 = 60 * thr_hashrates[thr_id];
// max64 = 60 * thr_hashrates[thr_id];
if ( max64 <= 0)
max64 = 1000;
if ( work_nonce + max64 > end_nonce )
max_nonce = end_nonce;
else
max_nonce = work_nonce + (uint32_t)max64;
// init time
if ( firstwork_time == 0 )
firstwork_time = time(NULL);
@@ -2286,13 +2370,18 @@ start:
soval = json_object_get(res, "submitold");
submit_old = soval ? json_is_true(soval) : false;
pthread_mutex_lock(&g_work_lock);
start_job_id = g_work.job_id ? strdup(g_work.job_id) : NULL;
// This code has been here for a long time even though job_id isn't used.
// This needs to be changed eventually to test the block height properly
// using g_work.block_height .
start_job_id = g_work.job_id ? strdup(g_work.job_id) : NULL;
if (have_gbt)
rc = gbt_work_decode(res, &g_work);
else
rc = work_decode(res, &g_work);
if (rc)
{
// purge job id from solo mining
bool newblock = g_work.job_id && strcmp(start_job_id, g_work.job_id);
newblock |= (start_diff != net_diff); // the best is the height but... longpoll...
if (newblock)
@@ -2458,6 +2547,8 @@ void std_stratum_gen_work( struct stratum_ctx *sctx, struct work *g_work )
pthread_mutex_unlock( &sctx->work_lock );
restart_threads();
if ( opt_debug )
{
unsigned char *xnonce2str = abin2hex( g_work->xnonce2,
@@ -2982,11 +3073,7 @@ void parse_arg(int key, char *arg )
opt_hash_meter = true;
break;
case 1016: /* --coinbase-addr */
pk_script_size = address_to_script(pk_script, sizeof(pk_script), arg);
if (!pk_script_size) {
fprintf(stderr, "invalid address -- '%s'\n", arg);
show_usage_and_exit(1);
}
if ( arg ) coinbase_address = strdup( arg );
break;
case 1015: /* --coinbase-sig */
if (strlen(arg) + 1 > sizeof(coinbase_sig)) {
@@ -3469,6 +3556,17 @@ int main(int argc, char *argv[])
// All options must be set before starting the gate
if ( !register_algo_gate( opt_algo, &algo_gate ) ) exit(1);
if ( coinbase_address )
{
pk_script_size = address_to_script( pk_script, pk_buffer_size,
coinbase_address );
if ( !pk_script_size )
{
applog(LOG_ERR,"Invalid coinbase address: '%s'", coinbase_address );
exit(0);
}
}
// Initialize stats times and counters
memset( share_stats, 0, 2 * sizeof (struct share_stats_t) );
gettimeofday( &last_submit_time, NULL );
@@ -3619,7 +3717,10 @@ int main(int argc, char *argv[])
/* ESET-NOD32 Detects these 2 thread_create... */
if (want_longpoll && !have_stratum)
{
/* init longpoll thread info */
if ( opt_debug )
applog(LOG_INFO,"Creating long poll thread");
/* init longpoll thread info */
longpoll_thr_id = opt_n_threads + 1;
thr = &thr_info[longpoll_thr_id];
thr->id = longpoll_thr_id;
@@ -3635,7 +3736,10 @@ int main(int argc, char *argv[])
}
if (want_stratum)
{
/* init stratum thread info */
if ( opt_debug )
applog(LOG_INFO,"Creating stratum thread");
/* init stratum thread info */
stratum_thr_id = opt_n_threads + 2;
thr = &thr_info[stratum_thr_id];
thr->id = stratum_thr_id;
@@ -3655,7 +3759,10 @@ int main(int argc, char *argv[])
if ( opt_api_enabled )
{
/* api thread */
if ( opt_debug )
applog(LOG_INFO,"Creating API thread");
/* api thread */
api_thr_id = opt_n_threads + 3;
thr = &thr_info[api_thr_id];
thr->id = api_thr_id;
@@ -3665,7 +3772,7 @@ int main(int argc, char *argv[])
err = thread_create( thr, api_thread );
if ( err )
{
applog( LOG_ERR, "api thread create failed" );
applog( LOG_ERR, "API thread create failed" );
return 1;
}
if ( !opt_quiet )

View File

@@ -754,6 +754,8 @@ extern uint32_t solved_block_count;
extern pthread_mutex_t applog_lock;
extern pthread_mutex_t stats_lock;
extern bool opt_sapling;
extern const int pk_buffer_size_max;
extern int pk_buffer_size;
static char const usage[] = "\
Usage: " PACKAGE_NAME " [OPTIONS]\n\

View File

@@ -120,11 +120,26 @@ do { \
} while(0)
// Horizontal vector testing
#define mm256_allbits0( a ) _mm256_testc_si256( a, m256_neg1 )
#define mm256_allbits1( a ) _mm256_testz_si256( a, a )
#define mm256_anybits0( a ) !mm256_allbits1( a )
#define mm256_anybits1( a ) !mm256_allbits0( a )
// Bytewise test of all 256 bits
#define mm256_all0_8( a ) \
( _mm256_movemask_epi8( a ) == 0 )
#define mm256_all1_8( a ) \
( _mm256_movemask_epi8( a ) == -1 )
#define mm256_anybits0( a ) \
( _mm256_movemask_epi8( a ) & 0xffffffff )
#define mm256_anybits1( a ) \
( ( _mm256_movemask_epi8( a ) & 0xffffffff ) != 0xffffffff )
// Bitwise test of all 256 bits
#define mm256_allbits0( a ) _mm256_testc_si256( a, m256_neg1 )
#define mm256_allbits1( a ) _mm256_testc_si256( m256_zero, a )
//#define mm256_anybits0( a ) !mm256_allbits1( a )
//#define mm256_anybits1( a ) !mm256_allbits0( a )
// Parallel AES, for when x is expected to be in a 256 bit register.

23
util.c
View File

@@ -159,8 +159,6 @@ void applog2( int prio, const char *fmt, ... )
}
void applog(int prio, const char *fmt, ...)
{
va_list ap;
@@ -921,25 +919,28 @@ bool jobj_binary(const json_t *obj, const char *key, void *buf, size_t buflen)
return true;
}
size_t address_to_script(unsigned char *out, size_t outsz, const char *addr)
size_t address_to_script( unsigned char *out, size_t outsz, const char *addr )
{
unsigned char addrbin[26];
unsigned char addrbin[ pk_buffer_size_max ];
int addrver;
size_t rv;
if (!b58dec(addrbin, sizeof(addrbin), addr))
if ( !b58dec( addrbin, outsz, addr ) )
return 0;
addrver = b58check(addrbin, sizeof(addrbin), addr);
if (addrver < 0)
addrver = b58check( addrbin, outsz, addr );
if ( addrver < 0 )
return 0;
switch (addrver) {
switch ( addrver )
{
case 5: /* Bitcoin script hash */
case 196: /* Testnet script hash */
if (outsz < (rv = 23))
if ( outsz < ( rv = 23 ) )
return rv;
out[ 0] = 0xa9; /* OP_HASH160 */
out[ 1] = 0x14; /* push 20 bytes */
memcpy(&out[2], &addrbin[1], 20);
memcpy( &out[2], &addrbin[1], 20 );
out[22] = 0x87; /* OP_EQUAL */
return rv;
default:
@@ -948,7 +949,7 @@ size_t address_to_script(unsigned char *out, size_t outsz, const char *addr)
out[ 0] = 0x76; /* OP_DUP */
out[ 1] = 0xa9; /* OP_HASH160 */
out[ 2] = 0x14; /* push 20 bytes */
memcpy(&out[3], &addrbin[1], 20);
memcpy( &out[3], &addrbin[1], 20 );
out[23] = 0x88; /* OP_EQUALVERIFY */
out[24] = 0xac; /* OP_CHECKSIG */
return rv;