mirror of
https://github.com/JayDDee/cpuminer-opt.git
synced 2025-09-17 23:44:27 +00:00
Compare commits
12 Commits
Author | SHA1 | Date | |
---|---|---|---|
![]() |
3da149418a | ||
![]() |
720610cce5 | ||
![]() |
cedcf4d070 | ||
![]() |
81b50c3c71 | ||
![]() |
0e1e88f53e | ||
![]() |
45c77a5c81 | ||
![]() |
dbce7e0721 | ||
![]() |
6d66051de6 | ||
![]() |
b93be8816a | ||
![]() |
19b0ac6d5c | ||
![]() |
3da2b958cf | ||
![]() |
dc2f8d81d3 |
35
README.md
35
README.md
@@ -12,10 +12,24 @@ a false positive, they are flagged simply because they are cryptocurrency
|
||||
miners. The source code is open for anyone to inspect. If you don't trust
|
||||
the software, don't use it.
|
||||
|
||||
|
||||
New thread:
|
||||
|
||||
https://bitcointalk.org/index.php?topic=5226770.msg53865575#msg53865575
|
||||
|
||||
Old thread:
|
||||
|
||||
https://bitcointalk.org/index.php?topic=1326803.0
|
||||
|
||||
mailto://jayddee246@gmail.com
|
||||
|
||||
This note is to confirm that bitcointalk users JayDDee and joblo are the
|
||||
same person.
|
||||
|
||||
I created a new BCT user JayDDee to match my github user id.
|
||||
The old thread has been locked but still contains useful information for
|
||||
reading.
|
||||
|
||||
See file RELEASE_NOTES for change log and INSTALL_LINUX or INSTALL_WINDOWS
|
||||
for compile instructions.
|
||||
|
||||
@@ -138,6 +152,27 @@ Supported Algorithms
|
||||
yespower-b2b generic yespower + blake2b
|
||||
zr5 Ziftr
|
||||
|
||||
Many variations of scrypt based algos can be mine by specifying their
|
||||
parameters:
|
||||
|
||||
scryptn2: --algo scrypt --param-n 1048576
|
||||
|
||||
cpupower: --algo yespower --param-key "CPUpower: The number of CPU working or available for proof-of-work mining"
|
||||
|
||||
power2b: --algo yespower-b2b --param-n 2048 --param-r 32 --param-key "Now I am become Death, the destroyer of worlds"
|
||||
|
||||
sugarchain: --algo yespower --param-n 2048 -param-r 32 --param-key "Satoshi Nakamoto 31/Oct/2008 Proof-of-work is essentially one-CPU-one-vote"
|
||||
|
||||
yespoweriots: --a yespower --param-n 2048 --param-key "Iots is committed to the development of IOT"
|
||||
|
||||
yespowerlitb: --algo yespower --param-n 2048 --param-r 32 --param-key "LITBpower: The number of LITB working or available for proof-of-work mini"
|
||||
|
||||
yespoweric: --algo yespower --param-n 2048 --param-r 32 --param-key "IsotopeC"
|
||||
|
||||
yespowerurx: --algo yespower --param-n 2048 --param-r 32 --param-key "UraniumX"
|
||||
|
||||
yespowerltncg: --algo yespower --param-n 2048 --param-r 32 --param-key "LTNCGYES"
|
||||
|
||||
Errata
|
||||
------
|
||||
|
||||
|
@@ -65,6 +65,105 @@ If not what makes it happen or not happen?
|
||||
Change Log
|
||||
----------
|
||||
|
||||
v3.12.5
|
||||
|
||||
Issues #246 & #251: fixed incorrect share diff for stratum and getwork,
|
||||
fixed incorrect target diff for getwork. Stats should now be correct for
|
||||
getwork as well as stratum.
|
||||
|
||||
Getwork: reduce stale blocks, faster response to new work.
|
||||
|
||||
Added ntime to new job/work logs.
|
||||
|
||||
README.md now lists the parameters for yespower variations that don't have
|
||||
a specific algo name.
|
||||
|
||||
v3.12.4.6
|
||||
|
||||
Issue #246: fixed getwork repeated new block logs with same height. New work
|
||||
for the same block is now reported as "New work" instead of New block".
|
||||
Also added a check that work is new before generating "New work" log.
|
||||
|
||||
Added target diff to getwork new block log.
|
||||
|
||||
Changed share ratio in share result log to simple fraction, no longer %.
|
||||
|
||||
Added debug log to display mininginfo, use -D.
|
||||
|
||||
v3.12.4.5
|
||||
|
||||
Issue #246: better stale share detection for getwork, and enhanced logging
|
||||
of stale shares for stratum & getwork.
|
||||
|
||||
Issue #251: fixed incorrect share difficulty and share ratio in share
|
||||
result log.
|
||||
|
||||
Changed submit log to include share diff and block height.
|
||||
|
||||
Small cosmetic changes to logs.
|
||||
|
||||
v3.12.4.4
|
||||
|
||||
Issue #246: Fixed net hashrate in getwork block log,
|
||||
removed duplicate getwork block log,
|
||||
other small tweaks to stats logs for getwork.
|
||||
|
||||
Issue #248: Fixed chronic stale shares with scrypt:1048576 (scryptn2).
|
||||
|
||||
v3.12.4.3
|
||||
|
||||
Fixed segfault in new block log for getwork.
|
||||
|
||||
Disabled silent discarding of stale work after the submit is logged.
|
||||
|
||||
v3.12.4.2
|
||||
|
||||
Issue #245: fixed getwork stale shares, solo mining with getwork now works.
|
||||
|
||||
Issue #246: implemented block and summary logs for getwork.
|
||||
|
||||
v3.12.4.1
|
||||
|
||||
Issue #245: fix scantime when mining solo with getwork.
|
||||
|
||||
Added debug logs for creation of stratum and longpoll threads, use -D to
|
||||
enable.
|
||||
|
||||
v3.12.4
|
||||
|
||||
Issue #244: Change longpoll to ignore job id.
|
||||
|
||||
Lyra2rev2 AVX2 +3%, AVX512 +6%.
|
||||
|
||||
v3.12.3.1
|
||||
|
||||
Issue #241: Fixed regression that broke coinbase address in v3.11.7.
|
||||
|
||||
v3.12.3
|
||||
|
||||
Issue #238: Fixed skunk AVX2.
|
||||
|
||||
Issue #239: Faster AVX2 & AVX512 for skein +44%, skein2 +30%, plus marginal
|
||||
increases for skunk, x16r, x16rv2, x16rt, x16rt-veil, x16s, x21s.
|
||||
|
||||
Faster anime VAES +57%, AVX512 +21%, AVX2 +3%.
|
||||
|
||||
Redesigned code reponsible for #236.
|
||||
|
||||
v3.12.2
|
||||
|
||||
Fixed xevan, skein, skein2 AVX2, #238.
|
||||
|
||||
Reversed polarity of AVX2 vector bit test utilities, and all users, to be
|
||||
logically and semantically correct. Follow up to issue #236.
|
||||
|
||||
v3.12.1
|
||||
|
||||
Fixed anime AVX2 low difficulty shares, git issue #236.
|
||||
|
||||
Periodic summary now reports lost hash rate due to rejected and stale shares,
|
||||
displayed only when non-zero.
|
||||
|
||||
v3.12.0.1
|
||||
|
||||
Fixed hodl rejects, git issue #237.
|
||||
|
@@ -281,39 +281,37 @@ void exec_hash_function( int algo, void *output, const void *pdata )
|
||||
const char* const algo_alias_map[][2] =
|
||||
{
|
||||
// alias proper
|
||||
{ "argon2d-crds", "argon2d250" },
|
||||
{ "argon2d-dyn", "argon2d500" },
|
||||
{ "argon2d-uis", "argon2d4096" },
|
||||
{ "bcd", "x13bcd" },
|
||||
{ "bitcore", "timetravel10" },
|
||||
{ "bitzeny", "yescryptr8" },
|
||||
{ "blake256r8", "blakecoin" },
|
||||
{ "blake256r8vnl", "vanilla" },
|
||||
{ "blake256r14", "blake" },
|
||||
{ "blake256r14dcr", "decred" },
|
||||
{ "cryptonote", "cryptonight" },
|
||||
{ "cryptonight-light", "cryptolight" },
|
||||
{ "diamond", "dmd-gr" },
|
||||
{ "droplp", "drop" },
|
||||
{ "espers", "hmq1725" },
|
||||
{ "flax", "c11" },
|
||||
{ "hsr", "x13sm3" },
|
||||
{ "jackpot", "jha" },
|
||||
{ "jane", "scryptjane" },
|
||||
{ "lyra2", "lyra2re" },
|
||||
{ "lyra2v2", "lyra2rev2" },
|
||||
{ "lyra2v3", "lyra2rev3" },
|
||||
{ "myrgr", "myr-gr" },
|
||||
{ "myriad", "myr-gr" },
|
||||
{ "neo", "neoscrypt" },
|
||||
{ "phi", "phi1612" },
|
||||
{ "sib", "x11gost" },
|
||||
{ "timetravel8", "timetravel" },
|
||||
{ "veil", "x16rt-veil" },
|
||||
{ "x16r-hex", "hex" },
|
||||
{ "yenten", "yescryptr16" },
|
||||
{ "ziftr", "zr5" },
|
||||
{ NULL, NULL }
|
||||
{ "argon2d-crds", "argon2d250" },
|
||||
{ "argon2d-dyn", "argon2d500" },
|
||||
{ "argon2d-uis", "argon2d4096" },
|
||||
{ "bcd", "x13bcd" },
|
||||
{ "bitcore", "timetravel10" },
|
||||
{ "bitzeny", "yescryptr8" },
|
||||
{ "blake256r8", "blakecoin" },
|
||||
{ "blake256r8vnl", "vanilla" },
|
||||
{ "blake256r14", "blake" },
|
||||
{ "blake256r14dcr", "decred" },
|
||||
{ "diamond", "dmd-gr" },
|
||||
{ "espers", "hmq1725" },
|
||||
{ "flax", "c11" },
|
||||
{ "hsr", "x13sm3" },
|
||||
{ "jackpot", "jha" },
|
||||
{ "jane", "scryptjane" },
|
||||
{ "lyra2", "lyra2re" },
|
||||
{ "lyra2v2", "lyra2rev2" },
|
||||
{ "lyra2v3", "lyra2rev3" },
|
||||
{ "myrgr", "myr-gr" },
|
||||
{ "myriad", "myr-gr" },
|
||||
{ "neo", "neoscrypt" },
|
||||
{ "phi", "phi1612" },
|
||||
{ "scryptn2", "scrypt:1048576" },
|
||||
{ "sib", "x11gost" },
|
||||
{ "timetravel8", "timetravel" },
|
||||
{ "veil", "x16rt-veil" },
|
||||
{ "x16r-hex", "hex" },
|
||||
{ "yenten", "yescryptr16" },
|
||||
{ "ziftr", "zr5" },
|
||||
{ NULL, NULL }
|
||||
};
|
||||
|
||||
// if arg is a valid alias for a known algo it is updated with the proper
|
||||
|
@@ -138,7 +138,7 @@ void bmw512_2way_close( bmw512_2way_context *ctx, void *dst );
|
||||
|
||||
#if defined(__AVX2__)
|
||||
|
||||
// BMW-512 4 way 64
|
||||
// BMW-512 64 bit 4 way
|
||||
|
||||
typedef struct {
|
||||
__m256i buf[16];
|
||||
@@ -149,7 +149,6 @@ typedef struct {
|
||||
|
||||
typedef bmw_4way_big_context bmw512_4way_context;
|
||||
|
||||
|
||||
void bmw512_4way_init(void *cc);
|
||||
|
||||
void bmw512_4way_update(void *cc, const void *data, size_t len);
|
||||
@@ -164,6 +163,7 @@ void bmw512_4way_addbits_and_close(
|
||||
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
|
||||
// BMW-512 64 bit 8 way
|
||||
typedef struct {
|
||||
__m512i buf[16];
|
||||
__m512i H[16];
|
||||
@@ -171,6 +171,8 @@ typedef struct {
|
||||
uint64_t bit_count;
|
||||
} bmw512_8way_context __attribute__((aligned(128)));
|
||||
|
||||
void bmw512_8way_full( bmw512_8way_context *ctx, void *out, const void *data,
|
||||
size_t len );
|
||||
void bmw512_8way_init( bmw512_8way_context *ctx );
|
||||
void bmw512_8way_update( bmw512_8way_context *ctx, const void *data,
|
||||
size_t len );
|
||||
|
@@ -1507,6 +1507,93 @@ void bmw512_8way_close( bmw512_8way_context *ctx, void *dst )
|
||||
casti_m512i( dst, u ) = h1[ v ];
|
||||
}
|
||||
|
||||
void bmw512_8way_full( bmw512_8way_context *ctx, void *out, const void *data,
|
||||
size_t len )
|
||||
{
|
||||
__m512i *vdata = (__m512i*)data;
|
||||
__m512i *buf = ctx->buf;
|
||||
__m512i htmp[16];
|
||||
__m512i *H = ctx->H;
|
||||
__m512i *h2 = htmp;
|
||||
uint64_t bit_count = len * 8;
|
||||
size_t ptr = 0;
|
||||
const int buf_size = 128; // bytes of one lane, compatible with len
|
||||
|
||||
// Init
|
||||
|
||||
H[ 0] = m512_const1_64( 0x8081828384858687 );
|
||||
H[ 1] = m512_const1_64( 0x88898A8B8C8D8E8F );
|
||||
H[ 2] = m512_const1_64( 0x9091929394959697 );
|
||||
H[ 3] = m512_const1_64( 0x98999A9B9C9D9E9F );
|
||||
H[ 4] = m512_const1_64( 0xA0A1A2A3A4A5A6A7 );
|
||||
H[ 5] = m512_const1_64( 0xA8A9AAABACADAEAF );
|
||||
H[ 6] = m512_const1_64( 0xB0B1B2B3B4B5B6B7 );
|
||||
H[ 7] = m512_const1_64( 0xB8B9BABBBCBDBEBF );
|
||||
H[ 8] = m512_const1_64( 0xC0C1C2C3C4C5C6C7 );
|
||||
H[ 9] = m512_const1_64( 0xC8C9CACBCCCDCECF );
|
||||
H[10] = m512_const1_64( 0xD0D1D2D3D4D5D6D7 );
|
||||
H[11] = m512_const1_64( 0xD8D9DADBDCDDDEDF );
|
||||
H[12] = m512_const1_64( 0xE0E1E2E3E4E5E6E7 );
|
||||
H[13] = m512_const1_64( 0xE8E9EAEBECEDEEEF );
|
||||
H[14] = m512_const1_64( 0xF0F1F2F3F4F5F6F7 );
|
||||
H[15] = m512_const1_64( 0xF8F9FAFBFCFDFEFF );
|
||||
|
||||
// Update
|
||||
|
||||
while ( len > 0 )
|
||||
{
|
||||
size_t clen;
|
||||
clen = buf_size - ptr;
|
||||
if ( clen > len )
|
||||
clen = len;
|
||||
memcpy_512( buf + (ptr>>3), vdata, clen >> 3 );
|
||||
vdata = vdata + (clen>>3);
|
||||
len -= clen;
|
||||
ptr += clen;
|
||||
if ( ptr == buf_size )
|
||||
{
|
||||
__m512i *ht;
|
||||
compress_big_8way( buf, H, h2 );
|
||||
ht = H;
|
||||
H = h2;
|
||||
h2 = ht;
|
||||
ptr = 0;
|
||||
}
|
||||
}
|
||||
if ( H != ctx->H )
|
||||
memcpy_512( ctx->H, H, 16 );
|
||||
|
||||
// Close
|
||||
{
|
||||
__m512i h1[16], h2[16];
|
||||
size_t u, v;
|
||||
|
||||
buf[ ptr>>3 ] = m512_const1_64( 0x80 );
|
||||
ptr += 8;
|
||||
|
||||
if ( ptr > (buf_size - 8) )
|
||||
{
|
||||
memset_zero_512( buf + (ptr>>3), (buf_size - ptr) >> 3 );
|
||||
compress_big_8way( buf, H, h1 );
|
||||
ptr = 0;
|
||||
H = h1;
|
||||
}
|
||||
memset_zero_512( buf + (ptr>>3), (buf_size - 8 - ptr) >> 3 );
|
||||
buf[ (buf_size - 8) >> 3 ] = _mm512_set1_epi64( bit_count );
|
||||
compress_big_8way( buf, H, h2 );
|
||||
for ( u = 0; u < 16; u ++ )
|
||||
buf[ u ] = h2[ u ];
|
||||
compress_big_8way( buf, final_b8, h1 );
|
||||
for (u = 0, v = 8; u < 8; u ++, v ++)
|
||||
casti_m512i( out, u ) = h1[ v ];
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
#endif // AVX512
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
@@ -179,14 +179,6 @@ int cube_4way_full( cube_4way_context *sp, void *output, int hashbitlen,
|
||||
sp->rounds = 16;
|
||||
sp->pos = 0;
|
||||
|
||||
h[ 0] = m512_const1_128( iv[0] );
|
||||
h[ 1] = m512_const1_128( iv[1] );
|
||||
h[ 2] = m512_const1_128( iv[2] );
|
||||
h[ 3] = m512_const1_128( iv[3] );
|
||||
h[ 4] = m512_const1_128( iv[4] );
|
||||
h[ 5] = m512_const1_128( iv[5] );
|
||||
h[ 6] = m512_const1_128( iv[6] );
|
||||
h[ 7] = m512_const1_128( iv[7] );
|
||||
h[ 0] = m512_const1_128( iv[0] );
|
||||
h[ 1] = m512_const1_128( iv[1] );
|
||||
h[ 2] = m512_const1_128( iv[2] );
|
||||
@@ -447,14 +439,6 @@ int cube_2way_full( cube_2way_context *sp, void *output, int hashbitlen,
|
||||
sp->rounds = 16;
|
||||
sp->pos = 0;
|
||||
|
||||
h[ 0] = m256_const1_128( iv[0] );
|
||||
h[ 1] = m256_const1_128( iv[1] );
|
||||
h[ 2] = m256_const1_128( iv[2] );
|
||||
h[ 3] = m256_const1_128( iv[3] );
|
||||
h[ 4] = m256_const1_128( iv[4] );
|
||||
h[ 5] = m256_const1_128( iv[5] );
|
||||
h[ 6] = m256_const1_128( iv[6] );
|
||||
h[ 7] = m256_const1_128( iv[7] );
|
||||
h[ 0] = m256_const1_128( iv[0] );
|
||||
h[ 1] = m256_const1_128( iv[1] );
|
||||
h[ 2] = m256_const1_128( iv[2] );
|
||||
|
@@ -28,6 +28,27 @@ int cube_4way_update_close( cube_4way_context *sp, void *output,
|
||||
int cube_4way_full( cube_4way_context *sp, void *output, int hashbitlen,
|
||||
const void *data, size_t size );
|
||||
|
||||
int cube_4x256_full( cube_4way_context *sp, void *output, int hashbitlen,
|
||||
const void *data, size_t size );
|
||||
|
||||
#define cube512_4way_init( sp ) cube_4way_update( sp, 512 )
|
||||
#define cube512_4way_update cube_4way_update
|
||||
#define cube512_4way_update_close cube_4way_update
|
||||
#define cube512_4way_close cube_4way_update
|
||||
#define cube512_4way_full( sp, output, data, size ) \
|
||||
cube_4way_full( sp, output, 512, data, size )
|
||||
#define cube512_4x256_full( sp, output, data, size ) \
|
||||
cube_4x256_full( sp, output, 512, data, size )
|
||||
|
||||
#define cube256_4way_init( sp ) cube_4way_update( sp, 256 )
|
||||
#define cube256_4way_update cube_4way_update
|
||||
#define cube256_4way_update_close cube_4way_update
|
||||
#define cube256_4way_close cube_4way_update
|
||||
#define cube256_4way_full( sp, output, data, size ) \
|
||||
cube_4way_full( sp, output, 256, data, size )
|
||||
#define cube256_4x256_full( sp, output, data, size ) \
|
||||
cube_4x256_full( sp, output, 256, data, size )
|
||||
|
||||
#endif
|
||||
|
||||
// 2x128, 2 way parallel SSE2
|
||||
|
@@ -22,18 +22,26 @@ typedef struct
|
||||
} echo_4way_context __attribute__ ((aligned (64)));
|
||||
|
||||
int echo_4way_init( echo_4way_context *state, int hashbitlen );
|
||||
|
||||
#define echo512_4way_init( state ) echo_4way_init( state, 512 )
|
||||
#define echo256_4way_init( state ) echo_4way_init( state, 256 )
|
||||
|
||||
int echo_4way_update( echo_4way_context *state, const void *data,
|
||||
unsigned int databitlen);
|
||||
#define echo512_4way_update echo_4way_update
|
||||
|
||||
int echo_close( echo_4way_context *state, void *hashval );
|
||||
#define echo512_4way_close echo_4way_close
|
||||
|
||||
int echo_4way_update_close( echo_4way_context *state, void *hashval,
|
||||
const void *data, int databitlen );
|
||||
#define echo512_4way_update_close echo_4way_update_close
|
||||
|
||||
int echo_4way_full( echo_4way_context *ctx, void *hashval, int nHashSize,
|
||||
const void *data, int datalen );
|
||||
#define echo512_4way_full( state, hashval, data, datalen ) \
|
||||
echo_4way_full( state, hashval, 512, data, datalen )
|
||||
#define echo256_4way_full( state, hashval, data, datalen ) \
|
||||
echo_4way_full( state, hashval, 256, data, datalen )
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
@@ -74,6 +74,14 @@ void sph_fugue512_close(void *cc, void *dst);
|
||||
void sph_fugue512_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
#define sph_fugue512_full( cc, dst, data, len ) \
|
||||
do{ \
|
||||
sph_fugue512_init( cc ); \
|
||||
sph_fugue512( cc, data, len ); \
|
||||
sph_fugue512_close( cc, dst ); \
|
||||
}while(0)
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
@@ -76,37 +76,34 @@ int scanhash_allium( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t _ALIGN(128) hash[8];
|
||||
uint32_t _ALIGN(128) endiandata[20];
|
||||
uint32_t _ALIGN(128) edata[20];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t nonce = first_nonce;
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
const int thr_id = mythr->id;
|
||||
|
||||
if ( opt_benchmark )
|
||||
ptarget[7] = 0x3ffff;
|
||||
|
||||
for ( int i = 0; i < 19; i++ )
|
||||
be32enc( &endiandata[i], pdata[i] );
|
||||
edata[i] = bswap_32( pdata[i] );
|
||||
|
||||
sph_blake256_init( &allium_ctx.blake );
|
||||
sph_blake256( &allium_ctx.blake, endiandata, 64 );
|
||||
sph_blake256( &allium_ctx.blake, edata, 64 );
|
||||
|
||||
do {
|
||||
be32enc( &endiandata[19], nonce );
|
||||
allium_hash( hash, endiandata );
|
||||
if ( hash[7] <= Htarg )
|
||||
if ( fulltest( hash, ptarget ) && !opt_benchmark )
|
||||
edata[19] = nonce;
|
||||
allium_hash( hash, edata );
|
||||
if ( valid_hash( hash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = nonce;
|
||||
pdata[19] = bswap_32( nonce );
|
||||
submit_solution( work, hash, mythr );
|
||||
}
|
||||
nonce++;
|
||||
} while ( nonce < max_nonce && !work_restart[thr_id].restart );
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce + 1;
|
||||
*hashes_done = pdata[19] - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@@ -94,12 +94,12 @@ bool lyra2rev2_thread_init()
|
||||
const int64_t ROW_LEN_BYTES = ROW_LEN_INT64 * 8;
|
||||
|
||||
int size = (int64_t)ROW_LEN_BYTES * 4; // nRows;
|
||||
#if defined (LYRA2REV2_8WAY)
|
||||
#if defined (LYRA2REV2_16WAY)
|
||||
l2v2_wholeMatrix = _mm_malloc( 2 * size, 64 ); // 2 way
|
||||
init_lyra2rev2_8way_ctx();;
|
||||
#elif defined (LYRA2REV2_4WAY)
|
||||
init_lyra2rev2_16way_ctx();;
|
||||
#elif defined (LYRA2REV2_8WAY)
|
||||
l2v2_wholeMatrix = _mm_malloc( size, 64 );
|
||||
init_lyra2rev2_4way_ctx();;
|
||||
init_lyra2rev2_8way_ctx();;
|
||||
#else
|
||||
l2v2_wholeMatrix = _mm_malloc( size, 64 );
|
||||
init_lyra2rev2_ctx();
|
||||
@@ -109,17 +109,17 @@ bool lyra2rev2_thread_init()
|
||||
|
||||
bool register_lyra2rev2_algo( algo_gate_t* gate )
|
||||
{
|
||||
#if defined (LYRA2REV2_8WAY)
|
||||
#if defined (LYRA2REV2_16WAY)
|
||||
gate->scanhash = (void*)&scanhash_lyra2rev2_16way;
|
||||
gate->hash = (void*)&lyra2rev2_16way_hash;
|
||||
#elif defined (LYRA2REV2_8WAY)
|
||||
gate->scanhash = (void*)&scanhash_lyra2rev2_8way;
|
||||
gate->hash = (void*)&lyra2rev2_8way_hash;
|
||||
#elif defined (LYRA2REV2_4WAY)
|
||||
gate->scanhash = (void*)&scanhash_lyra2rev2_4way;
|
||||
gate->hash = (void*)&lyra2rev2_4way_hash;
|
||||
#else
|
||||
gate->scanhash = (void*)&scanhash_lyra2rev2;
|
||||
gate->hash = (void*)&lyra2rev2_hash;
|
||||
#endif
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT;
|
||||
gate->optimizations = SSE2_OPT | AVX2_OPT | AVX512_OPT;
|
||||
gate->miner_thread_init = (void*)&lyra2rev2_thread_init;
|
||||
opt_target_factor = 256.0;
|
||||
return true;
|
||||
|
@@ -51,30 +51,32 @@ bool init_lyra2rev3_ctx();
|
||||
//////////////////////////////////
|
||||
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
#define LYRA2REV2_8WAY 1
|
||||
#define LYRA2REV2_16WAY 1
|
||||
#elif defined(__AVX2__)
|
||||
#define LYRA2REV2_4WAY 1
|
||||
#define LYRA2REV2_8WAY 1
|
||||
#endif
|
||||
|
||||
extern __thread uint64_t* l2v2_wholeMatrix;
|
||||
|
||||
bool register_lyra2rev2_algo( algo_gate_t* gate );
|
||||
|
||||
#if defined(LYRA2REV2_8WAY)
|
||||
#if defined(LYRA2REV2_16WAY)
|
||||
|
||||
void lyra2rev2_16way_hash( void *state, const void *input );
|
||||
int scanhash_lyra2rev2_16way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
bool init_lyra2rev2_16way_ctx();
|
||||
|
||||
#elif defined(LYRA2REV2_8WAY)
|
||||
|
||||
void lyra2rev2_8way_hash( void *state, const void *input );
|
||||
int scanhash_lyra2rev2_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
bool init_lyra2rev2_8way_ctx();
|
||||
|
||||
#elif defined(LYRA2REV2_4WAY)
|
||||
|
||||
void lyra2rev2_4way_hash( void *state, const void *input );
|
||||
int scanhash_lyra2rev2_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
bool init_lyra2rev2_4way_ctx();
|
||||
|
||||
#else
|
||||
|
||||
void lyra2rev2_hash( void *state, const void *input );
|
||||
int scanhash_lyra2rev2( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
@@ -7,23 +7,227 @@
|
||||
#include "algo/cubehash/cubehash_sse2.h"
|
||||
#include "algo/cubehash/cube-hash-2way.h"
|
||||
|
||||
#if defined (LYRA2REV2_8WAY)
|
||||
|
||||
#if defined (LYRA2REV2_16WAY)
|
||||
|
||||
typedef struct {
|
||||
blake256_16way_context blake;
|
||||
keccak256_8way_context keccak;
|
||||
cubehashParam cube;
|
||||
skein256_8way_context skein;
|
||||
bmw256_16way_context bmw;
|
||||
} lyra2v2_16way_ctx_holder __attribute__ ((aligned (64)));
|
||||
|
||||
static lyra2v2_16way_ctx_holder l2v2_16way_ctx;
|
||||
|
||||
bool init_lyra2rev2_16way_ctx()
|
||||
{
|
||||
keccak256_8way_init( &l2v2_16way_ctx.keccak );
|
||||
cubehashInit( &l2v2_16way_ctx.cube, 256, 16, 32 );
|
||||
skein256_8way_init( &l2v2_16way_ctx.skein );
|
||||
bmw256_16way_init( &l2v2_16way_ctx.bmw );
|
||||
return true;
|
||||
}
|
||||
|
||||
void lyra2rev2_16way_hash( void *state, const void *input )
|
||||
{
|
||||
uint32_t vhash[8*16] __attribute__ ((aligned (128)));
|
||||
uint32_t hash0[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash1[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash2[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash3[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash4[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash5[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash6[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash7[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash8[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash9[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash10[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash11[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash12[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash13[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash14[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash15[8] __attribute__ ((aligned (64)));
|
||||
lyra2v2_16way_ctx_holder ctx __attribute__ ((aligned (64)));
|
||||
memcpy( &ctx, &l2v2_16way_ctx, sizeof(l2v2_16way_ctx) );
|
||||
|
||||
blake256_16way_update( &ctx.blake, input + (64<<4), 16 );
|
||||
blake256_16way_close( &ctx.blake, vhash );
|
||||
|
||||
dintrlv_16x32( hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7,
|
||||
hash8, hash9, hash10, hash11,
|
||||
hash12, hash13, hash14, hash15, vhash, 256 );
|
||||
|
||||
intrlv_8x64( vhash, hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7, 256 );
|
||||
|
||||
keccak256_8way_update( &ctx.keccak, vhash, 32 );
|
||||
keccak256_8way_close( &ctx.keccak, vhash );
|
||||
|
||||
dintrlv_8x64( hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7, vhash, 256 );
|
||||
intrlv_8x64( vhash, hash8, hash9, hash10, hash11,
|
||||
hash12, hash13, hash14, hash15, 256 );
|
||||
|
||||
keccak256_8way_init( &ctx.keccak );
|
||||
keccak256_8way_update( &ctx.keccak, vhash, 32 );
|
||||
keccak256_8way_close( &ctx.keccak, vhash );
|
||||
|
||||
dintrlv_8x64( hash8, hash9, hash10, hash11,
|
||||
hash12, hash13, hash14, hash5, vhash, 256 );
|
||||
|
||||
cubehash_full( &ctx.cube, (byte*) hash0, 256, (const byte*) hash0, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash1, 256, (const byte*) hash1, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash2, 256, (const byte*) hash2, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash3, 256, (const byte*) hash3, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash4, 256, (const byte*) hash4, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash5, 256, (const byte*) hash5, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash6, 256, (const byte*) hash6, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash7, 256, (const byte*) hash7, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash8, 256, (const byte*) hash8, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash9, 256, (const byte*) hash9, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash10, 256, (const byte*) hash10, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash11, 256, (const byte*) hash11, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash12, 256, (const byte*) hash12, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash13, 256, (const byte*) hash13, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash14, 256, (const byte*) hash14, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash15, 256, (const byte*) hash15, 32 );
|
||||
|
||||
|
||||
intrlv_2x256( vhash, hash0, hash1, 256 );
|
||||
LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 );
|
||||
dintrlv_2x256( hash0, hash1, vhash, 256 );
|
||||
intrlv_2x256( vhash, hash2, hash3, 256 );
|
||||
LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 );
|
||||
dintrlv_2x256( hash2, hash3, vhash, 256 );
|
||||
intrlv_2x256( vhash, hash4, hash5, 256 );
|
||||
LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 );
|
||||
dintrlv_2x256( hash4, hash5, vhash, 256 );
|
||||
intrlv_2x256( vhash, hash6, hash7, 256 );
|
||||
LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 );
|
||||
dintrlv_2x256( hash6, hash7, vhash, 256 );
|
||||
intrlv_2x256( vhash, hash8, hash9, 256 );
|
||||
LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 );
|
||||
dintrlv_2x256( hash8, hash9, vhash, 256 );
|
||||
intrlv_2x256( vhash, hash10, hash11, 256 );
|
||||
LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 );
|
||||
dintrlv_2x256( hash10, hash11, vhash, 256 );
|
||||
intrlv_2x256( vhash, hash12, hash13, 256 );
|
||||
LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 );
|
||||
dintrlv_2x256( hash12, hash13, vhash, 256 );
|
||||
intrlv_2x256( vhash, hash14, hash15, 256 );
|
||||
LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 );
|
||||
dintrlv_2x256( hash14, hash15, vhash, 256 );
|
||||
|
||||
intrlv_8x64( vhash, hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7, 256 );
|
||||
skein256_8way_update( &ctx.skein, vhash, 32 );
|
||||
skein256_8way_close( &ctx.skein, vhash );
|
||||
|
||||
dintrlv_8x64( hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7, vhash, 256 );
|
||||
intrlv_8x64( vhash, hash8, hash9, hash10, hash11, hash12,
|
||||
hash13, hash14, hash15, 256 );
|
||||
|
||||
skein256_8way_init( &ctx.skein );
|
||||
skein256_8way_update( &ctx.skein, vhash, 32 );
|
||||
skein256_8way_close( &ctx.skein, vhash );
|
||||
|
||||
dintrlv_8x64( hash8, hash9, hash10, hash11,
|
||||
hash12, hash13, hash14, hash15, vhash, 256 );
|
||||
|
||||
|
||||
cubehash_full( &ctx.cube, (byte*) hash0, 256, (const byte*) hash0, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash1, 256, (const byte*) hash1, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash2, 256, (const byte*) hash2, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash3, 256, (const byte*) hash3, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash4, 256, (const byte*) hash4, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash5, 256, (const byte*) hash5, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash6, 256, (const byte*) hash6, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash7, 256, (const byte*) hash7, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash8, 256, (const byte*) hash8, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash9, 256, (const byte*) hash9, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash10, 256, (const byte*) hash10, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash11, 256, (const byte*) hash11, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash12, 256, (const byte*) hash12, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash13, 256, (const byte*) hash13, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash14, 256, (const byte*) hash14, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash15, 256, (const byte*) hash15, 32 );
|
||||
|
||||
intrlv_16x32( vhash, hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7,
|
||||
hash8, hash9, hash10, hash11,
|
||||
hash12, hash13, hash14, hash15, 256 );
|
||||
|
||||
bmw256_16way_update( &ctx.bmw, vhash, 32 );
|
||||
bmw256_16way_close( &ctx.bmw, state );
|
||||
}
|
||||
|
||||
int scanhash_lyra2rev2_16way( struct work *work, const uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[8*16] __attribute__ ((aligned (128)));
|
||||
uint32_t vdata[20*16] __attribute__ ((aligned (64)));
|
||||
uint32_t *hashd7 = &hash[7*16];
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 16;
|
||||
uint32_t n = first_nonce;
|
||||
const uint32_t targ32 = ptarget[7];
|
||||
__m512i *noncev = (__m512i*)vdata + 19;
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
if ( bench ) ptarget[7] = 0x0000ff;
|
||||
|
||||
mm512_bswap32_intrlv80_16x32( vdata, pdata );
|
||||
*noncev = _mm512_set_epi32( n+15, n+14, n+13, n+12, n+11, n+10, n+ 9, n+ 8,
|
||||
n+ 7, n+ 6, n+ 5, n+ 4, n+ 3, n+ 2, n+ 1, n );
|
||||
blake256_16way_init( &l2v2_16way_ctx.blake );
|
||||
blake256_16way_update( &l2v2_16way_ctx.blake, vdata, 64 );
|
||||
|
||||
do
|
||||
{
|
||||
lyra2rev2_16way_hash( hash, vdata );
|
||||
|
||||
for ( int lane = 0; lane < 16; lane++ )
|
||||
if ( unlikely( hashd7[lane] <= targ32 ) )
|
||||
{
|
||||
extr_lane_16x32( lane_hash, hash, lane, 256 );
|
||||
if ( likely( valid_hash( lane_hash, ptarget ) && !bench ) )
|
||||
{
|
||||
pdata[19] = bswap_32( n + lane );
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
}
|
||||
}
|
||||
*noncev = _mm512_add_epi32( *noncev, m512_const1_32( 16 ) );
|
||||
n += 16;
|
||||
} while ( likely( (n < last_nonce) && !work_restart[thr_id].restart ) );
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#elif defined (LYRA2REV2_8WAY)
|
||||
|
||||
typedef struct {
|
||||
blake256_8way_context blake;
|
||||
keccak256_8way_context keccak;
|
||||
cube_4way_context cube;
|
||||
skein256_8way_context skein;
|
||||
bmw256_8way_context bmw;
|
||||
keccak256_4way_context keccak;
|
||||
cubehashParam cube;
|
||||
skein256_4way_context skein;
|
||||
bmw256_8way_context bmw;
|
||||
} lyra2v2_8way_ctx_holder __attribute__ ((aligned (64)));
|
||||
|
||||
static lyra2v2_8way_ctx_holder l2v2_8way_ctx;
|
||||
|
||||
bool init_lyra2rev2_8way_ctx()
|
||||
{
|
||||
keccak256_8way_init( &l2v2_8way_ctx.keccak );
|
||||
cube_4way_init( &l2v2_8way_ctx.cube, 256, 16, 32 );
|
||||
skein256_8way_init( &l2v2_8way_ctx.skein );
|
||||
keccak256_4way_init( &l2v2_8way_ctx.keccak );
|
||||
cubehashInit( &l2v2_8way_ctx.cube, 256, 16, 32 );
|
||||
skein256_4way_init( &l2v2_8way_ctx.skein );
|
||||
bmw256_8way_init( &l2v2_8way_ctx.bmw );
|
||||
return true;
|
||||
}
|
||||
@@ -31,8 +235,6 @@ bool init_lyra2rev2_8way_ctx()
|
||||
void lyra2rev2_8way_hash( void *state, const void *input )
|
||||
{
|
||||
uint32_t vhash[8*8] __attribute__ ((aligned (128)));
|
||||
uint32_t vhashA[8*8] __attribute__ ((aligned (64)));
|
||||
uint32_t vhashB[8*8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash0[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash1[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash2[8] __attribute__ ((aligned (64)));
|
||||
@@ -47,103 +249,113 @@ void lyra2rev2_8way_hash( void *state, const void *input )
|
||||
blake256_8way_update( &ctx.blake, input + (64<<3), 16 );
|
||||
blake256_8way_close( &ctx.blake, vhash );
|
||||
|
||||
rintrlv_8x32_8x64( vhashA, vhash, 256 );
|
||||
dintrlv_8x32( hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7, vhash, 256 );
|
||||
|
||||
keccak256_8way_update( &ctx.keccak, vhashA, 32 );
|
||||
keccak256_8way_close( &ctx.keccak, vhash );
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 256 );
|
||||
keccak256_4way_update( &ctx.keccak, vhash, 32 );
|
||||
keccak256_4way_close( &ctx.keccak, vhash );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 256 );
|
||||
intrlv_4x64( vhash, hash4, hash5, hash6, hash7, 256 );
|
||||
keccak256_4way_init( &ctx.keccak );
|
||||
keccak256_4way_update( &ctx.keccak, vhash, 32 );
|
||||
keccak256_4way_close( &ctx.keccak, vhash );
|
||||
dintrlv_4x64( hash4, hash5, hash6, hash7, vhash, 256 );
|
||||
|
||||
rintrlv_8x64_4x128( vhashA, vhashB, vhash, 256 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash0, 256, (const byte*) hash0, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash1, 256, (const byte*) hash1, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash2, 256, (const byte*) hash2, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash3, 256, (const byte*) hash3, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash4, 256, (const byte*) hash4, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash5, 256, (const byte*) hash5, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash6, 256, (const byte*) hash6, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash7, 256, (const byte*) hash7, 32 );
|
||||
|
||||
cube_4way_update_close( &ctx.cube, vhashA, vhashA, 32 );
|
||||
cube_4way_init( &ctx.cube, 256, 16, 32 );
|
||||
cube_4way_update_close( &ctx.cube, vhashB, vhashB, 32 );
|
||||
|
||||
dintrlv_4x128( hash0, hash1, hash2, hash3, vhashA, 256 );
|
||||
dintrlv_4x128( hash4, hash5, hash6, hash7, vhashB, 256 );
|
||||
|
||||
intrlv_2x256( vhash, hash0, hash1, 256 );
|
||||
LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 );
|
||||
dintrlv_2x256( hash0, hash1, vhash, 256 );
|
||||
intrlv_2x256( vhash, hash2, hash3, 256 );
|
||||
LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 );
|
||||
dintrlv_2x256( hash2, hash3, vhash, 256 );
|
||||
intrlv_2x256( vhash, hash4, hash5, 256 );
|
||||
LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 );
|
||||
dintrlv_2x256( hash4, hash5, vhash, 256 );
|
||||
intrlv_2x256( vhash, hash6, hash7, 256 );
|
||||
LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 );
|
||||
dintrlv_2x256( hash6, hash7, vhash, 256 );
|
||||
|
||||
intrlv_8x64( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7, 256 );
|
||||
|
||||
skein256_8way_update( &ctx.skein, vhash, 32 );
|
||||
skein256_8way_close( &ctx.skein, vhash );
|
||||
|
||||
rintrlv_8x64_4x128( vhashA, vhashB, vhash, 256 );
|
||||
|
||||
cube_4way_init( &ctx.cube, 256, 16, 32 );
|
||||
cube_4way_update_close( &ctx.cube, vhashA, vhashA, 32 );
|
||||
cube_4way_init( &ctx.cube, 256, 16, 32 );
|
||||
cube_4way_update_close( &ctx.cube, vhashB, vhashB, 32 );
|
||||
LYRA2REV2( l2v2_wholeMatrix, hash0, 32, hash0, 32, hash0, 32, 1, 4, 4 );
|
||||
LYRA2REV2( l2v2_wholeMatrix, hash1, 32, hash1, 32, hash1, 32, 1, 4, 4 );
|
||||
LYRA2REV2( l2v2_wholeMatrix, hash2, 32, hash2, 32, hash2, 32, 1, 4, 4 );
|
||||
LYRA2REV2( l2v2_wholeMatrix, hash3, 32, hash3, 32, hash3, 32, 1, 4, 4 );
|
||||
LYRA2REV2( l2v2_wholeMatrix, hash4, 32, hash4, 32, hash4, 32, 1, 4, 4 );
|
||||
LYRA2REV2( l2v2_wholeMatrix, hash5, 32, hash5, 32, hash5, 32, 1, 4, 4 );
|
||||
LYRA2REV2( l2v2_wholeMatrix, hash6, 32, hash6, 32, hash6, 32, 1, 4, 4 );
|
||||
LYRA2REV2( l2v2_wholeMatrix, hash7, 32, hash7, 32, hash7, 32, 1, 4, 4 );
|
||||
|
||||
dintrlv_4x128( hash0, hash1, hash2, hash3, vhashA, 256 );
|
||||
dintrlv_4x128( hash4, hash5, hash6, hash7, vhashB, 256 );
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 256 );
|
||||
skein256_4way_update( &ctx.skein, vhash, 32 );
|
||||
skein256_4way_close( &ctx.skein, vhash );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 256 );
|
||||
intrlv_4x64( vhash, hash4, hash5, hash6, hash7, 256 );
|
||||
skein256_4way_init( &ctx.skein );
|
||||
skein256_4way_update( &ctx.skein, vhash, 32 );
|
||||
skein256_4way_close( &ctx.skein, vhash );
|
||||
dintrlv_4x64( hash4, hash5, hash6, hash7, vhash, 256 );
|
||||
|
||||
intrlv_8x32( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7, 256 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash0, 256, (const byte*) hash0, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash1, 256, (const byte*) hash1, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash2, 256, (const byte*) hash2, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash3, 256, (const byte*) hash3, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash4, 256, (const byte*) hash4, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash5, 256, (const byte*) hash5, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash6, 256, (const byte*) hash6, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash7, 256, (const byte*) hash7, 32 );
|
||||
|
||||
intrlv_8x32( vhash, hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7, 256 );
|
||||
|
||||
bmw256_8way_update( &ctx.bmw, vhash, 32 );
|
||||
bmw256_8way_close( &ctx.bmw, state );
|
||||
}
|
||||
|
||||
int scanhash_lyra2rev2_8way( struct work *work, uint32_t max_nonce,
|
||||
int scanhash_lyra2rev2_8way( struct work *work, const uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[8*8] __attribute__ ((aligned (128)));
|
||||
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
||||
uint32_t *hash7 = &(hash[7<<3]);
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
||||
uint32_t *hashd7 = &hash[7*8];
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 8;
|
||||
uint32_t n = first_nonce;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
__m256i *noncev = (__m256i*)vdata + 19; // aligned
|
||||
int thr_id = mythr->id;
|
||||
const uint32_t targ32 = ptarget[7];
|
||||
__m256i *noncev = (__m256i*)vdata + 19;
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
if ( opt_benchmark )
|
||||
( (uint32_t*)ptarget )[7] = 0x0000ff;
|
||||
if ( bench ) ptarget[7] = 0x0000ff;
|
||||
|
||||
mm256_bswap32_intrlv80_8x32( vdata, pdata );
|
||||
|
||||
*noncev = _mm256_set_epi32( n+7, n+6, n+5, n+4, n+3, n+2, n+1, n );
|
||||
blake256_8way_init( &l2v2_8way_ctx.blake );
|
||||
blake256_8way_update( &l2v2_8way_ctx.blake, vdata, 64 );
|
||||
|
||||
do
|
||||
{
|
||||
*noncev = mm256_bswap_32( _mm256_set_epi32( n+7, n+6, n+5, n+4,
|
||||
n+3, n+2, n+1, n ) );
|
||||
|
||||
lyra2rev2_8way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
for ( int lane = 0; lane < 8; lane++ ) if ( hash7[lane] <= Htarg )
|
||||
for ( int lane = 0; lane < 8; lane++ )
|
||||
if ( unlikely( hashd7[lane] <= targ32 ) )
|
||||
{
|
||||
extr_lane_8x32( lane_hash, hash, lane, 256 );
|
||||
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||
if ( likely( valid_hash( lane_hash, ptarget ) && !bench ) )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
pdata[19] = bswap_32( n + lane );
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
}
|
||||
}
|
||||
*noncev = _mm256_add_epi32( *noncev, m256_const1_32( 8 ) );
|
||||
n += 8;
|
||||
} while ( (n < last_nonce) && !work_restart[thr_id].restart);
|
||||
} while ( likely( (n < last_nonce) && !work_restart[thr_id].restart ) );
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/*
|
||||
#elif defined (LYRA2REV2_4WAY)
|
||||
|
||||
typedef struct {
|
||||
@@ -226,15 +438,16 @@ int scanhash_lyra2rev2_4way( struct work *work, uint32_t max_nonce,
|
||||
{
|
||||
uint32_t hash[8*4] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t *hash7 = &(hash[7<<2]);
|
||||
uint32_t *hashd7 = &(hash[7<<2]);
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 4;
|
||||
uint32_t n = first_nonce;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
__m128i *noncev = (__m128i*)vdata + 19; // aligned
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
const uint32_t targ32 = ptarget[7];
|
||||
__m128i *noncev = (__m128i*)vdata + 19;
|
||||
int thr_id = mythr->id;
|
||||
|
||||
if ( opt_benchmark )
|
||||
( (uint32_t*)ptarget )[7] = 0x0000ff;
|
||||
@@ -249,21 +462,22 @@ int scanhash_lyra2rev2_4way( struct work *work, uint32_t max_nonce,
|
||||
*noncev = mm128_bswap_32( _mm_set_epi32( n+3, n+2, n+1, n ) );
|
||||
|
||||
lyra2rev2_4way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
for ( int lane = 0; lane < 4; lane++ ) if ( hash7[lane] <= Htarg )
|
||||
for ( int lane = 0; lane < 4; lane++ ) if ( hashd7[lane] <= targ32 )
|
||||
{
|
||||
extr_lane_4x32( lane_hash, hash, lane, 256 );
|
||||
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||
if ( valid_hash( lane_hash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
}
|
||||
}
|
||||
n += 4;
|
||||
} while ( (n < max_nonce-4) && !work_restart[thr_id].restart);
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
} while ( (n < last_nonce) && !work_restart[thr_id].restart);
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
*/
|
||||
|
@@ -99,7 +99,7 @@ int scanhash_lyra2rev2( struct work *work,
|
||||
lyra2rev2_hash(hash, endiandata);
|
||||
|
||||
if (hash[7] <= Htarg )
|
||||
if( fulltest( hash, ptarget ) && !opt_benchmark )
|
||||
if( valid_hash( hash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = nonce;
|
||||
submit_solution( work, hash, mythr );
|
||||
|
@@ -130,15 +130,15 @@ int scanhash_lyra2rev3_16way( struct work *work, const uint32_t max_nonce,
|
||||
{
|
||||
uint32_t hash[8*16] __attribute__ ((aligned (128)));
|
||||
uint32_t vdata[20*16] __attribute__ ((aligned (64)));
|
||||
uint32_t *hash7 = &hash[7<<4];
|
||||
uint32_t *hashd7 = &hash[7*16];
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
||||
uint32_t *pdata = work->data;
|
||||
const uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
const uint32_t last_nonce = max_nonce - 16;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
__m512i *noncev = (__m512i*)vdata + 19; // aligned
|
||||
const uint32_t targ32 = ptarget[7];
|
||||
__m512i *noncev = (__m512i*)vdata + 19;
|
||||
const int thr_id = mythr->id;
|
||||
|
||||
if ( opt_benchmark ) ( (uint32_t*)ptarget )[7] = 0x0000ff;
|
||||
@@ -159,10 +159,10 @@ int scanhash_lyra2rev3_16way( struct work *work, const uint32_t max_nonce,
|
||||
pdata[19] = n;
|
||||
|
||||
for ( int lane = 0; lane < 16; lane++ )
|
||||
if ( unlikely( hash7[lane] <= Htarg ) )
|
||||
if ( unlikely( hashd7[lane] <= targ32 ) )
|
||||
{
|
||||
extr_lane_16x32( lane_hash, hash, lane, 256 );
|
||||
if ( likely( fulltest( lane_hash, ptarget ) && !opt_benchmark ) )
|
||||
if ( likely( valid_hash( lane_hash, ptarget ) && !opt_benchmark ) )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
@@ -170,6 +170,7 @@ int scanhash_lyra2rev3_16way( struct work *work, const uint32_t max_nonce,
|
||||
}
|
||||
n += 16;
|
||||
} while ( likely( (n < last_nonce) && !work_restart[thr_id].restart ) );
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
@@ -194,7 +195,7 @@ bool init_lyra2rev3_8way_ctx()
|
||||
|
||||
void lyra2rev3_8way_hash( void *state, const void *input )
|
||||
{
|
||||
uint32_t vhash[8*8] __attribute__ ((aligned (64)));
|
||||
uint32_t vhash[8*8] __attribute__ ((aligned (128)));
|
||||
uint32_t hash0[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash1[8] __attribute__ ((aligned (32)));
|
||||
uint32_t hash2[8] __attribute__ ((aligned (32)));
|
||||
@@ -250,17 +251,17 @@ void lyra2rev3_8way_hash( void *state, const void *input )
|
||||
int scanhash_lyra2rev3_8way( struct work *work, const uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[8*8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[8*8] __attribute__ ((aligned (128)));
|
||||
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
||||
uint32_t *hash7 = &hash[7<<3];
|
||||
uint32_t *hashd7 = &hash[7*8];
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 8;
|
||||
uint32_t n = first_nonce;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
__m256i *noncev = (__m256i*)vdata + 19; // aligned
|
||||
const uint32_t targ32 = ptarget[7];
|
||||
__m256i *noncev = (__m256i*)vdata + 19;
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
@@ -277,7 +278,7 @@ int scanhash_lyra2rev3_8way( struct work *work, const uint32_t max_nonce,
|
||||
pdata[19] = n;
|
||||
|
||||
for ( int lane = 0; lane < 8; lane++ )
|
||||
if ( unlikely( hash7[lane] <= Htarg ) )
|
||||
if ( unlikely( hashd7[lane] <= targ32 ) )
|
||||
{
|
||||
extr_lane_8x32( lane_hash, hash, lane, 256 );
|
||||
if ( likely( valid_hash( lane_hash, ptarget ) && !bench ) )
|
||||
@@ -357,42 +358,41 @@ int scanhash_lyra2rev3_4way( struct work *work, const uint32_t max_nonce,
|
||||
{
|
||||
uint32_t hash[8*4] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t *hash7 = &(hash[7<<2]);
|
||||
uint32_t *hashd7 = &(hash[7*4]);
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||
uint32_t *pdata = work->data;
|
||||
const uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
__m128i *noncev = (__m128i*)vdata + 19; // aligned
|
||||
const int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
const uint32_t targ32 = ptarget[7];
|
||||
__m128i *noncev = (__m128i*)vdata + 19;
|
||||
const int thr_id = mythr->id;
|
||||
|
||||
if ( opt_benchmark )
|
||||
( (uint32_t*)ptarget )[7] = 0x0000ff;
|
||||
|
||||
mm128_bswap32_intrlv80_4x32( vdata, pdata );
|
||||
*noncev = _mm_set_epi32( n+3, n+2, n+1, n );
|
||||
|
||||
blake256_4way_init( &l2v3_4way_ctx.blake );
|
||||
blake256_4way_update( &l2v3_4way_ctx.blake, vdata, 64 );
|
||||
|
||||
do
|
||||
{
|
||||
*noncev = mm128_bswap_32( _mm_set_epi32( n+3, n+2, n+1, n ) );
|
||||
|
||||
lyra2rev3_4way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
for ( int lane = 0; lane < 4; lane++ ) if ( hash7[lane] <= Htarg )
|
||||
for ( int lane = 0; lane < 4; lane++ ) if ( hashd7[lane] <= targ32 )
|
||||
{
|
||||
extr_lane_4x32( lane_hash, hash, lane, 256 );
|
||||
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||
if ( valid_hash( lane_hash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
pdata[19] = bswap_32( n + lane );
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
}
|
||||
}
|
||||
*noncev = _mm_add_epi32( *noncev, m128_const1_32( 4 ) );
|
||||
n += 4;
|
||||
} while ( (n < max_nonce-4) && !work_restart[thr_id].restart);
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return 0;
|
||||
}
|
||||
|
@@ -88,7 +88,7 @@ int scanhash_lyra2rev3( struct work *work,
|
||||
lyra2rev3_hash(hash, endiandata);
|
||||
|
||||
if (hash[7] <= Htarg )
|
||||
if( fulltest( hash, ptarget ) && !opt_benchmark )
|
||||
if( valid_hash( hash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = nonce;
|
||||
submit_solution( work, hash, mythr );
|
||||
|
@@ -56,7 +56,7 @@ int scanhash_lyra2z( struct work *work, uint32_t max_nonce,
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t nonce = first_nonce;
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
int thr_id = mythr->id;
|
||||
|
||||
if (opt_benchmark)
|
||||
ptarget[7] = 0x0000ff;
|
||||
@@ -65,14 +65,13 @@ int scanhash_lyra2z( struct work *work, uint32_t max_nonce,
|
||||
be32enc(&endiandata[i], pdata[i]);
|
||||
}
|
||||
|
||||
lyra2z_midstate( endiandata );
|
||||
lyra2z_midstate( endiandata );
|
||||
|
||||
do {
|
||||
be32enc(&endiandata[19], nonce);
|
||||
lyra2z_hash( hash, endiandata );
|
||||
|
||||
if ( hash[7] <= Htarg )
|
||||
if ( fulltest( hash, ptarget ) && !opt_benchmark )
|
||||
if ( valid_hash( hash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = nonce;
|
||||
submit_solution( work, hash, mythr );
|
||||
|
@@ -9,7 +9,7 @@ void lyra2z330_hash(void *state, const void *input, uint32_t height)
|
||||
{
|
||||
uint32_t _ALIGN(256) hash[16];
|
||||
|
||||
LYRA2Z( lyra2z330_wholeMatrix, hash, 32, input, 80, input, 80,
|
||||
LYRA2Z( lyra2z330_wholeMatrix, hash, 32, input, 80, input, 80,
|
||||
2, 330, 256 );
|
||||
|
||||
memcpy(state, hash, 32);
|
||||
@@ -18,38 +18,40 @@ void lyra2z330_hash(void *state, const void *input, uint32_t height)
|
||||
int scanhash_lyra2z330( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[8] __attribute__ ((aligned (64)));
|
||||
uint32_t endiandata[20] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[8] __attribute__ ((aligned (128)));
|
||||
uint32_t edata[20] __attribute__ ((aligned (64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t nonce = first_nonce;
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
const int thr_id = mythr->id;
|
||||
|
||||
if (opt_benchmark)
|
||||
ptarget[7] = 0x0000ff;
|
||||
|
||||
casti_m128i( endiandata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
|
||||
casti_m128i( endiandata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
|
||||
casti_m128i( endiandata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
|
||||
casti_m128i( endiandata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
|
||||
casti_m128i( endiandata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
|
||||
casti_m128i( edata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
|
||||
casti_m128i( edata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
|
||||
casti_m128i( edata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
|
||||
casti_m128i( edata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
|
||||
casti_m128i( edata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
|
||||
|
||||
do
|
||||
{
|
||||
be32enc( &endiandata[19], nonce );
|
||||
lyra2z330_hash( hash, endiandata, work->height );
|
||||
if ( hash[7] <= Htarg )
|
||||
if ( fulltest( hash, ptarget ) && !opt_benchmark )
|
||||
edata[19] = nonce;
|
||||
|
||||
LYRA2Z( lyra2z330_wholeMatrix, hash, 32, edata, 80, edata, 80,
|
||||
2, 330, 256 );
|
||||
|
||||
// lyra2z330_hash( hash, edata, work->height );
|
||||
if ( valid_hash( hash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = nonce;
|
||||
be32enc( pdata + 19, nonce );
|
||||
submit_solution( work, hash, mythr );
|
||||
}
|
||||
nonce++;
|
||||
} while ( nonce < max_nonce && !work_restart[thr_id].restart );
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce + 1;
|
||||
*hashes_done = nonce - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@@ -1,18 +1,241 @@
|
||||
#include "cpuminer-config.h"
|
||||
#include "anime-gate.h"
|
||||
|
||||
#if defined (ANIME_4WAY)
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "algo/blake/blake-hash-4way.h"
|
||||
#include "algo/bmw/bmw-hash-4way.h"
|
||||
#include "algo/skein/skein-hash-4way.h"
|
||||
#include "algo/jh/jh-hash-4way.h"
|
||||
#include "algo/keccak/keccak-hash-4way.h"
|
||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||
#if defined(__VAES__)
|
||||
#include "algo/groestl/groestl512-hash-4way.h"
|
||||
#endif
|
||||
|
||||
#if defined (ANIME_8WAY)
|
||||
|
||||
typedef struct {
|
||||
blake512_8way_context blake;
|
||||
bmw512_8way_context bmw;
|
||||
#if defined(__VAES__)
|
||||
groestl512_4way_context groestl;
|
||||
#else
|
||||
hashState_groestl groestl;
|
||||
#endif
|
||||
jh512_8way_context jh;
|
||||
skein512_8way_context skein;
|
||||
keccak512_8way_context keccak;
|
||||
} anime_8way_ctx_holder;
|
||||
|
||||
anime_8way_ctx_holder anime_8way_ctx __attribute__ ((aligned (64)));
|
||||
|
||||
void init_anime_8way_ctx()
|
||||
{
|
||||
blake512_8way_init( &anime_8way_ctx.blake );
|
||||
bmw512_8way_init( &anime_8way_ctx.bmw );
|
||||
#if defined(__VAES__)
|
||||
groestl512_4way_init( &anime_8way_ctx.groestl, 64 );
|
||||
#else
|
||||
init_groestl( &anime_8way_ctx.groestl, 64 );
|
||||
#endif
|
||||
skein512_8way_init( &anime_8way_ctx.skein );
|
||||
jh512_8way_init( &anime_8way_ctx.jh );
|
||||
keccak512_8way_init( &anime_8way_ctx.keccak );
|
||||
}
|
||||
|
||||
void anime_8way_hash( void *state, const void *input )
|
||||
{
|
||||
uint64_t vhash[8*8] __attribute__ ((aligned (128)));
|
||||
uint64_t vhashA[8*8] __attribute__ ((aligned (64)));
|
||||
uint64_t vhashB[8*8] __attribute__ ((aligned (64)));
|
||||
uint64_t vhashC[8*8] __attribute__ ((aligned (64)));
|
||||
#if !defined(__VAES__)
|
||||
uint64_t hash0[8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash1[8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash2[8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash3[8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash4[8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash5[8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash6[8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash7[8] __attribute__ ((aligned (64)));
|
||||
#endif
|
||||
__m512i* vh = (__m512i*)vhash;
|
||||
__m512i* vhA = (__m512i*)vhashA;
|
||||
__m512i* vhB = (__m512i*)vhashB;
|
||||
__m512i* vhC = (__m512i*)vhashC;
|
||||
const __m512i bit3_mask = m512_const1_64( 8 );
|
||||
const __m512i zero = _mm512_setzero_si512();
|
||||
__mmask8 vh_mask;
|
||||
anime_8way_ctx_holder ctx;
|
||||
memcpy( &ctx, &anime_8way_ctx, sizeof(anime_8way_ctx) );
|
||||
|
||||
bmw512_8way_full( &ctx.bmw, vhash, input, 80 );
|
||||
|
||||
blake512_8way_full( &ctx.blake, vhash, vhash, 64 );
|
||||
|
||||
vh_mask = _mm512_cmpeq_epi64_mask( _mm512_and_si512( vh[0], bit3_mask ),
|
||||
zero );
|
||||
|
||||
#if defined(__VAES__)
|
||||
|
||||
rintrlv_8x64_4x128( vhashA, vhashB, vhash, 512 );
|
||||
|
||||
if ( ( vh_mask & 0x0f ) != 0x0f )
|
||||
groestl512_4way_full( &ctx.groestl, vhashA, vhashA, 64 );
|
||||
if ( ( vh_mask & 0xf0 ) != 0xf0 )
|
||||
groestl512_4way_full( &ctx.groestl, vhashB, vhashB, 64 );
|
||||
|
||||
rintrlv_4x128_8x64( vhashC, vhashA, vhashB, 512 );
|
||||
|
||||
#else
|
||||
|
||||
dintrlv_8x64_512( hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7, vhash );
|
||||
|
||||
if ( hash0[0] & 8 )
|
||||
groestl512_full( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||
if ( hash1[0] & 8 )
|
||||
groestl512_full( &ctx.groestl, (char*)hash1, (char*)hash1, 512 );
|
||||
if ( hash2[0] & 8)
|
||||
groestl512_full( &ctx.groestl, (char*)hash2, (char*)hash2, 512 );
|
||||
if ( hash3[0] & 8 )
|
||||
groestl512_full( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
||||
if ( hash4[0] & 8 )
|
||||
groestl512_full( &ctx.groestl, (char*)hash4, (char*)hash4, 512 );
|
||||
if ( hash5[0] & 8 )
|
||||
groestl512_full( &ctx.groestl, (char*)hash5, (char*)hash5, 512 );
|
||||
if ( hash6[0] & 8 )
|
||||
groestl512_full( &ctx.groestl, (char*)hash6, (char*)hash6, 512 );
|
||||
if ( hash7[0] & 8 )
|
||||
groestl512_full( &ctx.groestl, (char*)hash7, (char*)hash7, 512 );
|
||||
|
||||
intrlv_8x64_512( vhashC, hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7 );
|
||||
|
||||
#endif
|
||||
|
||||
if ( vh_mask & 0xff )
|
||||
skein512_8way_full( &ctx.skein, vhashB, vhash, 64 );
|
||||
|
||||
mm512_blend_hash_8x64( vh, vhC, vhB, vh_mask );
|
||||
|
||||
#if defined(__VAES__)
|
||||
|
||||
rintrlv_8x64_4x128( vhashA, vhashB, vhash, 512 );
|
||||
|
||||
groestl512_4way_full( &ctx.groestl, vhashA, vhashA, 64 );
|
||||
groestl512_4way_full( &ctx.groestl, vhashB, vhashB, 64 );
|
||||
|
||||
rintrlv_4x128_8x64( vhash, vhashA, vhashB, 512 );
|
||||
|
||||
#else
|
||||
|
||||
dintrlv_8x64_512( hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7, vhash );
|
||||
|
||||
groestl512_full( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash1, (char*)hash1, 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash2, (char*)hash2, 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash4, (char*)hash4, 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash5, (char*)hash5, 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash6, (char*)hash6, 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash7, (char*)hash7, 512 );
|
||||
|
||||
intrlv_8x64_512( vhash, hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7 );
|
||||
|
||||
#endif
|
||||
|
||||
jh512_8way_init( &ctx.jh );
|
||||
jh512_8way_update( &ctx.jh, vhash, 64 );
|
||||
jh512_8way_close( &ctx.jh, vhash );
|
||||
|
||||
vh_mask = _mm512_cmpeq_epi64_mask( _mm512_and_si512( vh[0], bit3_mask ),
|
||||
zero );
|
||||
|
||||
if ( ( vh_mask & 0xff ) != 0xff )
|
||||
blake512_8way_full( &ctx.blake, vhashA, vhash, 64 );
|
||||
if ( vh_mask & 0xff )
|
||||
bmw512_8way_full( &ctx.bmw, vhashB, vhash, 64 );
|
||||
|
||||
mm512_blend_hash_8x64( vh, vhA, vhB, vh_mask );
|
||||
|
||||
keccak512_8way_init( &ctx.keccak );
|
||||
keccak512_8way_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_8way_close( &ctx.keccak, vhash );
|
||||
|
||||
skein512_8way_full( &ctx.skein, vhash, vhash, 64 );
|
||||
|
||||
vh_mask = _mm512_cmpeq_epi64_mask( _mm512_and_si512( vh[0], bit3_mask ),
|
||||
zero );
|
||||
|
||||
if ( ( vh_mask & 0xff ) != 0xff )
|
||||
{
|
||||
keccak512_8way_init( &ctx.keccak );
|
||||
keccak512_8way_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_8way_close( &ctx.keccak, vhashA );
|
||||
}
|
||||
if ( vh_mask & 0xff )
|
||||
{
|
||||
jh512_8way_init( &ctx.jh );
|
||||
jh512_8way_update( &ctx.jh, vhash, 64 );
|
||||
jh512_8way_close( &ctx.jh, vhashB );
|
||||
}
|
||||
|
||||
casti_m512i( state,0 ) = _mm512_mask_blend_epi64( vh_mask, vhA[0], vhB[0] );
|
||||
casti_m512i( state,1 ) = _mm512_mask_blend_epi64( vh_mask, vhA[1], vhB[1] );
|
||||
casti_m512i( state,2 ) = _mm512_mask_blend_epi64( vh_mask, vhA[2], vhB[2] );
|
||||
casti_m512i( state,3 ) = _mm512_mask_blend_epi64( vh_mask, vhA[3], vhB[3] );
|
||||
}
|
||||
|
||||
int scanhash_anime_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint64_t hash64[4*8] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
||||
uint64_t *hash64_q3 = &(hash64[3*8]);
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint64_t targ64_q3 = ((uint64_t*)ptarget)[3];
|
||||
uint32_t n = pdata[19];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 8;
|
||||
__m512i *noncev = (__m512i*)vdata + 9;
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
mm512_bswap32_intrlv80_8x64( vdata, pdata );
|
||||
*noncev = mm512_intrlv_blend_32(
|
||||
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
|
||||
n+3, 0, n+2, 0, n+1, 0, n , 0 ), *noncev );
|
||||
|
||||
do
|
||||
{
|
||||
anime_8way_hash( hash64, vdata );
|
||||
|
||||
for ( int lane = 0; lane < 8; lane++ )
|
||||
if ( unlikely( hash64_q3[ lane ] <= targ64_q3 && !bench ) )
|
||||
{
|
||||
extr_lane_8x64( lane_hash, hash64, lane, 256 );
|
||||
if ( valid_hash( lane_hash, ptarget ) )
|
||||
{
|
||||
pdata[19] = bswap_32( n + lane );
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
}
|
||||
}
|
||||
*noncev = _mm512_add_epi32( *noncev,
|
||||
m512_const1_64( 0x0000000800000000 ) );
|
||||
n += 8;
|
||||
} while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) );
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#elif defined (ANIME_4WAY)
|
||||
|
||||
typedef struct {
|
||||
blake512_4way_context blake;
|
||||
@@ -23,18 +246,6 @@ typedef struct {
|
||||
keccak512_4way_context keccak;
|
||||
} anime_4way_ctx_holder;
|
||||
|
||||
anime_4way_ctx_holder anime_4way_ctx __attribute__ ((aligned (64)));
|
||||
|
||||
void init_anime_4way_ctx()
|
||||
{
|
||||
blake512_4way_init( &anime_4way_ctx.blake );
|
||||
bmw512_4way_init( &anime_4way_ctx.bmw );
|
||||
init_groestl( &anime_4way_ctx.groestl, 64 );
|
||||
skein512_4way_init( &anime_4way_ctx.skein );
|
||||
jh512_4way_init( &anime_4way_ctx.jh );
|
||||
keccak512_4way_init( &anime_4way_ctx.keccak );
|
||||
}
|
||||
|
||||
void anime_4way_hash( void *state, const void *input )
|
||||
{
|
||||
uint64_t hash0[8] __attribute__ ((aligned (64)));
|
||||
@@ -48,81 +259,61 @@ void anime_4way_hash( void *state, const void *input )
|
||||
__m256i* vhA = (__m256i*)vhashA;
|
||||
__m256i* vhB = (__m256i*)vhashB;
|
||||
__m256i vh_mask;
|
||||
const uint32_t mask = 8;
|
||||
int h_mask;
|
||||
const __m256i bit3_mask = m256_const1_64( 8 );
|
||||
const __m256i zero = _mm256_setzero_si256();
|
||||
anime_4way_ctx_holder ctx;
|
||||
memcpy( &ctx, &anime_4way_ctx, sizeof(anime_4way_ctx) );
|
||||
|
||||
bmw512_4way_init( &ctx.bmw );
|
||||
bmw512_4way_update( &ctx.bmw, input, 80 );
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
|
||||
blake512_4way_update( &ctx.blake, vhash, 64 );
|
||||
blake512_4way_close( &ctx.blake, vhash );
|
||||
blake512_4way_full( &ctx.blake, vhash, vhash, 64 );
|
||||
|
||||
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ), zero );
|
||||
h_mask = _mm256_movemask_epi8( vh_mask );
|
||||
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
if ( hash0[0] & mask )
|
||||
{
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash0,
|
||||
(char*)hash0, 512 );
|
||||
}
|
||||
if ( hash1[0] & mask )
|
||||
{
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash1,
|
||||
(char*)hash1, 512 );
|
||||
}
|
||||
if ( hash2[0] & mask )
|
||||
{
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash2,
|
||||
(char*)hash2, 512 );
|
||||
}
|
||||
if ( hash3[0] & mask )
|
||||
{
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash3,
|
||||
(char*)hash3, 512 );
|
||||
}
|
||||
// A
|
||||
if ( hash0[0] & 8 )
|
||||
groestl512_full( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||
if ( hash1[0] & 8 )
|
||||
groestl512_full( &ctx.groestl, (char*)hash1, (char*)hash1, 512 );
|
||||
if ( hash2[0] & 8)
|
||||
groestl512_full( &ctx.groestl, (char*)hash2, (char*)hash2, 512 );
|
||||
if ( hash3[0] & 8 )
|
||||
groestl512_full( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
||||
|
||||
intrlv_4x64( vhashA, hash0, hash1, hash2, hash3, 512 );
|
||||
|
||||
if ( mm256_anybits0( vh_mask ) )
|
||||
{
|
||||
skein512_4way_update( &ctx.skein, vhash, 64 );
|
||||
skein512_4way_close( &ctx.skein, vhashB );
|
||||
}
|
||||
// B
|
||||
if ( h_mask & 0xffffffff )
|
||||
skein512_4way_full( &ctx.skein, vhashB, vhash, 64 );
|
||||
|
||||
mm256_blend_hash_4x64( vh, vhA, vhB, vh_mask );
|
||||
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash1, (char*)hash1, 512 );
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash2, (char*)hash2, 512 );
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash1, (char*)hash1, 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash2, (char*)hash2, 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
||||
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
|
||||
jh512_4way_init( &ctx.jh );
|
||||
jh512_4way_update( &ctx.jh, vhash, 64 );
|
||||
jh512_4way_close( &ctx.jh, vhash );
|
||||
|
||||
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ), zero );
|
||||
h_mask = _mm256_movemask_epi8( vh_mask );
|
||||
|
||||
if ( mm256_anybits1( vh_mask ) )
|
||||
{
|
||||
blake512_4way_init( &ctx.blake );
|
||||
blake512_4way_update( &ctx.blake, vhash, 64 );
|
||||
blake512_4way_close( &ctx.blake, vhashA );
|
||||
}
|
||||
if ( mm256_anybits0( vh_mask ) )
|
||||
// A
|
||||
if ( ( h_mask & 0xffffffff ) != 0xffffffff )
|
||||
blake512_4way_full( &ctx.blake, vhashA, vhash, 64 );
|
||||
// B
|
||||
if ( h_mask & 0xffffffff )
|
||||
{
|
||||
bmw512_4way_init( &ctx.bmw );
|
||||
bmw512_4way_update( &ctx.bmw, vhash, 64 );
|
||||
@@ -131,64 +322,74 @@ void anime_4way_hash( void *state, const void *input )
|
||||
|
||||
mm256_blend_hash_4x64( vh, vhA, vhB, vh_mask );
|
||||
|
||||
keccak512_4way_init( &ctx.keccak );
|
||||
keccak512_4way_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_close( &ctx.keccak, vhash );
|
||||
|
||||
skein512_4way_init( &ctx.skein );
|
||||
skein512_4way_update( &ctx.skein, vhash, 64 );
|
||||
skein512_4way_close( &ctx.skein, vhash );
|
||||
skein512_4way_full( &ctx.skein, vhash, vhash, 64 );
|
||||
|
||||
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ), zero );
|
||||
h_mask = _mm256_movemask_epi8( vh_mask );
|
||||
|
||||
if ( mm256_anybits1( vh_mask ) )
|
||||
// A
|
||||
if ( ( h_mask & 0xffffffff ) != 0xffffffff )
|
||||
{
|
||||
keccak512_4way_init( &ctx.keccak );
|
||||
keccak512_4way_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_close( &ctx.keccak, vhashA );
|
||||
}
|
||||
if ( mm256_anybits0( vh_mask ) )
|
||||
// B
|
||||
if ( h_mask & 0xffffffff )
|
||||
{
|
||||
jh512_4way_init( &ctx.jh );
|
||||
jh512_4way_update( &ctx.jh, vhash, 64 );
|
||||
jh512_4way_close( &ctx.jh, vhashB );
|
||||
}
|
||||
|
||||
mm256_blend_hash_4x64( vh, vhA, vhB, vh_mask );
|
||||
|
||||
dintrlv_4x64( state, state+32, state+64, state+96, vhash, 256 );
|
||||
casti_m256i( state, 0 ) = _mm256_blendv_epi8( vhA[0], vhB[0], vh_mask );
|
||||
casti_m256i( state, 1 ) = _mm256_blendv_epi8( vhA[1], vhB[1], vh_mask );
|
||||
casti_m256i( state, 2 ) = _mm256_blendv_epi8( vhA[2], vhB[2], vh_mask );
|
||||
casti_m256i( state, 3 ) = _mm256_blendv_epi8( vhA[3], vhB[3], vh_mask );
|
||||
}
|
||||
|
||||
int scanhash_anime_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[4*8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash64[4*4] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
||||
uint64_t *hash64_q3 = &(hash64[3*4]);
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint64_t targ64_q3 = ((uint64_t*)ptarget)[3];
|
||||
uint32_t n = pdata[19];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 4;
|
||||
__m256i *noncev = (__m256i*)vdata + 9; // aligned
|
||||
__m256i *noncev = (__m256i*)vdata + 9;
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
*noncev = mm256_intrlv_blend_32(
|
||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
||||
|
||||
do
|
||||
{
|
||||
anime_4way_hash( hash, vdata );
|
||||
anime_4way_hash( hash64, vdata );
|
||||
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( valid_hash( hash+(i<<3), ptarget ) && !opt_benchmark )
|
||||
for ( int lane = 0; lane < 4; lane++ )
|
||||
if ( unlikely( hash64_q3[ lane ] <= targ64_q3 && !bench ) )
|
||||
{
|
||||
pdata[19] = bswap_32( n+i );
|
||||
submit_solution( work, hash+(i<<3), mythr );
|
||||
extr_lane_4x64( lane_hash, hash64, lane, 256 );
|
||||
if ( valid_hash( lane_hash, ptarget ) )
|
||||
{
|
||||
pdata[19] = bswap_32( n + lane );
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
}
|
||||
}
|
||||
*noncev = _mm256_add_epi32( *noncev,
|
||||
m256_const1_64( 0x0000000400000000 ) );
|
||||
n += 4;
|
||||
} while ( ( n < last_nonce ) && !work_restart[thr_id].restart );
|
||||
} while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) );
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
|
@@ -2,8 +2,10 @@
|
||||
|
||||
bool register_anime_algo( algo_gate_t* gate )
|
||||
{
|
||||
#if defined (ANIME_4WAY)
|
||||
init_anime_4way_ctx();
|
||||
#if defined (ANIME_8WAY)
|
||||
gate->scanhash = (void*)&scanhash_anime_8way;
|
||||
gate->hash = (void*)&anime_8way_hash;
|
||||
#elif defined (ANIME_4WAY)
|
||||
gate->scanhash = (void*)&scanhash_anime_4way;
|
||||
gate->hash = (void*)&anime_4way_hash;
|
||||
#else
|
||||
@@ -11,7 +13,7 @@ bool register_anime_algo( algo_gate_t* gate )
|
||||
gate->scanhash = (void*)&scanhash_anime;
|
||||
gate->hash = (void*)&anime_hash;
|
||||
#endif
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT;
|
||||
return true;
|
||||
};
|
||||
|
||||
|
@@ -4,18 +4,25 @@
|
||||
#include "algo-gate-api.h"
|
||||
#include <stdint.h>
|
||||
|
||||
#if defined(__AVX2__) && defined(__AES__)
|
||||
#define ANIME_4WAY
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
#define ANIME_8WAY 1
|
||||
#elif defined(__AVX2__) && defined(__AES__)
|
||||
#define ANIME_4WAY 1
|
||||
#endif
|
||||
|
||||
bool register_anime_algo( algo_gate_t* gate );
|
||||
|
||||
#if defined(ANIME_4WAY)
|
||||
#if defined(ANIME_8WAY)
|
||||
|
||||
void anime_8way_hash( void *state, const void *input );
|
||||
int scanhash_anime_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
#elif defined(ANIME_4WAY)
|
||||
|
||||
void anime_4way_hash( void *state, const void *input );
|
||||
int scanhash_anime_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
void init_anime_4way_ctx();
|
||||
|
||||
#endif
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
@@ -72,12 +72,10 @@ void quark_8way_hash( void *state, const void *input )
|
||||
|
||||
memcpy( &ctx, &quark_8way_ctx, sizeof(quark_8way_ctx) );
|
||||
|
||||
blake512_8way_update( &ctx.blake, input, 80 );
|
||||
blake512_8way_close( &ctx.blake, vhash );
|
||||
|
||||
bmw512_8way_update( &ctx.bmw, vhash, 64 );
|
||||
bmw512_8way_close( &ctx.bmw, vhash );
|
||||
blake512_8way_full( &ctx.blake, vhash, input, 80 );
|
||||
|
||||
bmw512_8way_full( &ctx.bmw, vhash, vhash, 64 );
|
||||
|
||||
vh_mask = _mm512_cmpeq_epi64_mask( _mm512_and_si512( vh[0], bit3_mask ),
|
||||
zero );
|
||||
|
||||
@@ -86,70 +84,34 @@ void quark_8way_hash( void *state, const void *input )
|
||||
|
||||
rintrlv_8x64_4x128( vhashA, vhashB, vhash, 512 );
|
||||
|
||||
if ( ( vh_mask & 0x0f ) != 0x0f )
|
||||
{
|
||||
groestl512_4way_init( &ctx.groestl, 64 );
|
||||
groestl512_4way_update_close( &ctx.groestl, vhashA, vhashA, 512 );
|
||||
}
|
||||
if ( ( vh_mask & 0xf0 ) != 0xf0 )
|
||||
{
|
||||
groestl512_4way_init( &ctx.groestl, 64 );
|
||||
groestl512_4way_update_close( &ctx.groestl, vhashB, vhashB, 512 );
|
||||
}
|
||||
rintrlv_4x128_8x64( vhashC, vhashA, vhashB, 512 );
|
||||
if ( ( vh_mask & 0x0f ) != 0x0f )
|
||||
groestl512_4way_full( &ctx.groestl, vhashA, vhashA, 64 );
|
||||
if ( ( vh_mask & 0xf0 ) != 0xf0 )
|
||||
groestl512_4way_full( &ctx.groestl, vhashB, vhashB, 64 );
|
||||
|
||||
rintrlv_4x128_8x64( vhashC, vhashA, vhashB, 512 );
|
||||
|
||||
#else
|
||||
|
||||
dintrlv_8x64( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
|
||||
vhash, 512 );
|
||||
|
||||
if ( hash0[0] & mask )
|
||||
{
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash0,
|
||||
(char*)hash0, 512 );
|
||||
}
|
||||
if ( hash1[0] & mask )
|
||||
{
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash1,
|
||||
(char*)hash1, 512 );
|
||||
}
|
||||
if ( hash2[0] & mask )
|
||||
{
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash2,
|
||||
(char*)hash2, 512 );
|
||||
}
|
||||
if ( hash3[0] & mask )
|
||||
{
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash3,
|
||||
(char*)hash3, 512 );
|
||||
}
|
||||
if ( hash4[0] & mask )
|
||||
{
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash4,
|
||||
(char*)hash4, 512 );
|
||||
}
|
||||
if ( hash5[0] & mask )
|
||||
{
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash5,
|
||||
(char*)hash5, 512 );
|
||||
}
|
||||
if ( hash6[0] & mask )
|
||||
{
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash6,
|
||||
(char*)hash6, 512 );
|
||||
}
|
||||
if ( hash7[0] & mask )
|
||||
{
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash7,
|
||||
(char*)hash7, 512 );
|
||||
}
|
||||
if ( hash0[0] & 8 )
|
||||
groestl512_full( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||
if ( hash1[0] & 8 )
|
||||
groestl512_full( &ctx.groestl, (char*)hash1, (char*)hash1, 512 );
|
||||
if ( hash2[0] & 8)
|
||||
groestl512_full( &ctx.groestl, (char*)hash2, (char*)hash2, 512 );
|
||||
if ( hash3[0] & 8 )
|
||||
groestl512_full( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
||||
if ( hash4[0] & 8 )
|
||||
groestl512_full( &ctx.groestl, (char*)hash4, (char*)hash4, 512 );
|
||||
if ( hash5[0] & 8 )
|
||||
groestl512_full( &ctx.groestl, (char*)hash5, (char*)hash5, 512 );
|
||||
if ( hash6[0] & 8 )
|
||||
groestl512_full( &ctx.groestl, (char*)hash6, (char*)hash6, 512 );
|
||||
if ( hash7[0] & 8 )
|
||||
groestl512_full( &ctx.groestl, (char*)hash7, (char*)hash7, 512 );
|
||||
|
||||
intrlv_8x64( vhashC, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7, 512 );
|
||||
@@ -157,10 +119,7 @@ void quark_8way_hash( void *state, const void *input )
|
||||
#endif
|
||||
|
||||
if ( vh_mask & 0xff )
|
||||
{
|
||||
skein512_8way_update( &ctx.skein, vhash, 64 );
|
||||
skein512_8way_close( &ctx.skein, vhashB );
|
||||
}
|
||||
skein512_8way_full( &ctx.skein, vhashB, vhash, 64 );
|
||||
|
||||
mm512_blend_hash_8x64( vh, vhC, vhB, vh_mask );
|
||||
|
||||
@@ -168,10 +127,10 @@ void quark_8way_hash( void *state, const void *input )
|
||||
|
||||
rintrlv_8x64_4x128( vhashA, vhashB, vhash, 512 );
|
||||
|
||||
groestl512_4way_init( &ctx.groestl, 64 );
|
||||
groestl512_4way_update_close( &ctx.groestl, vhashA, vhashA, 512 );
|
||||
groestl512_4way_init( &ctx.groestl, 64 );
|
||||
groestl512_4way_update_close( &ctx.groestl, vhashB, vhashB, 512 );
|
||||
if ( ( vh_mask & 0x0f ) != 0x0f )
|
||||
groestl512_4way_full( &ctx.groestl, vhashA, vhashA, 64 );
|
||||
if ( ( vh_mask & 0xf0 ) != 0xf0 )
|
||||
groestl512_4way_full( &ctx.groestl, vhashB, vhashB, 64 );
|
||||
|
||||
rintrlv_4x128_8x64( vhash, vhashA, vhashB, 512 );
|
||||
|
||||
@@ -180,22 +139,22 @@ void quark_8way_hash( void *state, const void *input )
|
||||
dintrlv_8x64( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
|
||||
vhash, 512 );
|
||||
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash1, (char*)hash1, 512 );
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash2, (char*)hash2, 512 );
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash4, (char*)hash4, 512 );
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash5, (char*)hash5, 512 );
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash6, (char*)hash6, 512 );
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash7, (char*)hash7, 512 );
|
||||
if ( hash0[0] & 8 )
|
||||
groestl512_full( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||
if ( hash1[0] & 8 )
|
||||
groestl512_full( &ctx.groestl, (char*)hash1, (char*)hash1, 512 );
|
||||
if ( hash2[0] & 8)
|
||||
groestl512_full( &ctx.groestl, (char*)hash2, (char*)hash2, 512 );
|
||||
if ( hash3[0] & 8 )
|
||||
groestl512_full( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
||||
if ( hash4[0] & 8 )
|
||||
groestl512_full( &ctx.groestl, (char*)hash4, (char*)hash4, 512 );
|
||||
if ( hash5[0] & 8 )
|
||||
groestl512_full( &ctx.groestl, (char*)hash5, (char*)hash5, 512 );
|
||||
if ( hash6[0] & 8 )
|
||||
groestl512_full( &ctx.groestl, (char*)hash6, (char*)hash6, 512 );
|
||||
if ( hash7[0] & 8 )
|
||||
groestl512_full( &ctx.groestl, (char*)hash7, (char*)hash7, 512 );
|
||||
|
||||
intrlv_8x64( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
|
||||
512 );
|
||||
@@ -209,27 +168,16 @@ void quark_8way_hash( void *state, const void *input )
|
||||
zero );
|
||||
|
||||
if ( ( vh_mask & 0xff ) != 0xff )
|
||||
{
|
||||
blake512_8way_init( &ctx.blake );
|
||||
blake512_8way_update( &ctx.blake, vhash, 64 );
|
||||
blake512_8way_close( &ctx.blake, vhashA );
|
||||
}
|
||||
|
||||
blake512_8way_full( &ctx.blake, vhashA, vhash, 64 );
|
||||
if ( vh_mask & 0xff )
|
||||
{
|
||||
bmw512_8way_init( &ctx.bmw );
|
||||
bmw512_8way_update( &ctx.bmw, vhash, 64 );
|
||||
bmw512_8way_close( &ctx.bmw, vhashB );
|
||||
}
|
||||
bmw512_8way_full( &ctx.bmw, vhashB, vhash, 64 );
|
||||
|
||||
mm512_blend_hash_8x64( vh, vhA, vhB, vh_mask );
|
||||
|
||||
keccak512_8way_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_8way_close( &ctx.keccak, vhash );
|
||||
|
||||
skein512_8way_init( &ctx.skein );
|
||||
skein512_8way_update( &ctx.skein, vhash, 64 );
|
||||
skein512_8way_close( &ctx.skein, vhash );
|
||||
skein512_8way_full( &ctx.skein, vhash, vhash, 64 );
|
||||
|
||||
vh_mask = _mm512_cmpeq_epi64_mask( _mm512_and_si512( vh[0], bit3_mask ),
|
||||
zero );
|
||||
@@ -258,41 +206,44 @@ void quark_8way_hash( void *state, const void *input )
|
||||
int scanhash_quark_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[8*8] __attribute__ ((aligned (128)));
|
||||
uint32_t vdata[24*8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash64[4*8] __attribute__ ((aligned (128)));
|
||||
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
||||
uint32_t *hash7 = &(hash[49]);
|
||||
uint32_t *pdata = work->data;
|
||||
uint64_t *hash64_q3 = &(hash64[3*8]);
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint64_t targ64_q3 = ((uint64_t*)ptarget)[3];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t n = pdata[19];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
__m512i *noncev = (__m512i*)vdata + 9; // aligned
|
||||
int thr_id = mythr->id;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t last_nonce = max_nonce - 8;
|
||||
__m512i *noncev = (__m512i*)vdata + 9;
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
mm512_bswap32_intrlv80_8x64( vdata, pdata );
|
||||
*noncev = mm512_intrlv_blend_32(
|
||||
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
|
||||
n+3, 0, n+2, 0, n+1, 0, n , 0 ), *noncev );
|
||||
do
|
||||
{
|
||||
*noncev = mm512_intrlv_blend_32( mm512_bswap_32(
|
||||
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
|
||||
n+3, 0, n+2, 0, n+1, 0, n , 0 ) ), *noncev );
|
||||
quark_8way_hash( hash64, vdata );
|
||||
|
||||
quark_8way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
for ( int i = 0; i < 8; i++ )
|
||||
if ( unlikely( hash7[ i<<1 ] <= Htarg ) )
|
||||
for ( int lane = 0; lane < 8; lane++ )
|
||||
if ( unlikely( hash64_q3[ lane ] <= targ64_q3 && !bench ) )
|
||||
{
|
||||
extr_lane_8x64( lane_hash, hash, i, 256 );
|
||||
if ( likely( fulltest( lane_hash, ptarget ) && !opt_benchmark ) )
|
||||
extr_lane_8x64( lane_hash, hash64, lane, 256 );
|
||||
if ( valid_hash( lane_hash, ptarget ) )
|
||||
{
|
||||
pdata[19] = n+i;
|
||||
submit_lane_solution( work, lane_hash, mythr, i );
|
||||
pdata[19] = bswap_32( n + lane );
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
}
|
||||
}
|
||||
*noncev = _mm512_add_epi32( *noncev,
|
||||
m512_const1_64( 0x0000000800000000 ) );
|
||||
n += 8;
|
||||
} while ( ( n < max_nonce-8 ) && !work_restart[thr_id].restart );
|
||||
} while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) );
|
||||
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
@@ -333,67 +284,47 @@ void quark_4way_hash( void *state, const void *input )
|
||||
__m256i* vhA = (__m256i*)vhashA;
|
||||
__m256i* vhB = (__m256i*)vhashB;
|
||||
__m256i vh_mask;
|
||||
int h_mask;
|
||||
quark_4way_ctx_holder ctx;
|
||||
const __m256i bit3_mask = m256_const1_64( 8 );
|
||||
const uint32_t mask = 8;
|
||||
const __m256i zero = _mm256_setzero_si256();
|
||||
|
||||
memcpy( &ctx, &quark_4way_ctx, sizeof(quark_4way_ctx) );
|
||||
|
||||
blake512_4way_update( &ctx.blake, input, 80 );
|
||||
blake512_4way_close( &ctx.blake, vhash );
|
||||
blake512_4way_full( &ctx.blake, vhash, input, 80 );
|
||||
|
||||
bmw512_4way_update( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
|
||||
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ), zero );
|
||||
h_mask = _mm256_movemask_epi8( vh_mask );
|
||||
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
if ( hash0[0] & mask )
|
||||
{
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash0,
|
||||
(char*)hash0, 512 );
|
||||
}
|
||||
if ( hash1[0] & mask )
|
||||
{
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash1,
|
||||
(char*)hash1, 512 );
|
||||
}
|
||||
if ( hash2[0] & mask )
|
||||
{
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash2,
|
||||
(char*)hash2, 512 );
|
||||
}
|
||||
if ( hash3[0] & mask )
|
||||
{
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash3,
|
||||
(char*)hash3, 512 );
|
||||
}
|
||||
// A
|
||||
if ( hash0[0] & 8 )
|
||||
groestl512_full( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||
if ( hash1[0] & 8 )
|
||||
groestl512_full( &ctx.groestl, (char*)hash1, (char*)hash1, 512 );
|
||||
if ( hash2[0] & 8)
|
||||
groestl512_full( &ctx.groestl, (char*)hash2, (char*)hash2, 512 );
|
||||
if ( hash3[0] & 8 )
|
||||
groestl512_full( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
||||
|
||||
intrlv_4x64( vhashA, hash0, hash1, hash2, hash3, 512 );
|
||||
|
||||
if ( mm256_anybits1( vh_mask ) )
|
||||
{
|
||||
skein512_4way_update( &ctx.skein, vhash, 64 );
|
||||
skein512_4way_close( &ctx.skein, vhashB );
|
||||
}
|
||||
// B
|
||||
if ( likely( h_mask & 0xffffffff ) )
|
||||
skein512_4way_full( &ctx.skein, vhashB, vhash, 64 );
|
||||
|
||||
mm256_blend_hash_4x64( vh, vhA, vhB, vh_mask );
|
||||
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash1, (char*)hash1, 512 );
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash2, (char*)hash2, 512 );
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash1, (char*)hash1, 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash2, (char*)hash2, 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
||||
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
|
||||
@@ -401,15 +332,13 @@ void quark_4way_hash( void *state, const void *input )
|
||||
jh512_4way_close( &ctx.jh, vhash );
|
||||
|
||||
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ), zero );
|
||||
h_mask = _mm256_movemask_epi8( vh_mask );
|
||||
|
||||
if ( mm256_anybits0( vh_mask ) )
|
||||
{
|
||||
blake512_4way_init( &ctx.blake );
|
||||
blake512_4way_update( &ctx.blake, vhash, 64 );
|
||||
blake512_4way_close( &ctx.blake, vhashA );
|
||||
}
|
||||
|
||||
if ( mm256_anybits1( vh_mask ) )
|
||||
// A
|
||||
if ( likely( ( h_mask & 0xffffffff ) != 0xffffffff ) )
|
||||
blake512_4way_full( &ctx.blake, vhashA, vhash, 64 );
|
||||
// B
|
||||
if ( likely( h_mask & 0xffffffff ) )
|
||||
{
|
||||
bmw512_4way_init( &ctx.bmw );
|
||||
bmw512_4way_update( &ctx.bmw, vhash, 64 );
|
||||
@@ -421,20 +350,20 @@ void quark_4way_hash( void *state, const void *input )
|
||||
keccak512_4way_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_close( &ctx.keccak, vhash );
|
||||
|
||||
skein512_4way_init( &ctx.skein );
|
||||
skein512_4way_update( &ctx.skein, vhash, 64 );
|
||||
skein512_4way_close( &ctx.skein, vhash );
|
||||
skein512_4way_full( &ctx.skein, vhash, vhash, 64 );
|
||||
|
||||
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ), zero );
|
||||
h_mask = _mm256_movemask_epi8( vh_mask );
|
||||
|
||||
if ( mm256_anybits0( vh_mask ) )
|
||||
// A
|
||||
if ( likely( ( h_mask & 0xffffffff ) != 0xffffffff ) )
|
||||
{
|
||||
keccak512_4way_init( &ctx.keccak );
|
||||
keccak512_4way_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_close( &ctx.keccak, vhashA );
|
||||
}
|
||||
|
||||
if ( mm256_anybits1( vh_mask ) )
|
||||
// B
|
||||
if ( likely( h_mask & 0xffffffff ) )
|
||||
{
|
||||
jh512_4way_init( &ctx.jh );
|
||||
jh512_4way_update( &ctx.jh, vhash, 64 );
|
||||
@@ -451,41 +380,44 @@ void quark_4way_hash( void *state, const void *input )
|
||||
int scanhash_quark_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[4*8] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
||||
uint64_t hash64[4*4] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
||||
uint32_t *hash7 = &(hash[25]);
|
||||
uint64_t *hash64_q3 = &(hash64[3*4]);
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint64_t targ64_q3 = ((uint64_t*)ptarget)[3];
|
||||
uint32_t n = pdata[19];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
__m256i *noncev = (__m256i*)vdata + 9; // aligned
|
||||
int thr_id = mythr->id;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t last_nonce = max_nonce - 4;
|
||||
__m256i *noncev = (__m256i*)vdata + 9;
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
*noncev = mm256_intrlv_blend_32(
|
||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
||||
do
|
||||
{
|
||||
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
|
||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
|
||||
quark_4way_hash( hash64, vdata );
|
||||
|
||||
quark_4way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( unlikely( hash7[ i<<1 ] <= Htarg ) )
|
||||
for ( int lane = 0; lane < 4; lane++ )
|
||||
if ( hash64_q3[ lane ] <= targ64_q3 && !bench )
|
||||
{
|
||||
extr_lane_4x64( lane_hash, hash, i, 256 );
|
||||
if ( likely( fulltest( lane_hash, ptarget ) && !opt_benchmark ) )
|
||||
extr_lane_4x64( lane_hash, hash64, lane, 256 );
|
||||
if ( valid_hash( lane_hash, ptarget ) )
|
||||
{
|
||||
pdata[19] = n+i;
|
||||
submit_lane_solution( work, lane_hash, mythr, i );
|
||||
pdata[19] = bswap_32( n + lane );
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
}
|
||||
}
|
||||
*noncev = _mm256_add_epi32( *noncev,
|
||||
m256_const1_64( 0x0000000400000000 ) );
|
||||
n += 4;
|
||||
} while ( ( n < max_nonce ) && !work_restart[thr_id].restart );
|
||||
} while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) );
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@@ -380,7 +380,7 @@ static inline void PBKDF2_SHA256_128_32_8way(uint32_t *tstate,
|
||||
#endif /* HAVE_SHA256_8WAY */
|
||||
|
||||
|
||||
#if defined(USE_ASM) && defined(__x86_64__)
|
||||
//#if defined(USE_ASM) && defined(__x86_64__)
|
||||
|
||||
#define SCRYPT_MAX_WAYS 12
|
||||
#define HAVE_SCRYPT_3WAY 1
|
||||
@@ -394,113 +394,6 @@ void scrypt_core_3way(uint32_t *X, uint32_t *V, int N);
|
||||
void scrypt_core_6way(uint32_t *X, uint32_t *V, int N);
|
||||
#endif
|
||||
|
||||
#elif defined(USE_ASM) && defined(__i386__)
|
||||
|
||||
#define SCRYPT_MAX_WAYS 4
|
||||
#define scrypt_best_throughput() 1
|
||||
void scrypt_core(uint32_t *X, uint32_t *V, int N);
|
||||
|
||||
#elif defined(USE_ASM) && defined(__arm__) && defined(__APCS_32__)
|
||||
|
||||
void scrypt_core(uint32_t *X, uint32_t *V, int N);
|
||||
#if defined(__ARM_NEON__)
|
||||
#undef HAVE_SHA256_4WAY
|
||||
#define SCRYPT_MAX_WAYS 3
|
||||
#define HAVE_SCRYPT_3WAY 1
|
||||
#define scrypt_best_throughput() 3
|
||||
void scrypt_core_3way(uint32_t *X, uint32_t *V, int N);
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
static inline void xor_salsa8(uint32_t B[16], const uint32_t Bx[16])
|
||||
{
|
||||
uint32_t x00,x01,x02,x03,x04,x05,x06,x07,x08,x09,x10,x11,x12,x13,x14,x15;
|
||||
int i;
|
||||
|
||||
x00 = (B[ 0] ^= Bx[ 0]);
|
||||
x01 = (B[ 1] ^= Bx[ 1]);
|
||||
x02 = (B[ 2] ^= Bx[ 2]);
|
||||
x03 = (B[ 3] ^= Bx[ 3]);
|
||||
x04 = (B[ 4] ^= Bx[ 4]);
|
||||
x05 = (B[ 5] ^= Bx[ 5]);
|
||||
x06 = (B[ 6] ^= Bx[ 6]);
|
||||
x07 = (B[ 7] ^= Bx[ 7]);
|
||||
x08 = (B[ 8] ^= Bx[ 8]);
|
||||
x09 = (B[ 9] ^= Bx[ 9]);
|
||||
x10 = (B[10] ^= Bx[10]);
|
||||
x11 = (B[11] ^= Bx[11]);
|
||||
x12 = (B[12] ^= Bx[12]);
|
||||
x13 = (B[13] ^= Bx[13]);
|
||||
x14 = (B[14] ^= Bx[14]);
|
||||
x15 = (B[15] ^= Bx[15]);
|
||||
for (i = 0; i < 8; i += 2) {
|
||||
#define R(a, b) (((a) << (b)) | ((a) >> (32 - (b))))
|
||||
/* Operate on columns. */
|
||||
x04 ^= R(x00+x12, 7); x09 ^= R(x05+x01, 7);
|
||||
x14 ^= R(x10+x06, 7); x03 ^= R(x15+x11, 7);
|
||||
|
||||
x08 ^= R(x04+x00, 9); x13 ^= R(x09+x05, 9);
|
||||
x02 ^= R(x14+x10, 9); x07 ^= R(x03+x15, 9);
|
||||
|
||||
x12 ^= R(x08+x04,13); x01 ^= R(x13+x09,13);
|
||||
x06 ^= R(x02+x14,13); x11 ^= R(x07+x03,13);
|
||||
|
||||
x00 ^= R(x12+x08,18); x05 ^= R(x01+x13,18);
|
||||
x10 ^= R(x06+x02,18); x15 ^= R(x11+x07,18);
|
||||
|
||||
/* Operate on rows. */
|
||||
x01 ^= R(x00+x03, 7); x06 ^= R(x05+x04, 7);
|
||||
x11 ^= R(x10+x09, 7); x12 ^= R(x15+x14, 7);
|
||||
|
||||
x02 ^= R(x01+x00, 9); x07 ^= R(x06+x05, 9);
|
||||
x08 ^= R(x11+x10, 9); x13 ^= R(x12+x15, 9);
|
||||
|
||||
x03 ^= R(x02+x01,13); x04 ^= R(x07+x06,13);
|
||||
x09 ^= R(x08+x11,13); x14 ^= R(x13+x12,13);
|
||||
|
||||
x00 ^= R(x03+x02,18); x05 ^= R(x04+x07,18);
|
||||
x10 ^= R(x09+x08,18); x15 ^= R(x14+x13,18);
|
||||
#undef R
|
||||
}
|
||||
B[ 0] += x00;
|
||||
B[ 1] += x01;
|
||||
B[ 2] += x02;
|
||||
B[ 3] += x03;
|
||||
B[ 4] += x04;
|
||||
B[ 5] += x05;
|
||||
B[ 6] += x06;
|
||||
B[ 7] += x07;
|
||||
B[ 8] += x08;
|
||||
B[ 9] += x09;
|
||||
B[10] += x10;
|
||||
B[11] += x11;
|
||||
B[12] += x12;
|
||||
B[13] += x13;
|
||||
B[14] += x14;
|
||||
B[15] += x15;
|
||||
}
|
||||
|
||||
static inline void scrypt_core(uint32_t *X, uint32_t *V, int N)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < N; i++) {
|
||||
memcpy(&V[i * 32], X, 128);
|
||||
xor_salsa8(&X[0], &X[16]);
|
||||
xor_salsa8(&X[16], &X[0]);
|
||||
}
|
||||
for (i = 0; i < N; i++) {
|
||||
uint32_t j = 32 * (X[16] & (N - 1));
|
||||
for (uint8_t k = 0; k < 32; k++)
|
||||
X[k] ^= V[j + k];
|
||||
xor_salsa8(&X[0], &X[16]);
|
||||
xor_salsa8(&X[16], &X[0]);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#ifndef SCRYPT_MAX_WAYS
|
||||
#define SCRYPT_MAX_WAYS 1
|
||||
#define scrypt_best_throughput() 1
|
||||
@@ -511,8 +404,8 @@ unsigned char *scrypt_buffer_alloc(int N)
|
||||
return (uchar*) malloc((size_t)N * SCRYPT_MAX_WAYS * 128 + 63);
|
||||
}
|
||||
|
||||
static void scrypt_1024_1_1_256(const uint32_t *input, uint32_t *output,
|
||||
uint32_t *midstate, unsigned char *scratchpad, int N)
|
||||
static bool scrypt_1024_1_1_256(const uint32_t *input, uint32_t *output,
|
||||
uint32_t *midstate, unsigned char *scratchpad, int N, int thr_id )
|
||||
{
|
||||
uint32_t tstate[8], ostate[8];
|
||||
uint32_t X[32];
|
||||
@@ -527,11 +420,13 @@ static void scrypt_1024_1_1_256(const uint32_t *input, uint32_t *output,
|
||||
scrypt_core(X, V, N);
|
||||
|
||||
PBKDF2_SHA256_128_32(tstate, ostate, X, output);
|
||||
return true;
|
||||
}
|
||||
|
||||
#ifdef HAVE_SHA256_4WAY
|
||||
static void scrypt_1024_1_1_256_4way(const uint32_t *input,
|
||||
uint32_t *output, uint32_t *midstate, unsigned char *scratchpad, int N)
|
||||
static bool scrypt_1024_1_1_256_4way(const uint32_t *input,
|
||||
uint32_t *output, uint32_t *midstate, unsigned char *scratchpad, int N,
|
||||
int thrid )
|
||||
{
|
||||
uint32_t _ALIGN(128) tstate[4 * 8];
|
||||
uint32_t _ALIGN(128) ostate[4 * 8];
|
||||
@@ -545,32 +440,43 @@ static void scrypt_1024_1_1_256_4way(const uint32_t *input,
|
||||
for (i = 0; i < 20; i++)
|
||||
for (k = 0; k < 4; k++)
|
||||
W[4 * i + k] = input[k * 20 + i];
|
||||
for (i = 0; i < 8; i++)
|
||||
|
||||
for (i = 0; i < 8; i++)
|
||||
for (k = 0; k < 4; k++)
|
||||
tstate[4 * i + k] = midstate[i];
|
||||
HMAC_SHA256_80_init_4way(W, tstate, ostate);
|
||||
PBKDF2_SHA256_80_128_4way(tstate, ostate, W, W);
|
||||
for (i = 0; i < 32; i++)
|
||||
|
||||
HMAC_SHA256_80_init_4way(W, tstate, ostate);
|
||||
|
||||
PBKDF2_SHA256_80_128_4way(tstate, ostate, W, W);
|
||||
|
||||
for (i = 0; i < 32; i++)
|
||||
for (k = 0; k < 4; k++)
|
||||
X[k * 32 + i] = W[4 * i + k];
|
||||
scrypt_core(X + 0 * 32, V, N);
|
||||
|
||||
scrypt_core(X + 0 * 32, V, N);
|
||||
scrypt_core(X + 1 * 32, V, N);
|
||||
scrypt_core(X + 2 * 32, V, N);
|
||||
scrypt_core(X + 3 * 32, V, N);
|
||||
for (i = 0; i < 32; i++)
|
||||
|
||||
for (i = 0; i < 32; i++)
|
||||
for (k = 0; k < 4; k++)
|
||||
W[4 * i + k] = X[k * 32 + i];
|
||||
PBKDF2_SHA256_128_32_4way(tstate, ostate, W, W);
|
||||
for (i = 0; i < 8; i++)
|
||||
|
||||
PBKDF2_SHA256_128_32_4way(tstate, ostate, W, W);
|
||||
|
||||
for (i = 0; i < 8; i++)
|
||||
for (k = 0; k < 4; k++)
|
||||
output[k * 8 + i] = W[4 * i + k];
|
||||
|
||||
return true;
|
||||
}
|
||||
#endif /* HAVE_SHA256_4WAY */
|
||||
|
||||
#ifdef HAVE_SCRYPT_3WAY
|
||||
|
||||
static void scrypt_1024_1_1_256_3way(const uint32_t *input,
|
||||
uint32_t *output, uint32_t *midstate, unsigned char *scratchpad, int N)
|
||||
static bool scrypt_1024_1_1_256_3way(const uint32_t *input,
|
||||
uint32_t *output, uint32_t *midstate, unsigned char *scratchpad, int N,
|
||||
int thrid )
|
||||
{
|
||||
uint32_t _ALIGN(64) tstate[3 * 8], ostate[3 * 8];
|
||||
uint32_t _ALIGN(64) X[3 * 32];
|
||||
@@ -581,23 +487,34 @@ static void scrypt_1024_1_1_256_3way(const uint32_t *input,
|
||||
memcpy(tstate + 0, midstate, 32);
|
||||
memcpy(tstate + 8, midstate, 32);
|
||||
memcpy(tstate + 16, midstate, 32);
|
||||
HMAC_SHA256_80_init(input + 0, tstate + 0, ostate + 0);
|
||||
|
||||
HMAC_SHA256_80_init(input + 0, tstate + 0, ostate + 0);
|
||||
HMAC_SHA256_80_init(input + 20, tstate + 8, ostate + 8);
|
||||
HMAC_SHA256_80_init(input + 40, tstate + 16, ostate + 16);
|
||||
PBKDF2_SHA256_80_128(tstate + 0, ostate + 0, input + 0, X + 0);
|
||||
|
||||
if ( work_restart[thrid].restart ) return false;
|
||||
|
||||
PBKDF2_SHA256_80_128(tstate + 0, ostate + 0, input + 0, X + 0);
|
||||
PBKDF2_SHA256_80_128(tstate + 8, ostate + 8, input + 20, X + 32);
|
||||
PBKDF2_SHA256_80_128(tstate + 16, ostate + 16, input + 40, X + 64);
|
||||
|
||||
scrypt_core_3way(X, V, N);
|
||||
if ( work_restart[thrid].restart ) return false;
|
||||
|
||||
PBKDF2_SHA256_128_32(tstate + 0, ostate + 0, X + 0, output + 0);
|
||||
scrypt_core_3way(X, V, N);
|
||||
|
||||
if ( work_restart[thrid].restart ) return false;
|
||||
|
||||
PBKDF2_SHA256_128_32(tstate + 0, ostate + 0, X + 0, output + 0);
|
||||
PBKDF2_SHA256_128_32(tstate + 8, ostate + 8, X + 32, output + 8);
|
||||
PBKDF2_SHA256_128_32(tstate + 16, ostate + 16, X + 64, output + 16);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
#ifdef HAVE_SHA256_4WAY
|
||||
static void scrypt_1024_1_1_256_12way(const uint32_t *input,
|
||||
uint32_t *output, uint32_t *midstate, unsigned char *scratchpad, int N)
|
||||
static bool scrypt_1024_1_1_256_12way(const uint32_t *input,
|
||||
uint32_t *output, uint32_t *midstate, unsigned char *scratchpad, int N,
|
||||
int thrid )
|
||||
{
|
||||
uint32_t _ALIGN(128) tstate[12 * 8];
|
||||
uint32_t _ALIGN(128) ostate[12 * 8];
|
||||
@@ -612,43 +529,60 @@ static void scrypt_1024_1_1_256_12way(const uint32_t *input,
|
||||
for (i = 0; i < 20; i++)
|
||||
for (k = 0; k < 4; k++)
|
||||
W[128 * j + 4 * i + k] = input[80 * j + k * 20 + i];
|
||||
for (j = 0; j < 3; j++)
|
||||
|
||||
for (j = 0; j < 3; j++)
|
||||
for (i = 0; i < 8; i++)
|
||||
for (k = 0; k < 4; k++)
|
||||
tstate[32 * j + 4 * i + k] = midstate[i];
|
||||
HMAC_SHA256_80_init_4way(W + 0, tstate + 0, ostate + 0);
|
||||
|
||||
HMAC_SHA256_80_init_4way(W + 0, tstate + 0, ostate + 0);
|
||||
HMAC_SHA256_80_init_4way(W + 128, tstate + 32, ostate + 32);
|
||||
HMAC_SHA256_80_init_4way(W + 256, tstate + 64, ostate + 64);
|
||||
PBKDF2_SHA256_80_128_4way(tstate + 0, ostate + 0, W + 0, W + 0);
|
||||
|
||||
if ( work_restart[thrid].restart ) return false;
|
||||
|
||||
PBKDF2_SHA256_80_128_4way(tstate + 0, ostate + 0, W + 0, W + 0);
|
||||
PBKDF2_SHA256_80_128_4way(tstate + 32, ostate + 32, W + 128, W + 128);
|
||||
PBKDF2_SHA256_80_128_4way(tstate + 64, ostate + 64, W + 256, W + 256);
|
||||
for (j = 0; j < 3; j++)
|
||||
|
||||
if ( work_restart[thrid].restart ) return false;
|
||||
|
||||
for (j = 0; j < 3; j++)
|
||||
for (i = 0; i < 32; i++)
|
||||
for (k = 0; k < 4; k++)
|
||||
X[128 * j + k * 32 + i] = W[128 * j + 4 * i + k];
|
||||
scrypt_core_3way(X + 0 * 96, V, N);
|
||||
|
||||
scrypt_core_3way(X + 0 * 96, V, N);
|
||||
scrypt_core_3way(X + 1 * 96, V, N);
|
||||
scrypt_core_3way(X + 2 * 96, V, N);
|
||||
scrypt_core_3way(X + 3 * 96, V, N);
|
||||
for (j = 0; j < 3; j++)
|
||||
|
||||
if ( work_restart[thrid].restart ) return false;
|
||||
|
||||
for (j = 0; j < 3; j++)
|
||||
for (i = 0; i < 32; i++)
|
||||
for (k = 0; k < 4; k++)
|
||||
W[128 * j + 4 * i + k] = X[128 * j + k * 32 + i];
|
||||
PBKDF2_SHA256_128_32_4way(tstate + 0, ostate + 0, W + 0, W + 0);
|
||||
|
||||
PBKDF2_SHA256_128_32_4way(tstate + 0, ostate + 0, W + 0, W + 0);
|
||||
PBKDF2_SHA256_128_32_4way(tstate + 32, ostate + 32, W + 128, W + 128);
|
||||
PBKDF2_SHA256_128_32_4way(tstate + 64, ostate + 64, W + 256, W + 256);
|
||||
for (j = 0; j < 3; j++)
|
||||
|
||||
for (j = 0; j < 3; j++)
|
||||
for (i = 0; i < 8; i++)
|
||||
for (k = 0; k < 4; k++)
|
||||
output[32 * j + k * 8 + i] = W[128 * j + 4 * i + k];
|
||||
|
||||
return true;
|
||||
}
|
||||
#endif /* HAVE_SHA256_4WAY */
|
||||
|
||||
#endif /* HAVE_SCRYPT_3WAY */
|
||||
|
||||
#ifdef HAVE_SCRYPT_6WAY
|
||||
static void scrypt_1024_1_1_256_24way(const uint32_t *input,
|
||||
uint32_t *output, uint32_t *midstate, unsigned char *scratchpad, int N)
|
||||
static bool scrypt_1024_1_1_256_24way( const uint32_t *input,
|
||||
uint32_t *output, uint32_t *midstate,
|
||||
unsigned char *scratchpad, int N, int thrid )
|
||||
{
|
||||
uint32_t _ALIGN(128) tstate[24 * 8];
|
||||
uint32_t _ALIGN(128) ostate[24 * 8];
|
||||
@@ -657,41 +591,57 @@ static void scrypt_1024_1_1_256_24way(const uint32_t *input,
|
||||
uint32_t *V;
|
||||
int i, j, k;
|
||||
|
||||
V = (uint32_t *)(((uintptr_t)(scratchpad) + 63) & ~ (uintptr_t)(63));
|
||||
V = (uint32_t *)( ( (uintptr_t)(scratchpad) + 63 ) & ~ (uintptr_t)(63) );
|
||||
|
||||
for (j = 0; j < 3; j++)
|
||||
for (i = 0; i < 20; i++)
|
||||
for (k = 0; k < 8; k++)
|
||||
for ( j = 0; j < 3; j++ )
|
||||
for ( i = 0; i < 20; i++ )
|
||||
for ( k = 0; k < 8; k++ )
|
||||
W[8 * 32 * j + 8 * i + k] = input[8 * 20 * j + k * 20 + i];
|
||||
for (j = 0; j < 3; j++)
|
||||
for (i = 0; i < 8; i++)
|
||||
for (k = 0; k < 8; k++)
|
||||
|
||||
for ( j = 0; j < 3; j++ )
|
||||
for ( i = 0; i < 8; i++ )
|
||||
for ( k = 0; k < 8; k++ )
|
||||
tstate[8 * 8 * j + 8 * i + k] = midstate[i];
|
||||
HMAC_SHA256_80_init_8way(W + 0, tstate + 0, ostate + 0);
|
||||
HMAC_SHA256_80_init_8way(W + 256, tstate + 64, ostate + 64);
|
||||
HMAC_SHA256_80_init_8way(W + 512, tstate + 128, ostate + 128);
|
||||
PBKDF2_SHA256_80_128_8way(tstate + 0, ostate + 0, W + 0, W + 0);
|
||||
PBKDF2_SHA256_80_128_8way(tstate + 64, ostate + 64, W + 256, W + 256);
|
||||
PBKDF2_SHA256_80_128_8way(tstate + 128, ostate + 128, W + 512, W + 512);
|
||||
for (j = 0; j < 3; j++)
|
||||
for (i = 0; i < 32; i++)
|
||||
for (k = 0; k < 8; k++)
|
||||
|
||||
HMAC_SHA256_80_init_8way( W + 0, tstate + 0, ostate + 0 );
|
||||
HMAC_SHA256_80_init_8way( W + 256, tstate + 64, ostate + 64 );
|
||||
HMAC_SHA256_80_init_8way( W + 512, tstate + 128, ostate + 128 );
|
||||
|
||||
if ( work_restart[thrid].restart ) return false;
|
||||
|
||||
PBKDF2_SHA256_80_128_8way( tstate + 0, ostate + 0, W + 0, W + 0 );
|
||||
PBKDF2_SHA256_80_128_8way( tstate + 64, ostate + 64, W + 256, W + 256 );
|
||||
PBKDF2_SHA256_80_128_8way( tstate + 128, ostate + 128, W + 512, W + 512 );
|
||||
|
||||
if ( work_restart[thrid].restart ) return false;
|
||||
|
||||
for ( j = 0; j < 3; j++ )
|
||||
for ( i = 0; i < 32; i++ )
|
||||
for ( k = 0; k < 8; k++ )
|
||||
X[8 * 32 * j + k * 32 + i] = W[8 * 32 * j + 8 * i + k];
|
||||
scrypt_core_6way(X + 0 * 32, V, N);
|
||||
scrypt_core_6way(X + 6 * 32, V, N);
|
||||
scrypt_core_6way(X + 12 * 32, V, N);
|
||||
scrypt_core_6way(X + 18 * 32, V, N);
|
||||
for (j = 0; j < 3; j++)
|
||||
for (i = 0; i < 32; i++)
|
||||
for (k = 0; k < 8; k++)
|
||||
|
||||
scrypt_core_6way( X + 0 * 32, V, N );
|
||||
scrypt_core_6way( X + 6 * 32, V, N );
|
||||
scrypt_core_6way( X + 12 * 32, V, N );
|
||||
scrypt_core_6way( X + 18 * 32, V, N );
|
||||
|
||||
if ( work_restart[thrid].restart ) return false;
|
||||
|
||||
for ( j = 0; j < 3; j++ )
|
||||
for ( i = 0; i < 32; i++ )
|
||||
for ( k = 0; k < 8; k++ )
|
||||
W[8 * 32 * j + 8 * i + k] = X[8 * 32 * j + k * 32 + i];
|
||||
PBKDF2_SHA256_128_32_8way(tstate + 0, ostate + 0, W + 0, W + 0);
|
||||
PBKDF2_SHA256_128_32_8way(tstate + 64, ostate + 64, W + 256, W + 256);
|
||||
PBKDF2_SHA256_128_32_8way(tstate + 128, ostate + 128, W + 512, W + 512);
|
||||
for (j = 0; j < 3; j++)
|
||||
for (i = 0; i < 8; i++)
|
||||
for (k = 0; k < 8; k++)
|
||||
|
||||
PBKDF2_SHA256_128_32_8way( tstate + 0, ostate + 0, W + 0, W + 0 );
|
||||
PBKDF2_SHA256_128_32_8way( tstate + 64, ostate + 64, W + 256, W + 256 );
|
||||
PBKDF2_SHA256_128_32_8way( tstate + 128, ostate + 128, W + 512, W + 512 );
|
||||
|
||||
for ( j = 0; j < 3; j++ )
|
||||
for ( i = 0; i < 8; i++ )
|
||||
for ( k = 0; k < 8; k++ )
|
||||
output[8 * 8 * j + k * 8 + i] = W[8 * 32 * j + 8 * i + k];
|
||||
|
||||
return true;
|
||||
}
|
||||
#endif /* HAVE_SCRYPT_6WAY */
|
||||
|
||||
@@ -703,16 +653,18 @@ extern int scanhash_scrypt( struct work *work, uint32_t max_nonce,
|
||||
uint32_t data[SCRYPT_MAX_WAYS * 20], hash[SCRYPT_MAX_WAYS * 8];
|
||||
uint32_t midstate[8];
|
||||
uint32_t n = pdata[19] - 1;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
int throughput = scrypt_best_throughput();
|
||||
int i;
|
||||
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
||||
|
||||
#ifdef HAVE_SHA256_4WAY
|
||||
if (sha256_use_4way())
|
||||
throughput *= 4;
|
||||
#endif
|
||||
|
||||
// applog(LOG_INFO,"Scrypt thoughput %d",throughput);
|
||||
|
||||
for (i = 0; i < throughput; i++)
|
||||
memcpy(data + i * 20, pdata, 80);
|
||||
|
||||
@@ -720,46 +672,49 @@ extern int scanhash_scrypt( struct work *work, uint32_t max_nonce,
|
||||
sha256_transform(midstate, data, 0);
|
||||
|
||||
do {
|
||||
|
||||
bool rc = true;
|
||||
for (i = 0; i < throughput; i++)
|
||||
data[i * 20 + 19] = ++n;
|
||||
|
||||
#if defined(HAVE_SHA256_4WAY)
|
||||
if (throughput == 4)
|
||||
scrypt_1024_1_1_256_4way(data, hash, midstate,
|
||||
scratchbuf, scratchbuf_size );
|
||||
rc = scrypt_1024_1_1_256_4way(data, hash, midstate,
|
||||
scratchbuf, scratchbuf_size, thr_id );
|
||||
else
|
||||
#endif
|
||||
#if defined(HAVE_SCRYPT_3WAY) && defined(HAVE_SHA256_4WAY)
|
||||
if (throughput == 12)
|
||||
scrypt_1024_1_1_256_12way(data, hash, midstate,
|
||||
scratchbuf, scratchbuf_size );
|
||||
rc = scrypt_1024_1_1_256_12way(data, hash, midstate,
|
||||
scratchbuf, scratchbuf_size, thr_id );
|
||||
else
|
||||
#endif
|
||||
#if defined(HAVE_SCRYPT_6WAY)
|
||||
if (throughput == 24)
|
||||
scrypt_1024_1_1_256_24way(data, hash, midstate,
|
||||
scratchbuf, scratchbuf_size );
|
||||
rc = scrypt_1024_1_1_256_24way(data, hash, midstate,
|
||||
scratchbuf, scratchbuf_size, thr_id );
|
||||
else
|
||||
#endif
|
||||
#if defined(HAVE_SCRYPT_3WAY)
|
||||
if (throughput == 3)
|
||||
scrypt_1024_1_1_256_3way(data, hash, midstate,
|
||||
scratchbuf, scratchbuf_size );
|
||||
rc = scrypt_1024_1_1_256_3way(data, hash, midstate,
|
||||
scratchbuf, scratchbuf_size, thr_id );
|
||||
else
|
||||
#endif
|
||||
scrypt_1024_1_1_256(data, hash, midstate, scratchbuf,
|
||||
scratchbuf_size );
|
||||
rc = scrypt_1024_1_1_256(data, hash, midstate, scratchbuf,
|
||||
scratchbuf_size, thr_id );
|
||||
|
||||
for (i = 0; i < throughput; i++) {
|
||||
if (unlikely(hash[i * 8 + 7] <= Htarg && fulltest(hash + i * 8, ptarget))) {
|
||||
if ( rc )
|
||||
for ( i = 0; i < throughput; i++ )
|
||||
{
|
||||
if ( unlikely( valid_hash( hash + i * 8, ptarget ) ) )
|
||||
{
|
||||
pdata[19] = data[i * 20 + 19];
|
||||
submit_solution( work, hash, mythr );
|
||||
submit_solution( work, hash + i * 8, mythr );
|
||||
}
|
||||
}
|
||||
} while (likely(n < max_nonce && !work_restart[thr_id].restart));
|
||||
}
|
||||
} while ( likely( ( n < ( max_nonce - throughput ) ) && !(*restart) ) );
|
||||
|
||||
*hashes_done = n - pdata[19] + 1;
|
||||
*hashes_done = n - pdata[19];
|
||||
pdata[19] = n;
|
||||
return 0;
|
||||
}
|
||||
@@ -778,7 +733,6 @@ bool register_scrypt_algo( algo_gate_t* gate )
|
||||
gate->optimizations = SSE2_OPT | AVX2_OPT;
|
||||
gate->miner_thread_init =(void*)&scrypt_miner_thread_init;
|
||||
gate->scanhash = (void*)&scanhash_scrypt;
|
||||
// gate->hash = (void*)&scrypt_1024_1_1_256_24way;
|
||||
opt_target_factor = 65536.0;
|
||||
|
||||
if ( !opt_param_n )
|
||||
|
@@ -33,7 +33,7 @@
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
|
||||
#ifdef __AES__
|
||||
#if defined(__AES__)
|
||||
|
||||
#include "sph_shavite.h"
|
||||
#include "simd-utils.h"
|
||||
|
@@ -35,6 +35,8 @@
|
||||
|
||||
#include "sph_shavite.h"
|
||||
|
||||
#if !defined(__AES__)
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"{
|
||||
#endif
|
||||
@@ -1762,3 +1764,6 @@ sph_shavite512_sw_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // !AES
|
||||
|
||||
|
@@ -262,15 +262,9 @@ void sph_shavite384_close(void *cc, void *dst);
|
||||
void sph_shavite384_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
// Always define sw but only define aesni when available
|
||||
// Define fptrs for aesni or sw, not both.
|
||||
void sph_shavite512_sw_init(void *cc);
|
||||
void sph_shavite512_sw(void *cc, const void *data, size_t len);
|
||||
void sph_shavite512_sw_close(void *cc, void *dst);
|
||||
void sph_shavite512_sw_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
//Don't call these directly from application code, use the macros below.
|
||||
#ifdef __AES__
|
||||
|
||||
void sph_shavite512_aesni_init(void *cc);
|
||||
void sph_shavite512_aesni(void *cc, const void *data, size_t len);
|
||||
void sph_shavite512_aesni_close(void *cc, void *dst);
|
||||
@@ -285,6 +279,13 @@ void sph_shavite512_aesni_addbits_and_close(
|
||||
|
||||
#else
|
||||
|
||||
void sph_shavite512_sw_init(void *cc);
|
||||
void sph_shavite512_sw(void *cc, const void *data, size_t len);
|
||||
void sph_shavite512_sw_close(void *cc, void *dst);
|
||||
void sph_shavite512_sw_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
|
||||
#define sph_shavite512_init sph_shavite512_sw_init
|
||||
#define sph_shavite512 sph_shavite512_sw
|
||||
#define sph_shavite512_close sph_shavite512_sw_close
|
||||
@@ -293,6 +294,20 @@ void sph_shavite512_aesni_addbits_and_close(
|
||||
|
||||
#endif
|
||||
|
||||
// Use these macros from application code.
|
||||
#define shavite512_context sph_shavite512_context
|
||||
|
||||
#define shavite512_init sph_shavite512_init
|
||||
#define shavite512_update sph_shavite512
|
||||
#define shavite512_close sph_shavite512_close
|
||||
|
||||
#define shavite512_full( cc, dst, data, len ) \
|
||||
do{ \
|
||||
shavite512_init( cc ); \
|
||||
shavite512_update( cc, data, len ); \
|
||||
shavite512_close( cc, dst ); \
|
||||
}while(0)
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
@@ -13,18 +13,18 @@
|
||||
|
||||
#if defined (SKEIN_8WAY)
|
||||
|
||||
static __thread skein512_8way_context skein512_8way_ctx
|
||||
__attribute__ ((aligned (64)));
|
||||
|
||||
void skeinhash_8way( void *state, const void *input )
|
||||
{
|
||||
uint64_t vhash64[8*8] __attribute__ ((aligned (128)));
|
||||
skein512_8way_context ctx_skein;
|
||||
|
||||
memcpy( &ctx_skein, &skein512_8way_ctx, sizeof( ctx_skein ) );
|
||||
uint32_t vhash32[16*8] __attribute__ ((aligned (128)));
|
||||
sha256_8way_context ctx_sha256;
|
||||
|
||||
skein512_8way_init( &ctx_skein );
|
||||
skein512_8way_update( &ctx_skein, input, 80 );
|
||||
skein512_8way_close( &ctx_skein, vhash64 );
|
||||
|
||||
skein512_8way_final16( &ctx_skein, vhash64, input + (64*8) );
|
||||
rintrlv_8x64_8x32( vhash32, vhash64, 512 );
|
||||
|
||||
sha256_8way_init( &ctx_sha256 );
|
||||
@@ -36,63 +36,70 @@ int scanhash_skein_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t vdata[20*8] __attribute__ ((aligned (128)));
|
||||
uint32_t hash[16*8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[8*8] __attribute__ ((aligned (64)));
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
||||
uint32_t *hash7 = &(hash[7<<3]);
|
||||
uint32_t *hash_d7 = &(hash[7*8]);
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t targ_d7 = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 8;
|
||||
uint32_t n = first_nonce;
|
||||
__m512i *noncev = (__m512i*)vdata + 9; // aligned
|
||||
int thr_id = mythr->id;
|
||||
__m512i *noncev = (__m512i*)vdata + 9;
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
mm512_bswap32_intrlv80_8x64( vdata, pdata );
|
||||
*noncev = mm512_intrlv_blend_32(
|
||||
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
|
||||
n+3, 0, n+2, 0, n+1, 0, n , 0 ), *noncev );
|
||||
skein512_8way_prehash64( &skein512_8way_ctx, vdata );
|
||||
do
|
||||
{
|
||||
*noncev = mm512_intrlv_blend_32( mm512_bswap_32(
|
||||
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
|
||||
n+3, 0, n+2, 0, n+1, 0, n , 0 ) ), *noncev );
|
||||
|
||||
skeinhash_8way( hash, vdata );
|
||||
|
||||
for ( int lane = 0; lane < 8; lane++ )
|
||||
if ( hash7[ lane ] <= Htarg )
|
||||
if ( unlikely( hash_d7[ lane ] <= targ_d7 ) && !bench )
|
||||
{
|
||||
extr_lane_8x32( lane_hash, hash, lane, 256 );
|
||||
if ( fulltest( lane_hash, ptarget ) )
|
||||
if ( valid_hash( lane_hash, ptarget ) )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
pdata[19] = bswap_32( n + lane );
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
}
|
||||
}
|
||||
*noncev = _mm512_add_epi32( *noncev,
|
||||
m512_const1_64( 0x0000000800000000 ) );
|
||||
n += 8;
|
||||
} while ( (n < max_nonce-8) && !work_restart[thr_id].restart );
|
||||
} while ( likely( (n < last_nonce) && !work_restart[thr_id].restart ) );
|
||||
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#elif defined (SKEIN_4WAY)
|
||||
|
||||
static __thread skein512_4way_context skein512_4way_ctx
|
||||
__attribute__ ((aligned (64)));
|
||||
|
||||
void skeinhash_4way( void *state, const void *input )
|
||||
{
|
||||
uint64_t vhash64[8*4] __attribute__ ((aligned (128)));
|
||||
skein512_4way_context ctx_skein;
|
||||
memcpy( &ctx_skein, &skein512_4way_ctx, sizeof( ctx_skein ) );
|
||||
#if defined(__SHA__)
|
||||
uint32_t hash0[16] __attribute__ ((aligned (64)));
|
||||
uint32_t hash1[16] __attribute__ ((aligned (64)));
|
||||
uint32_t hash2[16] __attribute__ ((aligned (64)));
|
||||
uint32_t hash3[16] __attribute__ ((aligned (64)));
|
||||
SHA256_CTX ctx_sha256;
|
||||
SHA256_CTX ctx_sha256;
|
||||
#else
|
||||
uint32_t vhash32[16*4] __attribute__ ((aligned (64)));
|
||||
sha256_4way_context ctx_sha256;
|
||||
#endif
|
||||
|
||||
skein512_4way_init( &ctx_skein );
|
||||
skein512_4way_update( &ctx_skein, input, 80 );
|
||||
skein512_4way_close( &ctx_skein, vhash64 );
|
||||
skein512_4way_final16( &ctx_skein, vhash64, input + (64*4) );
|
||||
|
||||
#if defined(__SHA__)
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash64, 512 );
|
||||
@@ -127,38 +134,43 @@ int scanhash_skein_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[16*4] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[8*4] __attribute__ ((aligned (64)));
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||
uint32_t *hash7 = &(hash[7<<2]);
|
||||
uint32_t *hash_d7 = &(hash[7<<2]);
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t targ_d7 = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 4;
|
||||
uint32_t n = first_nonce;
|
||||
__m256i *noncev = (__m256i*)vdata + 9; // aligned
|
||||
int thr_id = mythr->id;
|
||||
__m256i *noncev = (__m256i*)vdata + 9;
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
skein512_4way_prehash64( &skein512_4way_ctx, vdata );
|
||||
|
||||
*noncev = mm256_intrlv_blend_32(
|
||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
||||
do
|
||||
{
|
||||
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
|
||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
|
||||
|
||||
skeinhash_4way( hash, vdata );
|
||||
|
||||
for ( int lane = 0; lane < 4; lane++ )
|
||||
if ( hash7[ lane ] <= Htarg )
|
||||
if ( unlikely( ( hash_d7[ lane ] <= targ_d7 ) && !bench ) )
|
||||
{
|
||||
extr_lane_4x32( lane_hash, hash, lane, 256 );
|
||||
if ( fulltest( lane_hash, ptarget ) )
|
||||
if ( valid_hash( lane_hash, ptarget ) )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
pdata[19] = bswap_32( n + lane );
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
}
|
||||
}
|
||||
*noncev = _mm256_add_epi32( *noncev,
|
||||
m256_const1_64( 0x0000000400000000 ) );
|
||||
n += 4;
|
||||
} while ( (n < max_nonce-4) && !work_restart[thr_id].restart );
|
||||
} while ( likely( (n < last_nonce) && !work_restart[thr_id].restart ) );
|
||||
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
@@ -4,14 +4,16 @@
|
||||
|
||||
bool register_skein_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->optimizations = AVX2_OPT | AVX512_OPT | SHA_OPT;
|
||||
#if defined (SKEIN_8WAY)
|
||||
gate->optimizations = AVX2_OPT | AVX512_OPT;
|
||||
gate->scanhash = (void*)&scanhash_skein_8way;
|
||||
gate->hash = (void*)&skeinhash_8way;
|
||||
#elif defined (SKEIN_4WAY)
|
||||
gate->optimizations = AVX2_OPT | AVX512_OPT | SHA_OPT;
|
||||
gate->scanhash = (void*)&scanhash_skein_4way;
|
||||
gate->hash = (void*)&skeinhash_4way;
|
||||
#else
|
||||
gate->optimizations = AVX2_OPT | AVX512_OPT | SHA_OPT;
|
||||
gate->scanhash = (void*)&scanhash_skein;
|
||||
gate->hash = (void*)&skeinhash;
|
||||
#endif
|
||||
|
@@ -654,6 +654,160 @@ skein_big_close_8way( skein512_8way_context *sc, unsigned ub, unsigned n,
|
||||
memcpy_512( dst, buf, out_len >> 3 );
|
||||
}
|
||||
|
||||
void skein512_8way_full( skein512_8way_context *sc, void *out, const void *data,
|
||||
size_t len )
|
||||
{
|
||||
__m512i h0, h1, h2, h3, h4, h5, h6, h7;
|
||||
__m512i *vdata = (__m512i*)data;
|
||||
__m512i *buf = sc->buf;
|
||||
size_t ptr = 0;
|
||||
unsigned first;
|
||||
uint64_t bcount = 0;
|
||||
const int buf_size = 64; // 64 * _m256i
|
||||
|
||||
// Init
|
||||
|
||||
h0 = m512_const1_64( 0x4903ADFF749C51CE );
|
||||
h1 = m512_const1_64( 0x0D95DE399746DF03 );
|
||||
h2 = m512_const1_64( 0x8FD1934127C79BCE );
|
||||
h3 = m512_const1_64( 0x9A255629FF352CB1 );
|
||||
h4 = m512_const1_64( 0x5DB62599DF6CA7B0 );
|
||||
h5 = m512_const1_64( 0xEABE394CA9D5C3F4 );
|
||||
h6 = m512_const1_64( 0x991112C71A75B523 );
|
||||
h7 = m512_const1_64( 0xAE18A40B660FCC33 );
|
||||
|
||||
// Update
|
||||
|
||||
if ( len <= buf_size - ptr )
|
||||
{
|
||||
memcpy_512( buf + (ptr>>3), vdata, len>>3 );
|
||||
ptr += len;
|
||||
}
|
||||
else
|
||||
{
|
||||
first = ( bcount == 0 ) << 7;
|
||||
do {
|
||||
size_t clen;
|
||||
|
||||
if ( ptr == buf_size )
|
||||
{
|
||||
bcount ++;
|
||||
UBI_BIG_8WAY( 96 + first, 0 );
|
||||
first = 0;
|
||||
ptr = 0;
|
||||
}
|
||||
clen = buf_size - ptr;
|
||||
if ( clen > len )
|
||||
clen = len;
|
||||
memcpy_512( buf + (ptr>>3), vdata, clen>>3 );
|
||||
ptr += clen;
|
||||
vdata += (clen>>3);
|
||||
len -= clen;
|
||||
} while ( len > 0 );
|
||||
}
|
||||
|
||||
// Close
|
||||
|
||||
unsigned et;
|
||||
|
||||
memset_zero_512( buf + (ptr>>3), (buf_size - ptr) >> 3 );
|
||||
et = 352 + ((bcount == 0) << 7);
|
||||
UBI_BIG_8WAY( et, ptr );
|
||||
|
||||
memset_zero_512( buf, buf_size >> 3 );
|
||||
bcount = 0;
|
||||
UBI_BIG_8WAY( 510, 8 );
|
||||
|
||||
casti_m512i( out, 0 ) = h0;
|
||||
casti_m512i( out, 1 ) = h1;
|
||||
casti_m512i( out, 2 ) = h2;
|
||||
casti_m512i( out, 3 ) = h3;
|
||||
casti_m512i( out, 4 ) = h4;
|
||||
casti_m512i( out, 5 ) = h5;
|
||||
casti_m512i( out, 6 ) = h6;
|
||||
casti_m512i( out, 7 ) = h7;
|
||||
}
|
||||
|
||||
void
|
||||
skein512_8way_prehash64( skein512_8way_context *sc, const void *data )
|
||||
{
|
||||
__m512i *vdata = (__m512*)data;
|
||||
__m512i *buf = sc->buf;
|
||||
buf[0] = vdata[0];
|
||||
buf[1] = vdata[1];
|
||||
buf[2] = vdata[2];
|
||||
buf[3] = vdata[3];
|
||||
buf[4] = vdata[4];
|
||||
buf[5] = vdata[5];
|
||||
buf[6] = vdata[6];
|
||||
buf[7] = vdata[7];
|
||||
register __m512i h0 = m512_const1_64( 0x4903ADFF749C51CE );
|
||||
register __m512i h1 = m512_const1_64( 0x0D95DE399746DF03 );
|
||||
register __m512i h2 = m512_const1_64( 0x8FD1934127C79BCE );
|
||||
register __m512i h3 = m512_const1_64( 0x9A255629FF352CB1 );
|
||||
register __m512i h4 = m512_const1_64( 0x5DB62599DF6CA7B0 );
|
||||
register __m512i h5 = m512_const1_64( 0xEABE394CA9D5C3F4 );
|
||||
register __m512i h6 = m512_const1_64( 0x991112C71A75B523 );
|
||||
register __m512i h7 = m512_const1_64( 0xAE18A40B660FCC33 );
|
||||
uint64_t bcount = 1;
|
||||
|
||||
UBI_BIG_8WAY( 224, 0 );
|
||||
sc->h0 = h0;
|
||||
sc->h1 = h1;
|
||||
sc->h2 = h2;
|
||||
sc->h3 = h3;
|
||||
sc->h4 = h4;
|
||||
sc->h5 = h5;
|
||||
sc->h6 = h6;
|
||||
sc->h7 = h7;
|
||||
}
|
||||
|
||||
void
|
||||
skein512_8way_final16( skein512_8way_context *sc, void *output,
|
||||
const void *data )
|
||||
{
|
||||
__m512i *in = (__m512i*)data;
|
||||
__m512i *buf = sc->buf;
|
||||
__m512i *out = (__m512i*)output;
|
||||
register __m512i h0 = sc->h0;
|
||||
register __m512i h1 = sc->h1;
|
||||
register __m512i h2 = sc->h2;
|
||||
register __m512i h3 = sc->h3;
|
||||
register __m512i h4 = sc->h4;
|
||||
register __m512i h5 = sc->h5;
|
||||
register __m512i h6 = sc->h6;
|
||||
register __m512i h7 = sc->h7;
|
||||
|
||||
const __m512i zero = m512_zero;
|
||||
buf[0] = in[0];
|
||||
buf[1] = in[1];
|
||||
buf[2] = zero;
|
||||
buf[3] = zero;
|
||||
buf[4] = zero;
|
||||
buf[5] = zero;
|
||||
buf[6] = zero;
|
||||
buf[7] = zero;
|
||||
|
||||
uint64_t bcount = 1;
|
||||
UBI_BIG_8WAY( 352, 16 );
|
||||
|
||||
buf[0] = zero;
|
||||
buf[1] = zero;
|
||||
|
||||
bcount = 0;
|
||||
UBI_BIG_8WAY( 510, 8 );
|
||||
|
||||
out[0] = h0;
|
||||
out[1] = h1;
|
||||
out[2] = h2;
|
||||
out[3] = h3;
|
||||
out[4] = h4;
|
||||
out[5] = h5;
|
||||
out[6] = h6;
|
||||
out[7] = h7;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
skein256_8way_update(void *cc, const void *data, size_t len)
|
||||
{
|
||||
@@ -709,6 +863,7 @@ void skein512_4way_init( skein512_4way_context *sc )
|
||||
sc->ptr = 0;
|
||||
}
|
||||
|
||||
// Do not use for 128 bt data length
|
||||
static void
|
||||
skein_big_core_4way( skein512_4way_context *sc, const void *data,
|
||||
size_t len )
|
||||
@@ -794,6 +949,156 @@ skein_big_close_4way( skein512_4way_context *sc, unsigned ub, unsigned n,
|
||||
memcpy_256( dst, buf, out_len >> 3 );
|
||||
}
|
||||
|
||||
void
|
||||
skein512_4way_full( skein512_4way_context *sc, void *out, const void *data,
|
||||
size_t len )
|
||||
{
|
||||
__m256i h0, h1, h2, h3, h4, h5, h6, h7;
|
||||
__m256i *vdata = (__m256i*)data;
|
||||
__m256i *buf = sc->buf;
|
||||
size_t ptr = 0;
|
||||
unsigned first;
|
||||
const int buf_size = 64; // 64 * __m256i
|
||||
uint64_t bcount = 0;
|
||||
|
||||
h0 = m256_const1_64( 0x4903ADFF749C51CE );
|
||||
h1 = m256_const1_64( 0x0D95DE399746DF03 );
|
||||
h2 = m256_const1_64( 0x8FD1934127C79BCE );
|
||||
h3 = m256_const1_64( 0x9A255629FF352CB1 );
|
||||
h4 = m256_const1_64( 0x5DB62599DF6CA7B0 );
|
||||
h5 = m256_const1_64( 0xEABE394CA9D5C3F4 );
|
||||
h6 = m256_const1_64( 0x991112C71A75B523 );
|
||||
h7 = m256_const1_64( 0xAE18A40B660FCC33 );
|
||||
|
||||
// Update
|
||||
|
||||
if ( len <= buf_size - ptr )
|
||||
{
|
||||
memcpy_256( buf + (ptr>>3), vdata, len>>3 );
|
||||
ptr += len;
|
||||
}
|
||||
else
|
||||
{
|
||||
first = ( bcount == 0 ) << 7;
|
||||
do {
|
||||
size_t clen;
|
||||
|
||||
if ( ptr == buf_size )
|
||||
{
|
||||
bcount ++;
|
||||
UBI_BIG_4WAY( 96 + first, 0 );
|
||||
first = 0;
|
||||
ptr = 0;
|
||||
}
|
||||
clen = buf_size - ptr;
|
||||
if ( clen > len )
|
||||
clen = len;
|
||||
memcpy_256( buf + (ptr>>3), vdata, clen>>3 );
|
||||
ptr += clen;
|
||||
vdata += (clen>>3);
|
||||
len -= clen;
|
||||
} while ( len > 0 );
|
||||
}
|
||||
|
||||
// Close
|
||||
|
||||
unsigned et;
|
||||
|
||||
memset_zero_256( buf + (ptr>>3), (buf_size - ptr) >> 3 );
|
||||
et = 352 + ((bcount == 0) << 7);
|
||||
UBI_BIG_4WAY( et, ptr );
|
||||
|
||||
memset_zero_256( buf, buf_size >> 3 );
|
||||
bcount = 0;
|
||||
UBI_BIG_4WAY( 510, 8 );
|
||||
|
||||
casti_m256i( out, 0 ) = h0;
|
||||
casti_m256i( out, 1 ) = h1;
|
||||
casti_m256i( out, 2 ) = h2;
|
||||
casti_m256i( out, 3 ) = h3;
|
||||
casti_m256i( out, 4 ) = h4;
|
||||
casti_m256i( out, 5 ) = h5;
|
||||
casti_m256i( out, 6 ) = h6;
|
||||
casti_m256i( out, 7 ) = h7;
|
||||
}
|
||||
|
||||
void
|
||||
skein512_4way_prehash64( skein512_4way_context *sc, const void *data )
|
||||
{
|
||||
__m256i *vdata = (__m256i*)data;
|
||||
__m256i *buf = sc->buf;
|
||||
buf[0] = vdata[0];
|
||||
buf[1] = vdata[1];
|
||||
buf[2] = vdata[2];
|
||||
buf[3] = vdata[3];
|
||||
buf[4] = vdata[4];
|
||||
buf[5] = vdata[5];
|
||||
buf[6] = vdata[6];
|
||||
buf[7] = vdata[7];
|
||||
register __m256i h0 = m256_const1_64( 0x4903ADFF749C51CE );
|
||||
register __m256i h1 = m256_const1_64( 0x0D95DE399746DF03 );
|
||||
register __m256i h2 = m256_const1_64( 0x8FD1934127C79BCE );
|
||||
register __m256i h3 = m256_const1_64( 0x9A255629FF352CB1 );
|
||||
register __m256i h4 = m256_const1_64( 0x5DB62599DF6CA7B0 );
|
||||
register __m256i h5 = m256_const1_64( 0xEABE394CA9D5C3F4 );
|
||||
register __m256i h6 = m256_const1_64( 0x991112C71A75B523 );
|
||||
register __m256i h7 = m256_const1_64( 0xAE18A40B660FCC33 );
|
||||
uint64_t bcount = 1;
|
||||
|
||||
UBI_BIG_4WAY( 224, 0 );
|
||||
sc->h0 = h0;
|
||||
sc->h1 = h1;
|
||||
sc->h2 = h2;
|
||||
sc->h3 = h3;
|
||||
sc->h4 = h4;
|
||||
sc->h5 = h5;
|
||||
sc->h6 = h6;
|
||||
sc->h7 = h7;
|
||||
}
|
||||
|
||||
void
|
||||
skein512_4way_final16( skein512_4way_context *sc, void *out, const void *data )
|
||||
{
|
||||
__m256i *vdata = (__m256i*)data;
|
||||
__m256i *buf = sc->buf;
|
||||
register __m256i h0 = sc->h0;
|
||||
register __m256i h1 = sc->h1;
|
||||
register __m256i h2 = sc->h2;
|
||||
register __m256i h3 = sc->h3;
|
||||
register __m256i h4 = sc->h4;
|
||||
register __m256i h5 = sc->h5;
|
||||
register __m256i h6 = sc->h6;
|
||||
register __m256i h7 = sc->h7;
|
||||
|
||||
const __m256i zero = m256_zero;
|
||||
buf[0] = vdata[0];
|
||||
buf[1] = vdata[1];
|
||||
buf[2] = zero;
|
||||
buf[3] = zero;
|
||||
buf[4] = zero;
|
||||
buf[5] = zero;
|
||||
buf[6] = zero;
|
||||
buf[7] = zero;
|
||||
|
||||
uint64_t bcount = 1;
|
||||
UBI_BIG_4WAY( 352, 16 );
|
||||
|
||||
buf[0] = zero;
|
||||
buf[1] = zero;
|
||||
|
||||
bcount = 0;
|
||||
UBI_BIG_4WAY( 510, 8 );
|
||||
|
||||
casti_m256i( out, 0 ) = h0;
|
||||
casti_m256i( out, 1 ) = h1;
|
||||
casti_m256i( out, 2 ) = h2;
|
||||
casti_m256i( out, 3 ) = h3;
|
||||
casti_m256i( out, 4 ) = h4;
|
||||
casti_m256i( out, 5 ) = h5;
|
||||
casti_m256i( out, 6 ) = h6;
|
||||
casti_m256i( out, 7 ) = h7;
|
||||
}
|
||||
|
||||
void
|
||||
skein256_4way_update(void *cc, const void *data, size_t len)
|
||||
{
|
||||
@@ -806,6 +1111,9 @@ skein256_4way_close(void *cc, void *dst)
|
||||
skein_big_close_4way(cc, 0, 0, dst, 32);
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Do not use with 128 bit data
|
||||
void
|
||||
skein512_4way_update(void *cc, const void *data, size_t len)
|
||||
{
|
||||
|
@@ -63,10 +63,16 @@ typedef struct
|
||||
typedef skein_8way_big_context skein512_8way_context;
|
||||
typedef skein_8way_big_context skein256_8way_context;
|
||||
|
||||
void skein512_8way_full( skein512_8way_context *sc, void *out,
|
||||
const void *data, size_t len );
|
||||
void skein512_8way_init( skein512_8way_context *sc );
|
||||
void skein512_8way_update( void *cc, const void *data, size_t len );
|
||||
void skein512_8way_close( void *cc, void *dst );
|
||||
|
||||
void skein512_8way_prehash64( skein512_8way_context *sc, const void *data );
|
||||
void skein512_8way_final16( skein512_8way_context *sc, void *out,
|
||||
const void *data );
|
||||
|
||||
void skein256_8way_init( skein256_8way_context *sc );
|
||||
void skein256_8way_update( void *cc, const void *data, size_t len );
|
||||
void skein256_8way_close( void *cc, void *dst );
|
||||
@@ -85,6 +91,8 @@ typedef skein_4way_big_context skein512_4way_context;
|
||||
typedef skein_4way_big_context skein256_4way_context;
|
||||
|
||||
void skein512_4way_init( skein512_4way_context *sc );
|
||||
void skein512_4way_full( skein512_4way_context *sc, void *out,
|
||||
const void *data, size_t len );
|
||||
void skein512_4way_update( void *cc, const void *data, size_t len );
|
||||
void skein512_4way_close( void *cc, void *dst );
|
||||
|
||||
@@ -92,6 +100,10 @@ void skein256_4way_init( skein256_4way_context *sc );
|
||||
void skein256_4way_update( void *cc, const void *data, size_t len );
|
||||
void skein256_4way_close( void *cc, void *dst );
|
||||
|
||||
void skein512_4way_prehash64( skein512_4way_context *sc, const void *data );
|
||||
void skein512_4way_final16( skein512_4way_context *sc, void *out,
|
||||
const void *data );
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
@@ -5,114 +5,126 @@
|
||||
|
||||
#if defined(SKEIN_8WAY)
|
||||
|
||||
static __thread skein512_8way_context skein512_8way_ctx
|
||||
__attribute__ ((aligned (64)));
|
||||
|
||||
void skein2hash_8way( void *output, const void *input )
|
||||
{
|
||||
skein512_8way_context ctx;
|
||||
uint64_t hash[16*8] __attribute__ ((aligned (128)));
|
||||
skein512_8way_context ctx;
|
||||
memcpy( &ctx, &skein512_8way_ctx, sizeof( ctx ) );
|
||||
|
||||
skein512_8way_init( &ctx );
|
||||
skein512_8way_update( &ctx, input, 80 );
|
||||
skein512_8way_close( &ctx, hash );
|
||||
|
||||
skein512_8way_init( &ctx );
|
||||
skein512_8way_update( &ctx, hash, 64 );
|
||||
skein512_8way_close( &ctx, output );
|
||||
skein512_8way_final16( &ctx, hash, input + (64*8) );
|
||||
skein512_8way_full( &ctx, output, hash, 64 );
|
||||
}
|
||||
|
||||
int scanhash_skein2_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[16*8] __attribute__ ((aligned (128)));
|
||||
uint64_t hash[8*8] __attribute__ ((aligned (128)));
|
||||
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
||||
uint32_t *hash7 = &(hash[49]);
|
||||
uint64_t *hashq3 = &(hash[3*8]);
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint64_t targq3 = ((uint64_t*)ptarget)[3];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 8;
|
||||
uint32_t n = first_nonce;
|
||||
__m512i *noncev = (__m512i*)vdata + 9; // aligned
|
||||
int thr_id = mythr->id;
|
||||
__m512i *noncev = (__m512i*)vdata + 9;
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
skein512_8way_context ctx;
|
||||
|
||||
mm512_bswap32_intrlv80_8x64( vdata, pdata );
|
||||
*noncev = mm512_intrlv_blend_32(
|
||||
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
|
||||
n+3, 0, n+2, 0, n+1, 0, n , 0 ), *noncev );
|
||||
skein512_8way_prehash64( &ctx, vdata );
|
||||
do
|
||||
{
|
||||
*noncev = mm512_intrlv_blend_32( mm512_bswap_32(
|
||||
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
|
||||
n+3, 0, n+2, 0, n+1, 0, n , 0 ) ), *noncev );
|
||||
|
||||
skein2hash_8way( hash, vdata );
|
||||
skein512_8way_final16( &ctx, hash, vdata + (16*8) );
|
||||
skein512_8way_full( &ctx, hash, hash, 64 );
|
||||
|
||||
for ( int lane = 0; lane < 8; lane++ )
|
||||
if ( hash7[ lane<<1 ] <= Htarg )
|
||||
if ( unlikely( hashq3[ lane ] <= targq3 && !bench ) )
|
||||
{
|
||||
extr_lane_8x64( lane_hash, hash, lane, 256 );
|
||||
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||
if ( valid_hash( lane_hash, ptarget ) && !bench )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
pdata[19] = bswap_32( n + lane );
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
}
|
||||
}
|
||||
*noncev = _mm512_add_epi32( *noncev,
|
||||
m512_const1_64( 0x0000000800000000 ) );
|
||||
n += 8;
|
||||
} while ( (n < max_nonce-8) && !work_restart[thr_id].restart );
|
||||
} while ( likely( (n < last_nonce) && !work_restart[thr_id].restart ) );
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#elif defined(SKEIN_4WAY)
|
||||
|
||||
static __thread skein512_4way_context skein512_4way_ctx
|
||||
__attribute__ ((aligned (64)));
|
||||
|
||||
void skein2hash_4way( void *output, const void *input )
|
||||
{
|
||||
skein512_4way_context ctx;
|
||||
memcpy( &ctx, &skein512_4way_ctx, sizeof( ctx ) );
|
||||
uint64_t hash[16*4] __attribute__ ((aligned (64)));
|
||||
|
||||
skein512_4way_init( &ctx );
|
||||
skein512_4way_update( &ctx, input, 80 );
|
||||
skein512_4way_close( &ctx, hash );
|
||||
|
||||
skein512_4way_init( &ctx );
|
||||
skein512_4way_update( &ctx, hash, 64 );
|
||||
skein512_4way_close( &ctx, output );
|
||||
skein512_4way_final16( &ctx, hash, input + (64*4) );
|
||||
skein512_4way_full( &ctx, output, hash, 64 );
|
||||
}
|
||||
|
||||
int scanhash_skein2_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[16*4] __attribute__ ((aligned (64)));
|
||||
uint64_t hash[8*4] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
||||
uint32_t *hash7 = &(hash[25]);
|
||||
uint64_t *hash_q3 = &(hash[3*4]);
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint64_t targ_q3 = ((uint64_t*)ptarget)[3];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 4;
|
||||
uint32_t n = first_nonce;
|
||||
__m256i *noncev = (__m256i*)vdata + 9; // aligned
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
__m256i *noncev = (__m256i*)vdata + 9;
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
skein512_4way_context ctx;
|
||||
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
skein512_4way_prehash64( &ctx, vdata );
|
||||
*noncev = mm256_intrlv_blend_32(
|
||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
||||
do
|
||||
{
|
||||
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
|
||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
|
||||
|
||||
skein2hash_4way( hash, vdata );
|
||||
skein512_4way_final16( &ctx, hash, vdata + (16*4) );
|
||||
skein512_4way_full( &ctx, hash, hash, 64 );
|
||||
|
||||
for ( int lane = 0; lane < 4; lane++ )
|
||||
if ( hash7[ lane<<1 ] <= Htarg )
|
||||
if ( hash_q3[ lane ] <= targ_q3 )
|
||||
{
|
||||
extr_lane_4x64( lane_hash, hash, lane, 256 );
|
||||
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||
if ( valid_hash( lane_hash, ptarget ) && !bench )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
pdata[19] = bswap_32( n + lane );
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
}
|
||||
}
|
||||
*noncev = _mm256_add_epi32( *noncev,
|
||||
m256_const1_64( 0x0000000400000000 ) );
|
||||
n += 4;
|
||||
} while ( (n < max_nonce) && !work_restart[thr_id].restart );
|
||||
} while ( (n < last_nonce) && !work_restart[thr_id].restart );
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@@ -120,6 +120,13 @@ void sph_whirlpool(void *cc, const void *data, size_t len);
|
||||
*/
|
||||
void sph_whirlpool_close(void *cc, void *dst);
|
||||
|
||||
#define sph_whirlpool512_full( cc, dst, data, len ) \
|
||||
do{ \
|
||||
sph_whirlpool_init( cc ); \
|
||||
sph_whirlpool( cc, data, len ); \
|
||||
sph_whirlpool_close( cc, dst ); \
|
||||
}while(0)
|
||||
|
||||
/**
|
||||
* WHIRLPOOL-0 uses the same structure than plain WHIRLPOOL.
|
||||
*/
|
||||
|
@@ -35,8 +35,7 @@ void skunk_8way_hash( void *output, const void *input )
|
||||
skunk_8way_ctx_holder ctx __attribute__ ((aligned (64)));
|
||||
memcpy( &ctx, &skunk_8way_ctx, sizeof(skunk_8way_ctx) );
|
||||
|
||||
skein512_8way_update( &ctx.skein, input, 80 );
|
||||
skein512_8way_close( &ctx.skein, vhash );
|
||||
skein512_8way_final16( &ctx.skein, vhash, input );
|
||||
dintrlv_8x64( hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7, vhash, 512 );
|
||||
|
||||
@@ -104,35 +103,35 @@ int scanhash_skunk_8way( struct work *work, uint32_t max_nonce,
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 8;
|
||||
uint32_t n = first_nonce;
|
||||
__m512i *noncev = (__m512i*)vdata + 9; // aligned
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
int thr_id = mythr->id;
|
||||
__m512i *noncev = (__m512i*)vdata + 9;
|
||||
const int thr_id = mythr->id;
|
||||
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
if ( opt_benchmark )
|
||||
((uint32_t*)ptarget)[7] = 0x0cff;
|
||||
if ( bench ) ptarget[7] = 0x0fff;
|
||||
|
||||
mm512_bswap32_intrlv80_8x64( vdata, pdata );
|
||||
skein512_8way_prehash64( &skunk_8way_ctx.skein, vdata );
|
||||
*noncev = mm512_intrlv_blend_32(
|
||||
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
|
||||
n+3, 0, n+2, 0, n+1, 0, n , 0 ), *noncev );
|
||||
do
|
||||
{
|
||||
*noncev = mm512_intrlv_blend_32( mm512_bswap_32(
|
||||
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
|
||||
n+3, 0, n+2, 0, n+1, 0, n , 0 ) ), *noncev );
|
||||
|
||||
skunk_8way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
for ( int i = 0; i < 8; i++ )
|
||||
if ( unlikely( (hash+(i<<3))[7] <= Htarg ) )
|
||||
if ( likely( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark ) )
|
||||
if ( unlikely( valid_hash( hash+(i<<3), ptarget ) && !bench ) )
|
||||
{
|
||||
pdata[19] = n+i;
|
||||
pdata[19] = bswap_32( n+i );
|
||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
||||
}
|
||||
*noncev = _mm512_add_epi32( *noncev,
|
||||
m512_const1_64( 0x0000000800000000 ) );
|
||||
n +=8;
|
||||
} while ( likely( ( n < max_nonce-8 ) && !(*restart) ) );
|
||||
|
||||
} while ( likely( ( n < last_nonce ) && !( *restart ) ) );
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
@@ -159,17 +158,16 @@ static __thread skunk_4way_ctx_holder skunk_4way_ctx;
|
||||
|
||||
void skunk_4way_hash( void *output, const void *input )
|
||||
{
|
||||
uint64_t vhash[8*4] __attribute__ ((aligned (128)));
|
||||
uint64_t hash0[8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash1[8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash2[8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash3[8] __attribute__ ((aligned (64)));
|
||||
uint64_t vhash[8*4] __attribute__ ((aligned (64)));
|
||||
|
||||
skunk_4way_ctx_holder ctx __attribute__ ((aligned (64)));
|
||||
memcpy( &ctx, &skunk_4way_ctx, sizeof(skunk_4way_ctx) );
|
||||
|
||||
skein512_4way_update( &ctx.skein, input, 80 );
|
||||
skein512_4way_close( &ctx.skein, vhash );
|
||||
skein512_4way_final16( &ctx.skein, vhash, input + (64*4) );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*) hash0, (const byte*)hash0, 64 );
|
||||
@@ -213,40 +211,40 @@ void skunk_4way_hash( void *output, const void *input )
|
||||
int scanhash_skunk_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[4*8] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[4*8] __attribute__ ((aligned (128)));
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 4;
|
||||
uint32_t n = first_nonce;
|
||||
__m256i *noncev = (__m256i*)vdata + 9; // aligned
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
||||
__m256i *noncev = (__m256i*)vdata + 9;
|
||||
const int thr_id = mythr->id;
|
||||
volatile uint8_t *restart = &( work_restart[ thr_id ].restart );
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
if ( opt_benchmark )
|
||||
((uint32_t*)ptarget)[7] = 0x0cff;
|
||||
if ( bench ) ptarget[7] = 0x0fff;
|
||||
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
skein512_4way_prehash64( &skunk_4way_ctx.skein, vdata );
|
||||
*noncev = mm256_intrlv_blend_32(
|
||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
||||
do
|
||||
{
|
||||
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
|
||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
|
||||
|
||||
skunk_4way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( (hash+(i<<3))[7] <= Htarg )
|
||||
if ( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
||||
if ( unlikely( valid_hash( hash+(i<<3), ptarget ) && !bench ) )
|
||||
{
|
||||
pdata[19] = n+i;
|
||||
pdata[19] = bswap_32( n + i );
|
||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
||||
}
|
||||
*noncev = _mm256_add_epi32( *noncev,
|
||||
m256_const1_64( 0x0000000400000000 ) );
|
||||
n +=4;
|
||||
} while ( ( n < max_nonce ) && !(*restart) );
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
} while ( likely( ( n < last_nonce ) && !( *restart ) ) );
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@@ -30,9 +30,6 @@
|
||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||
#endif
|
||||
|
||||
static __thread uint32_t s_ntime = UINT32_MAX;
|
||||
static __thread char hashOrder[X16R_HASH_FUNC_COUNT + 1] = { 0 };
|
||||
|
||||
static void hex_getAlgoString(const uint32_t* prevblock, char *output)
|
||||
{
|
||||
char *sptr = output;
|
||||
@@ -50,6 +47,7 @@ static void hex_getAlgoString(const uint32_t* prevblock, char *output)
|
||||
*sptr = '\0';
|
||||
}
|
||||
|
||||
/*
|
||||
union _hex_context_overlay
|
||||
{
|
||||
#if defined(__AES__)
|
||||
@@ -66,7 +64,7 @@ union _hex_context_overlay
|
||||
sph_keccak512_context keccak;
|
||||
hashState_luffa luffa;
|
||||
cubehashParam cube;
|
||||
sph_shavite512_context shavite;
|
||||
shavite512_context shavite;
|
||||
hashState_sd simd;
|
||||
sph_hamsi512_context hamsi;
|
||||
sph_fugue512_context fugue;
|
||||
@@ -75,18 +73,19 @@ union _hex_context_overlay
|
||||
SHA512_CTX sha512;
|
||||
};
|
||||
typedef union _hex_context_overlay hex_context_overlay;
|
||||
*/
|
||||
|
||||
static __thread hex_context_overlay hex_ctx;
|
||||
static __thread x16r_context_overlay hex_ctx;
|
||||
|
||||
void hex_hash( void* output, const void* input )
|
||||
{
|
||||
uint32_t _ALIGN(128) hash[16];
|
||||
hex_context_overlay ctx;
|
||||
x16r_context_overlay ctx;
|
||||
memcpy( &ctx, &hex_ctx, sizeof(ctx) );
|
||||
void *in = (void*) input;
|
||||
int size = 80;
|
||||
|
||||
char elem = hashOrder[0];
|
||||
char elem = x16r_hash_order[0];
|
||||
uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
|
||||
|
||||
for ( int i = 0; i < 16; i++ )
|
||||
@@ -160,9 +159,7 @@ void hex_hash( void* output, const void* input )
|
||||
}
|
||||
break;
|
||||
case SHAVITE:
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, in, size );
|
||||
sph_shavite512_close( &ctx.shavite, hash );
|
||||
shavite512_full( &ctx.shavite, hash, in, size );
|
||||
break;
|
||||
case SIMD:
|
||||
init_sd( &ctx.simd, 512 );
|
||||
@@ -190,9 +187,7 @@ void hex_hash( void* output, const void* input )
|
||||
sph_hamsi512_close( &ctx.hamsi, hash );
|
||||
break;
|
||||
case FUGUE:
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, in, size );
|
||||
sph_fugue512_close( &ctx.fugue, hash );
|
||||
sph_fugue512_full( &ctx.fugue, hash, in, size );
|
||||
break;
|
||||
case SHABAL:
|
||||
if ( i == 0 )
|
||||
@@ -206,13 +201,12 @@ void hex_hash( void* output, const void* input )
|
||||
break;
|
||||
case WHIRLPOOL:
|
||||
if ( i == 0 )
|
||||
sph_whirlpool( &ctx.whirlpool, in+64, 16 );
|
||||
else
|
||||
{
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, in, size );
|
||||
sph_whirlpool( &ctx.whirlpool, in+64, 16 );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash );
|
||||
}
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash );
|
||||
else
|
||||
sph_whirlpool512_full( &ctx.whirlpool, hash, in, size );
|
||||
break;
|
||||
case SHA_512:
|
||||
SHA512_Init( &ctx.sha512 );
|
||||
@@ -235,7 +229,7 @@ int scanhash_hex( struct work *work, uint32_t max_nonce,
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 4;
|
||||
const uint32_t last_nonce = max_nonce;
|
||||
const int thr_id = mythr->id;
|
||||
uint32_t nonce = first_nonce;
|
||||
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
||||
@@ -244,17 +238,18 @@ int scanhash_hex( struct work *work, uint32_t max_nonce,
|
||||
|
||||
mm128_bswap32_80( edata, pdata );
|
||||
|
||||
static __thread uint32_t s_ntime = UINT32_MAX;
|
||||
uint32_t ntime = swab32(pdata[17]);
|
||||
if ( s_ntime != ntime )
|
||||
{
|
||||
hex_getAlgoString( (const uint32_t*) (&edata[1]), hashOrder );
|
||||
hex_getAlgoString( (const uint32_t*) (&edata[1]), x16r_hash_order );
|
||||
s_ntime = ntime;
|
||||
if ( opt_debug && !thr_id )
|
||||
applog( LOG_INFO, "hash order %s (%08x)", hashOrder, ntime );
|
||||
applog( LOG_INFO, "hash order %s (%08x)", x16r_hash_order, ntime );
|
||||
}
|
||||
|
||||
// Do midstate prehash on hash functions with block size <= 64 bytes.
|
||||
const char elem = hashOrder[0];
|
||||
const char elem = x16r_hash_order[0];
|
||||
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
|
||||
switch ( algo )
|
||||
{
|
||||
|
@@ -287,30 +287,14 @@ void x16r_8way_hash_generic( void* output, const void* input )
|
||||
shavite512_4way_full( &ctx.shavite, vhash, vhash, size );
|
||||
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash );
|
||||
#else
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, in0, size );
|
||||
sph_shavite512_close( &ctx.shavite, hash0 );
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, in1, size );
|
||||
sph_shavite512_close( &ctx.shavite, hash1 );
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, in2, size );
|
||||
sph_shavite512_close( &ctx.shavite, hash2 );
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, in3, size );
|
||||
sph_shavite512_close( &ctx.shavite, hash3 );
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, in4, size );
|
||||
sph_shavite512_close( &ctx.shavite, hash4 );
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, in5, size );
|
||||
sph_shavite512_close( &ctx.shavite, hash5 );
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, in6, size );
|
||||
sph_shavite512_close( &ctx.shavite, hash6 );
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, in7, size );
|
||||
sph_shavite512_close( &ctx.shavite, hash7 );
|
||||
shavite512_full( &ctx.shavite, hash0, in0, size );
|
||||
shavite512_full( &ctx.shavite, hash1, in1, size );
|
||||
shavite512_full( &ctx.shavite, hash2, in2, size );
|
||||
shavite512_full( &ctx.shavite, hash3, in3, size );
|
||||
shavite512_full( &ctx.shavite, hash4, in4, size );
|
||||
shavite512_full( &ctx.shavite, hash5, in5, size );
|
||||
shavite512_full( &ctx.shavite, hash6, in6, size );
|
||||
shavite512_full( &ctx.shavite, hash7, in7, size );
|
||||
#endif
|
||||
break;
|
||||
case SIMD:
|
||||
@@ -363,30 +347,14 @@ void x16r_8way_hash_generic( void* output, const void* input )
|
||||
hash7, vhash );
|
||||
break;
|
||||
case FUGUE:
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, in0, size );
|
||||
sph_fugue512_close( &ctx.fugue, hash0 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, in1, size );
|
||||
sph_fugue512_close( &ctx.fugue, hash1 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, in2, size );
|
||||
sph_fugue512_close( &ctx.fugue, hash2 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, in3, size );
|
||||
sph_fugue512_close( &ctx.fugue, hash3 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, in4, size );
|
||||
sph_fugue512_close( &ctx.fugue, hash4 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, in5, size );
|
||||
sph_fugue512_close( &ctx.fugue, hash5 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, in6, size );
|
||||
sph_fugue512_close( &ctx.fugue, hash6 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, in7, size );
|
||||
sph_fugue512_close( &ctx.fugue, hash7 );
|
||||
sph_fugue512_full( &ctx.fugue, hash0, in0, size );
|
||||
sph_fugue512_full( &ctx.fugue, hash1, in1, size );
|
||||
sph_fugue512_full( &ctx.fugue, hash2, in2, size );
|
||||
sph_fugue512_full( &ctx.fugue, hash3, in3, size );
|
||||
sph_fugue512_full( &ctx.fugue, hash4, in4, size );
|
||||
sph_fugue512_full( &ctx.fugue, hash5, in5, size );
|
||||
sph_fugue512_full( &ctx.fugue, hash6, in6, size );
|
||||
sph_fugue512_full( &ctx.fugue, hash7, in7, size );
|
||||
break;
|
||||
case SHABAL:
|
||||
intrlv_8x32( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
|
||||
@@ -431,30 +399,14 @@ void x16r_8way_hash_generic( void* output, const void* input )
|
||||
}
|
||||
else
|
||||
{
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, in0, size );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash0 );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, in1, size );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash1 );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, in2, size );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash2 );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, in3, size );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash3 );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, in4, size );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash4 );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, in5, size );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash5 );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, in6, size );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash6 );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, in7, size );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash7 );
|
||||
sph_whirlpool512_full( &ctx.whirlpool, hash0, in0, size );
|
||||
sph_whirlpool512_full( &ctx.whirlpool, hash1, in1, size );
|
||||
sph_whirlpool512_full( &ctx.whirlpool, hash2, in2, size );
|
||||
sph_whirlpool512_full( &ctx.whirlpool, hash3, in3, size );
|
||||
sph_whirlpool512_full( &ctx.whirlpool, hash4, in4, size );
|
||||
sph_whirlpool512_full( &ctx.whirlpool, hash5, in5, size );
|
||||
sph_whirlpool512_full( &ctx.whirlpool, hash6, in6, size );
|
||||
sph_whirlpool512_full( &ctx.whirlpool, hash7, in7, size );
|
||||
}
|
||||
break;
|
||||
case SHA_512:
|
||||
@@ -576,8 +528,7 @@ void x16r_4way_prehash( void *vdata, void *pdata )
|
||||
break;
|
||||
case SKEIN:
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
skein512_4way_init( &x16r_ctx.skein );
|
||||
skein512_4way_update( &x16r_ctx.skein, vdata, 64 );
|
||||
skein512_4way_prehash64( &x16r_ctx.skein, vdata );
|
||||
break;
|
||||
case LUFFA:
|
||||
mm128_bswap32_80( edata, pdata );
|
||||
@@ -692,14 +643,12 @@ void x16r_4way_hash_generic( void* output, const void* input )
|
||||
break;
|
||||
case SKEIN:
|
||||
if ( i == 0 )
|
||||
skein512_4way_update( &ctx.skein, input + (64<<2), 16 );
|
||||
skein512_4way_final16( &ctx.skein, vhash, input + (64*4) );
|
||||
else
|
||||
{
|
||||
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
||||
skein512_4way_init( &ctx.skein );
|
||||
skein512_4way_update( &ctx.skein, vhash, size );
|
||||
skein512_4way_full( &ctx.skein, vhash, vhash, size );
|
||||
}
|
||||
skein512_4way_close( &ctx.skein, vhash );
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
break;
|
||||
case LUFFA:
|
||||
@@ -755,18 +704,10 @@ void x16r_4way_hash_generic( void* output, const void* input )
|
||||
}
|
||||
break;
|
||||
case SHAVITE:
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, in0, size );
|
||||
sph_shavite512_close( &ctx.shavite, hash0 );
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, in1, size );
|
||||
sph_shavite512_close( &ctx.shavite, hash1 );
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, in2, size );
|
||||
sph_shavite512_close( &ctx.shavite, hash2 );
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, in3, size );
|
||||
sph_shavite512_close( &ctx.shavite, hash3 );
|
||||
shavite512_full( &ctx.shavite, hash0, in0, size );
|
||||
shavite512_full( &ctx.shavite, hash1, in1, size );
|
||||
shavite512_full( &ctx.shavite, hash2, in2, size );
|
||||
shavite512_full( &ctx.shavite, hash3, in3, size );
|
||||
break;
|
||||
case SIMD:
|
||||
intrlv_2x128( vhash, in0, in1, size<<3 );
|
||||
@@ -799,18 +740,10 @@ void x16r_4way_hash_generic( void* output, const void* input )
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
break;
|
||||
case FUGUE:
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, in0, size );
|
||||
sph_fugue512_close( &ctx.fugue, hash0 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, in1, size );
|
||||
sph_fugue512_close( &ctx.fugue, hash1 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, in2, size );
|
||||
sph_fugue512_close( &ctx.fugue, hash2 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, in3, size );
|
||||
sph_fugue512_close( &ctx.fugue, hash3 );
|
||||
sph_fugue512_full( &ctx.fugue, hash0, in0, size );
|
||||
sph_fugue512_full( &ctx.fugue, hash1, in1, size );
|
||||
sph_fugue512_full( &ctx.fugue, hash2, in2, size );
|
||||
sph_fugue512_full( &ctx.fugue, hash3, in3, size );
|
||||
break;
|
||||
case SHABAL:
|
||||
intrlv_4x32( vhash, in0, in1, in2, in3, size<<3 );
|
||||
@@ -841,18 +774,10 @@ void x16r_4way_hash_generic( void* output, const void* input )
|
||||
}
|
||||
else
|
||||
{
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, in0, size );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash0 );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, in1, size );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash1 );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, in2, size );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash2 );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, in3, size );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash3 );
|
||||
sph_whirlpool512_full( &ctx.whirlpool, hash0, in0, size );
|
||||
sph_whirlpool512_full( &ctx.whirlpool, hash1, in1, size );
|
||||
sph_whirlpool512_full( &ctx.whirlpool, hash2, in2, size );
|
||||
sph_whirlpool512_full( &ctx.whirlpool, hash3, in3, size );
|
||||
}
|
||||
break;
|
||||
case SHA_512:
|
||||
|
@@ -121,7 +121,7 @@ union _x16r_8way_context_overlay
|
||||
echo_4way_context echo;
|
||||
#else
|
||||
hashState_groestl groestl;
|
||||
sph_shavite512_context shavite;
|
||||
shavite512_context shavite;
|
||||
hashState_echo echo;
|
||||
#endif
|
||||
} __attribute__ ((aligned (64)));
|
||||
@@ -152,7 +152,7 @@ union _x16r_4way_context_overlay
|
||||
luffa_2way_context luffa;
|
||||
hashState_luffa luffa1;
|
||||
cubehashParam cube;
|
||||
sph_shavite512_context shavite;
|
||||
shavite512_context shavite;
|
||||
simd_2way_context simd;
|
||||
hamsi512_4way_context hamsi;
|
||||
sph_fugue512_context fugue;
|
||||
@@ -191,7 +191,7 @@ union _x16r_context_overlay
|
||||
sph_keccak512_context keccak;
|
||||
hashState_luffa luffa;
|
||||
cubehashParam cube;
|
||||
sph_shavite512_context shavite;
|
||||
shavite512_context shavite;
|
||||
hashState_sd simd;
|
||||
sph_hamsi512_context hamsi;
|
||||
sph_fugue512_context fugue;
|
||||
|
@@ -124,9 +124,7 @@ void x16r_hash_generic( void* output, const void* input )
|
||||
(byte*)in, size );
|
||||
break;
|
||||
case SHAVITE:
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, in, size );
|
||||
sph_shavite512_close( &ctx.shavite, hash );
|
||||
shavite512_full( &ctx.shavite, hash, in, size );
|
||||
break;
|
||||
case SIMD:
|
||||
simd_full( &ctx.simd, (BitSequence *)hash,
|
||||
@@ -153,9 +151,7 @@ void x16r_hash_generic( void* output, const void* input )
|
||||
sph_hamsi512_close( &ctx.hamsi, hash );
|
||||
break;
|
||||
case FUGUE:
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, in, size );
|
||||
sph_fugue512_close( &ctx.fugue, hash );
|
||||
sph_fugue512_full( &ctx.fugue, hash, in, size );
|
||||
break;
|
||||
case SHABAL:
|
||||
if ( i == 0 )
|
||||
@@ -169,13 +165,12 @@ void x16r_hash_generic( void* output, const void* input )
|
||||
break;
|
||||
case WHIRLPOOL:
|
||||
if ( i == 0 )
|
||||
sph_whirlpool( &ctx.whirlpool, in+64, 16 );
|
||||
else
|
||||
{
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, in, size );
|
||||
sph_whirlpool( &ctx.whirlpool, in+64, 16 );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash );
|
||||
}
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash );
|
||||
else
|
||||
sph_whirlpool512_full( &ctx.whirlpool, hash, in, size );
|
||||
break;
|
||||
case SHA_512:
|
||||
SHA512_Init( &ctx.sha512 );
|
||||
@@ -238,7 +233,7 @@ int scanhash_x16r( struct work *work, uint32_t max_nonce,
|
||||
nonce++;
|
||||
} while ( nonce < max_nonce && !(*restart) );
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce + 1;
|
||||
*hashes_done = pdata[19] - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@@ -46,7 +46,7 @@ int scanhash_x16rt( struct work *work, uint32_t max_nonce,
|
||||
nonce++;
|
||||
} while ( nonce < max_nonce && !(*restart) );
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce + 1;
|
||||
*hashes_done = pdata[19] - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@@ -35,9 +35,6 @@
|
||||
|
||||
#if defined (X16RV2_8WAY)
|
||||
|
||||
static __thread uint32_t s_ntime = UINT32_MAX;
|
||||
static __thread char hashOrder[X16R_HASH_FUNC_COUNT + 1] = { 0 };
|
||||
|
||||
union _x16rv2_8way_context_overlay
|
||||
{
|
||||
blake512_8way_context blake;
|
||||
@@ -60,7 +57,7 @@ union _x16rv2_8way_context_overlay
|
||||
echo_4way_context echo;
|
||||
#else
|
||||
hashState_groestl groestl;
|
||||
sph_shavite512_context shavite;
|
||||
shavite512_context shavite;
|
||||
hashState_echo echo;
|
||||
#endif
|
||||
} __attribute__ ((aligned (64)));
|
||||
@@ -96,7 +93,7 @@ void x16rv2_8way_hash( void* output, const void* input )
|
||||
|
||||
for ( int i = 0; i < 16; i++ )
|
||||
{
|
||||
const char elem = hashOrder[i];
|
||||
const char elem = x16r_hash_order[i];
|
||||
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
|
||||
|
||||
switch ( algo )
|
||||
@@ -374,30 +371,14 @@ void x16rv2_8way_hash( void* output, const void* input )
|
||||
shavite512_4way_full( &ctx.shavite, vhash, vhash, size );
|
||||
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash );
|
||||
#else
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, in0, size );
|
||||
sph_shavite512_close( &ctx.shavite, hash0 );
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, in1, size );
|
||||
sph_shavite512_close( &ctx.shavite, hash1 );
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, in2, size );
|
||||
sph_shavite512_close( &ctx.shavite, hash2 );
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, in3, size );
|
||||
sph_shavite512_close( &ctx.shavite, hash3 );
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, in4, size );
|
||||
sph_shavite512_close( &ctx.shavite, hash4 );
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, in5, size );
|
||||
sph_shavite512_close( &ctx.shavite, hash5 );
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, in6, size );
|
||||
sph_shavite512_close( &ctx.shavite, hash6 );
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, in7, size );
|
||||
sph_shavite512_close( &ctx.shavite, hash7 );
|
||||
shavite512_full( &ctx.shavite, hash0, in0, size );
|
||||
shavite512_full( &ctx.shavite, hash1, in1, size );
|
||||
shavite512_full( &ctx.shavite, hash2, in2, size );
|
||||
shavite512_full( &ctx.shavite, hash3, in3, size );
|
||||
shavite512_full( &ctx.shavite, hash4, in4, size );
|
||||
shavite512_full( &ctx.shavite, hash5, in5, size );
|
||||
shavite512_full( &ctx.shavite, hash6, in6, size );
|
||||
shavite512_full( &ctx.shavite, hash7, in7, size );
|
||||
#endif
|
||||
break;
|
||||
case SIMD:
|
||||
@@ -451,30 +432,14 @@ void x16rv2_8way_hash( void* output, const void* input )
|
||||
hash7, vhash );
|
||||
break;
|
||||
case FUGUE:
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, in0, size );
|
||||
sph_fugue512_close( &ctx.fugue, hash0 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, in1, size );
|
||||
sph_fugue512_close( &ctx.fugue, hash1 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, in2, size );
|
||||
sph_fugue512_close( &ctx.fugue, hash2 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, in3, size );
|
||||
sph_fugue512_close( &ctx.fugue, hash3 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, in4, size );
|
||||
sph_fugue512_close( &ctx.fugue, hash4 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, in5, size );
|
||||
sph_fugue512_close( &ctx.fugue, hash5 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, in6, size );
|
||||
sph_fugue512_close( &ctx.fugue, hash6 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, in7, size );
|
||||
sph_fugue512_close( &ctx.fugue, hash7 );
|
||||
sph_fugue512_full( &ctx.fugue, hash0, in0, size );
|
||||
sph_fugue512_full( &ctx.fugue, hash1, in1, size );
|
||||
sph_fugue512_full( &ctx.fugue, hash2, in2, size );
|
||||
sph_fugue512_full( &ctx.fugue, hash3, in3, size );
|
||||
sph_fugue512_full( &ctx.fugue, hash4, in4, size );
|
||||
sph_fugue512_full( &ctx.fugue, hash5, in5, size );
|
||||
sph_fugue512_full( &ctx.fugue, hash6, in6, size );
|
||||
sph_fugue512_full( &ctx.fugue, hash7, in7, size );
|
||||
break;
|
||||
case SHABAL:
|
||||
intrlv_8x32( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
|
||||
@@ -519,30 +484,14 @@ void x16rv2_8way_hash( void* output, const void* input )
|
||||
}
|
||||
else
|
||||
{
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, in0, size );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash0 );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, in1, size );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash1 );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, in2, size );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash2 );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, in3, size );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash3 );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, in4, size );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash4 );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, in5, size );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash5 );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, in6, size );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash6 );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, in7, size );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash7 );
|
||||
sph_whirlpool512_full( &ctx.whirlpool, hash0, in0, size );
|
||||
sph_whirlpool512_full( &ctx.whirlpool, hash1, in1, size );
|
||||
sph_whirlpool512_full( &ctx.whirlpool, hash2, in2, size );
|
||||
sph_whirlpool512_full( &ctx.whirlpool, hash3, in3, size );
|
||||
sph_whirlpool512_full( &ctx.whirlpool, hash4, in4, size );
|
||||
sph_whirlpool512_full( &ctx.whirlpool, hash5, in5, size );
|
||||
sph_whirlpool512_full( &ctx.whirlpool, hash6, in6, size );
|
||||
sph_whirlpool512_full( &ctx.whirlpool, hash7, in7, size );
|
||||
}
|
||||
break;
|
||||
case SHA_512:
|
||||
@@ -651,17 +600,19 @@ int scanhash_x16rv2_8way( struct work *work, uint32_t max_nonce,
|
||||
|
||||
bedata1[0] = bswap_32( pdata[1] );
|
||||
bedata1[1] = bswap_32( pdata[2] );
|
||||
|
||||
static __thread uint32_t s_ntime = UINT32_MAX;
|
||||
const uint32_t ntime = bswap_32( pdata[17] );
|
||||
if ( s_ntime != ntime )
|
||||
{
|
||||
x16_r_s_getAlgoString( (const uint8_t*)bedata1, hashOrder );
|
||||
x16_r_s_getAlgoString( (const uint8_t*)bedata1, x16r_hash_order );
|
||||
s_ntime = ntime;
|
||||
if ( opt_debug && !thr_id )
|
||||
applog( LOG_INFO, "hash order %s (%08x)", hashOrder, ntime );
|
||||
applog( LOG_INFO, "hash order %s (%08x)", x16r_hash_order, ntime );
|
||||
}
|
||||
|
||||
// Do midstate prehash on hash functions with block size <= 64 bytes.
|
||||
const char elem = hashOrder[0];
|
||||
const char elem = x16r_hash_order[0];
|
||||
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
|
||||
switch ( algo )
|
||||
{
|
||||
@@ -737,9 +688,6 @@ int scanhash_x16rv2_8way( struct work *work, uint32_t max_nonce,
|
||||
|
||||
#elif defined (X16RV2_4WAY)
|
||||
|
||||
static __thread uint32_t s_ntime = UINT32_MAX;
|
||||
static __thread char hashOrder[X16R_HASH_FUNC_COUNT + 1] = { 0 };
|
||||
|
||||
union _x16rv2_4way_context_overlay
|
||||
{
|
||||
blake512_4way_context blake;
|
||||
@@ -751,7 +699,7 @@ union _x16rv2_4way_context_overlay
|
||||
keccak512_4way_context keccak;
|
||||
luffa_2way_context luffa;
|
||||
cubehashParam cube;
|
||||
sph_shavite512_context shavite;
|
||||
shavite512_context shavite;
|
||||
simd_2way_context simd;
|
||||
hamsi512_4way_context hamsi;
|
||||
sph_fugue512_context fugue;
|
||||
@@ -789,7 +737,7 @@ void x16rv2_4way_hash( void* output, const void* input )
|
||||
|
||||
for ( int i = 0; i < 16; i++ )
|
||||
{
|
||||
const char elem = hashOrder[i];
|
||||
const char elem = x16r_hash_order[i];
|
||||
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
|
||||
|
||||
switch ( algo )
|
||||
@@ -835,47 +783,47 @@ void x16rv2_4way_hash( void* output, const void* input )
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
break;
|
||||
case KECCAK:
|
||||
if ( i == 0 )
|
||||
{
|
||||
sph_tiger( &ctx.tiger, in0 + 64, 16 );
|
||||
sph_tiger_close( &ctx.tiger, hash0 );
|
||||
memcpy( &ctx, &x16rv2_ctx, sizeof(ctx) );
|
||||
sph_tiger( &ctx.tiger, in1 + 64, 16 );
|
||||
sph_tiger_close( &ctx.tiger, hash1 );
|
||||
memcpy( &ctx, &x16rv2_ctx, sizeof(ctx) );
|
||||
sph_tiger( &ctx.tiger, in2 + 64, 16 );
|
||||
sph_tiger_close( &ctx.tiger, hash2 );
|
||||
memcpy( &ctx, &x16rv2_ctx, sizeof(ctx) );
|
||||
sph_tiger( &ctx.tiger, in3 + 64, 16 );
|
||||
sph_tiger_close( &ctx.tiger, hash3 );
|
||||
}
|
||||
else
|
||||
{
|
||||
sph_tiger_init( &ctx.tiger );
|
||||
sph_tiger( &ctx.tiger, in0, size );
|
||||
sph_tiger_close( &ctx.tiger, hash0 );
|
||||
sph_tiger_init( &ctx.tiger );
|
||||
sph_tiger( &ctx.tiger, in1, size );
|
||||
sph_tiger_close( &ctx.tiger, hash1 );
|
||||
sph_tiger_init( &ctx.tiger );
|
||||
sph_tiger( &ctx.tiger, in2, size );
|
||||
sph_tiger_close( &ctx.tiger, hash2 );
|
||||
sph_tiger_init( &ctx.tiger );
|
||||
sph_tiger( &ctx.tiger, in3, size );
|
||||
sph_tiger_close( &ctx.tiger, hash3 );
|
||||
}
|
||||
for ( int i = (24/4); i < (64/4); i++ )
|
||||
if ( i == 0 )
|
||||
{
|
||||
sph_tiger( &ctx.tiger, in0 + 64, 16 );
|
||||
sph_tiger_close( &ctx.tiger, hash0 );
|
||||
memcpy( &ctx, &x16rv2_ctx, sizeof(ctx) );
|
||||
sph_tiger( &ctx.tiger, in1 + 64, 16 );
|
||||
sph_tiger_close( &ctx.tiger, hash1 );
|
||||
memcpy( &ctx, &x16rv2_ctx, sizeof(ctx) );
|
||||
sph_tiger( &ctx.tiger, in2 + 64, 16 );
|
||||
sph_tiger_close( &ctx.tiger, hash2 );
|
||||
memcpy( &ctx, &x16rv2_ctx, sizeof(ctx) );
|
||||
sph_tiger( &ctx.tiger, in3 + 64, 16 );
|
||||
sph_tiger_close( &ctx.tiger, hash3 );
|
||||
}
|
||||
else
|
||||
{
|
||||
sph_tiger_init( &ctx.tiger );
|
||||
sph_tiger( &ctx.tiger, in0, size );
|
||||
sph_tiger_close( &ctx.tiger, hash0 );
|
||||
sph_tiger_init( &ctx.tiger );
|
||||
sph_tiger( &ctx.tiger, in1, size );
|
||||
sph_tiger_close( &ctx.tiger, hash1 );
|
||||
sph_tiger_init( &ctx.tiger );
|
||||
sph_tiger( &ctx.tiger, in2, size );
|
||||
sph_tiger_close( &ctx.tiger, hash2 );
|
||||
sph_tiger_init( &ctx.tiger );
|
||||
sph_tiger( &ctx.tiger, in3, size );
|
||||
sph_tiger_close( &ctx.tiger, hash3 );
|
||||
}
|
||||
for ( int i = (24/4); i < (64/4); i++ )
|
||||
hash0[i] = hash1[i] = hash2[i] = hash3[i] = 0;
|
||||
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
keccak512_4way_init( &ctx.keccak );
|
||||
keccak512_4way_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_close( &ctx.keccak, vhash );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
keccak512_4way_init( &ctx.keccak );
|
||||
keccak512_4way_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_close( &ctx.keccak, vhash );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
break;
|
||||
case SKEIN:
|
||||
if ( i == 0 )
|
||||
skein512_4way_update( &ctx.skein, input + (64<<2), 16 );
|
||||
skein512_4way_final16( &ctx.skein, vhash, input + (64*4) );
|
||||
else
|
||||
{
|
||||
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
||||
@@ -886,46 +834,46 @@ void x16rv2_4way_hash( void* output, const void* input )
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
break;
|
||||
case LUFFA:
|
||||
if ( i == 0 )
|
||||
{
|
||||
sph_tiger( &ctx.tiger, in0 + 64, 16 );
|
||||
sph_tiger_close( &ctx.tiger, hash0 );
|
||||
memcpy( &ctx, &x16rv2_ctx, sizeof(ctx) );
|
||||
sph_tiger( &ctx.tiger, in1 + 64, 16 );
|
||||
sph_tiger_close( &ctx.tiger, hash1 );
|
||||
memcpy( &ctx, &x16rv2_ctx, sizeof(ctx) );
|
||||
sph_tiger( &ctx.tiger, in2 + 64, 16 );
|
||||
sph_tiger_close( &ctx.tiger, hash2 );
|
||||
memcpy( &ctx, &x16rv2_ctx, sizeof(ctx) );
|
||||
sph_tiger( &ctx.tiger, in3 + 64, 16 );
|
||||
sph_tiger_close( &ctx.tiger, hash3 );
|
||||
}
|
||||
else
|
||||
{
|
||||
sph_tiger_init( &ctx.tiger );
|
||||
sph_tiger( &ctx.tiger, in0, size );
|
||||
sph_tiger_close( &ctx.tiger, hash0 );
|
||||
sph_tiger_init( &ctx.tiger );
|
||||
sph_tiger( &ctx.tiger, in1, size );
|
||||
sph_tiger_close( &ctx.tiger, hash1 );
|
||||
sph_tiger_init( &ctx.tiger );
|
||||
sph_tiger( &ctx.tiger, in2, size );
|
||||
sph_tiger_close( &ctx.tiger, hash2 );
|
||||
sph_tiger_init( &ctx.tiger );
|
||||
sph_tiger( &ctx.tiger, in3, size );
|
||||
sph_tiger_close( &ctx.tiger, hash3 );
|
||||
}
|
||||
for ( int i = (24/4); i < (64/4); i++ )
|
||||
if ( i == 0 )
|
||||
{
|
||||
sph_tiger( &ctx.tiger, in0 + 64, 16 );
|
||||
sph_tiger_close( &ctx.tiger, hash0 );
|
||||
memcpy( &ctx, &x16rv2_ctx, sizeof(ctx) );
|
||||
sph_tiger( &ctx.tiger, in1 + 64, 16 );
|
||||
sph_tiger_close( &ctx.tiger, hash1 );
|
||||
memcpy( &ctx, &x16rv2_ctx, sizeof(ctx) );
|
||||
sph_tiger( &ctx.tiger, in2 + 64, 16 );
|
||||
sph_tiger_close( &ctx.tiger, hash2 );
|
||||
memcpy( &ctx, &x16rv2_ctx, sizeof(ctx) );
|
||||
sph_tiger( &ctx.tiger, in3 + 64, 16 );
|
||||
sph_tiger_close( &ctx.tiger, hash3 );
|
||||
}
|
||||
else
|
||||
{
|
||||
sph_tiger_init( &ctx.tiger );
|
||||
sph_tiger( &ctx.tiger, in0, size );
|
||||
sph_tiger_close( &ctx.tiger, hash0 );
|
||||
sph_tiger_init( &ctx.tiger );
|
||||
sph_tiger( &ctx.tiger, in1, size );
|
||||
sph_tiger_close( &ctx.tiger, hash1 );
|
||||
sph_tiger_init( &ctx.tiger );
|
||||
sph_tiger( &ctx.tiger, in2, size );
|
||||
sph_tiger_close( &ctx.tiger, hash2 );
|
||||
sph_tiger_init( &ctx.tiger );
|
||||
sph_tiger( &ctx.tiger, in3, size );
|
||||
sph_tiger_close( &ctx.tiger, hash3 );
|
||||
}
|
||||
for ( int i = (24/4); i < (64/4); i++ )
|
||||
hash0[i] = hash1[i] = hash2[i] = hash3[i] = 0;
|
||||
|
||||
intrlv_2x128( vhash, hash0, hash1, 512 );
|
||||
luffa_2way_init( &ctx.luffa, 512 );
|
||||
luffa_2way_update_close( &ctx.luffa, vhash, vhash, 64 );
|
||||
dintrlv_2x128( hash0, hash1, vhash, 512 );
|
||||
intrlv_2x128( vhash, hash2, hash3, 512 );
|
||||
luffa_2way_init( &ctx.luffa, 512 );
|
||||
luffa_2way_update_close( &ctx.luffa, vhash, vhash, 64 );
|
||||
dintrlv_2x128( hash2, hash3, vhash, 512 );
|
||||
intrlv_2x128( vhash, hash0, hash1, 512 );
|
||||
luffa_2way_init( &ctx.luffa, 512 );
|
||||
luffa_2way_update_close( &ctx.luffa, vhash, vhash, 64 );
|
||||
dintrlv_2x128( hash0, hash1, vhash, 512 );
|
||||
intrlv_2x128( vhash, hash2, hash3, 512 );
|
||||
luffa_2way_init( &ctx.luffa, 512 );
|
||||
luffa_2way_update_close( &ctx.luffa, vhash, vhash, 64 );
|
||||
dintrlv_2x128( hash2, hash3, vhash, 512 );
|
||||
break;
|
||||
case CUBEHASH:
|
||||
if ( i == 0 )
|
||||
@@ -959,18 +907,10 @@ void x16rv2_4way_hash( void* output, const void* input )
|
||||
}
|
||||
break;
|
||||
case SHAVITE:
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, in0, size );
|
||||
sph_shavite512_close( &ctx.shavite, hash0 );
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, in1, size );
|
||||
sph_shavite512_close( &ctx.shavite, hash1 );
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, in2, size );
|
||||
sph_shavite512_close( &ctx.shavite, hash2 );
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, in3, size );
|
||||
sph_shavite512_close( &ctx.shavite, hash3 );
|
||||
shavite512_full( &ctx.shavite, hash0, in0, size );
|
||||
shavite512_full( &ctx.shavite, hash1, in1, size );
|
||||
shavite512_full( &ctx.shavite, hash2, in2, size );
|
||||
shavite512_full( &ctx.shavite, hash3, in3, size );
|
||||
break;
|
||||
case SIMD:
|
||||
intrlv_2x128( vhash, in0, in1, size<<3 );
|
||||
@@ -1003,18 +943,10 @@ void x16rv2_4way_hash( void* output, const void* input )
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
break;
|
||||
case FUGUE:
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, in0, size );
|
||||
sph_fugue512_close( &ctx.fugue, hash0 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, in1, size );
|
||||
sph_fugue512_close( &ctx.fugue, hash1 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, in2, size );
|
||||
sph_fugue512_close( &ctx.fugue, hash2 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, in3, size );
|
||||
sph_fugue512_close( &ctx.fugue, hash3 );
|
||||
sph_fugue512_full( &ctx.fugue, hash0, in0, size );
|
||||
sph_fugue512_full( &ctx.fugue, hash1, in1, size );
|
||||
sph_fugue512_full( &ctx.fugue, hash2, in2, size );
|
||||
sph_fugue512_full( &ctx.fugue, hash3, in3, size );
|
||||
break;
|
||||
case SHABAL:
|
||||
intrlv_4x32( vhash, in0, in1, in2, in3, size<<3 );
|
||||
@@ -1045,18 +977,10 @@ void x16rv2_4way_hash( void* output, const void* input )
|
||||
}
|
||||
else
|
||||
{
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, in0, size );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash0 );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, in1, size );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash1 );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, in2, size );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash2 );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, in3, size );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash3 );
|
||||
sph_whirlpool512_full( &ctx.whirlpool, hash0, in0, size );
|
||||
sph_whirlpool512_full( &ctx.whirlpool, hash1, in1, size );
|
||||
sph_whirlpool512_full( &ctx.whirlpool, hash2, in2, size );
|
||||
sph_whirlpool512_full( &ctx.whirlpool, hash3, in3, size );
|
||||
}
|
||||
break;
|
||||
case SHA_512:
|
||||
@@ -1121,7 +1045,7 @@ int scanhash_x16rv2_4way( struct work *work, uint32_t max_nonce,
|
||||
const uint32_t last_nonce = max_nonce - 4;
|
||||
uint32_t n = first_nonce;
|
||||
const int thr_id = mythr->id;
|
||||
__m256i *noncev = (__m256i*)vdata + 9; // aligned
|
||||
__m256i *noncev = (__m256i*)vdata + 9;
|
||||
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
@@ -1130,17 +1054,19 @@ int scanhash_x16rv2_4way( struct work *work, uint32_t max_nonce,
|
||||
|
||||
bedata1[0] = bswap_32( pdata[1] );
|
||||
bedata1[1] = bswap_32( pdata[2] );
|
||||
|
||||
static __thread uint32_t s_ntime = UINT32_MAX;
|
||||
const uint32_t ntime = bswap_32(pdata[17]);
|
||||
if ( s_ntime != ntime )
|
||||
{
|
||||
x16_r_s_getAlgoString( (const uint8_t*)bedata1, hashOrder );
|
||||
x16_r_s_getAlgoString( (const uint8_t*)bedata1, x16r_hash_order );
|
||||
s_ntime = ntime;
|
||||
if ( opt_debug && !thr_id )
|
||||
applog( LOG_DEBUG, "hash order %s (%08x)", hashOrder, ntime );
|
||||
applog( LOG_INFO, "hash order %s (%08x)", x16r_hash_order, ntime );
|
||||
}
|
||||
|
||||
// Do midstate prehash on hash functions with block size <= 64 bytes.
|
||||
const char elem = hashOrder[0];
|
||||
const char elem = x16r_hash_order[0];
|
||||
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
|
||||
switch ( algo )
|
||||
{
|
||||
@@ -1159,8 +1085,7 @@ int scanhash_x16rv2_4way( struct work *work, uint32_t max_nonce,
|
||||
break;
|
||||
case SKEIN:
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
skein512_4way_init( &x16rv2_ctx.skein );
|
||||
skein512_4way_update( &x16rv2_ctx.skein, vdata, 64 );
|
||||
skein512_4way_prehash64( &x16r_ctx.skein, vdata );
|
||||
break;
|
||||
case CUBEHASH:
|
||||
mm128_bswap32_80( edata, pdata );
|
||||
|
@@ -34,7 +34,6 @@
|
||||
#endif
|
||||
|
||||
static __thread uint32_t s_ntime = UINT32_MAX;
|
||||
static __thread char hashOrder[X16R_HASH_FUNC_COUNT + 1] = { 0 };
|
||||
|
||||
union _x16rv2_context_overlay
|
||||
{
|
||||
@@ -52,7 +51,7 @@ union _x16rv2_context_overlay
|
||||
sph_keccak512_context keccak;
|
||||
hashState_luffa luffa;
|
||||
cubehashParam cube;
|
||||
sph_shavite512_context shavite;
|
||||
shavite512_context shavite;
|
||||
hashState_sd simd;
|
||||
sph_hamsi512_context hamsi;
|
||||
sph_fugue512_context fugue;
|
||||
@@ -74,16 +73,10 @@ void x16rv2_hash( void* output, const void* input )
|
||||
x16rv2_context_overlay ctx;
|
||||
void *in = (void*) input;
|
||||
int size = 80;
|
||||
/*
|
||||
if ( s_ntime == UINT32_MAX )
|
||||
{
|
||||
const uint8_t* in8 = (uint8_t*) input;
|
||||
x16_r_s_getAlgoString( &in8[4], hashOrder );
|
||||
}
|
||||
*/
|
||||
|
||||
for ( int i = 0; i < 16; i++ )
|
||||
{
|
||||
const char elem = hashOrder[i];
|
||||
const char elem = x16r_hash_order[i];
|
||||
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
|
||||
|
||||
switch ( algo )
|
||||
@@ -143,9 +136,7 @@ void x16rv2_hash( void* output, const void* input )
|
||||
(const byte*)in, size );
|
||||
break;
|
||||
case SHAVITE:
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, in, size );
|
||||
sph_shavite512_close( &ctx.shavite, hash );
|
||||
shavite512_full( &ctx.shavite, hash, in, size );
|
||||
break;
|
||||
case SIMD:
|
||||
init_sd( &ctx.simd, 512 );
|
||||
@@ -169,9 +160,7 @@ void x16rv2_hash( void* output, const void* input )
|
||||
sph_hamsi512_close( &ctx.hamsi, hash );
|
||||
break;
|
||||
case FUGUE:
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, in, size );
|
||||
sph_fugue512_close( &ctx.fugue, hash );
|
||||
sph_fugue512_full( &ctx.fugue, hash, in, size );
|
||||
break;
|
||||
case SHABAL:
|
||||
sph_shabal512_init( &ctx.shabal );
|
||||
@@ -179,9 +168,7 @@ void x16rv2_hash( void* output, const void* input )
|
||||
sph_shabal512_close( &ctx.shabal, hash );
|
||||
break;
|
||||
case WHIRLPOOL:
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, in, size );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash );
|
||||
sph_whirlpool512_full( &ctx.whirlpool, hash, in, size );
|
||||
break;
|
||||
case SHA_512:
|
||||
sph_tiger_init( &ctx.tiger );
|
||||
@@ -203,48 +190,48 @@ int scanhash_x16rv2( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t _ALIGN(128) hash32[8];
|
||||
uint32_t _ALIGN(128) endiandata[20];
|
||||
uint32_t _ALIGN(128) edata[20];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
const int thr_id = mythr->id;
|
||||
uint32_t nonce = first_nonce;
|
||||
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
casti_m128i( endiandata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
|
||||
casti_m128i( endiandata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
|
||||
casti_m128i( endiandata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
|
||||
casti_m128i( endiandata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
|
||||
casti_m128i( endiandata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
|
||||
casti_m128i( edata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
|
||||
casti_m128i( edata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
|
||||
casti_m128i( edata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
|
||||
casti_m128i( edata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
|
||||
casti_m128i( edata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
|
||||
|
||||
static __thread uint32_t s_ntime = UINT32_MAX;
|
||||
if ( s_ntime != pdata[17] )
|
||||
{
|
||||
uint32_t ntime = swab32(pdata[17]);
|
||||
x16_r_s_getAlgoString( (const uint8_t*) (&endiandata[1]), hashOrder );
|
||||
x16_r_s_getAlgoString( (const uint8_t*) (&edata[1]), x16r_hash_order );
|
||||
s_ntime = ntime;
|
||||
if ( opt_debug && !thr_id )
|
||||
applog( LOG_DEBUG, "hash order %s (%08x)", hashOrder, ntime );
|
||||
applog( LOG_DEBUG, "hash order %s (%08x)",
|
||||
x16r_hash_order, ntime );
|
||||
}
|
||||
|
||||
if ( opt_benchmark )
|
||||
ptarget[7] = 0x0cff;
|
||||
if ( bench ) ptarget[7] = 0x0cff;
|
||||
|
||||
do
|
||||
{
|
||||
be32enc( &endiandata[19], nonce );
|
||||
x16rv2_hash( hash32, endiandata );
|
||||
edata[19] = nonce;
|
||||
x16rv2_hash( hash32, edata );
|
||||
|
||||
if ( hash32[7] <= Htarg )
|
||||
if (fulltest( hash32, ptarget ) && !opt_benchmark )
|
||||
if ( unlikely( valid_hash( hash32, ptarget ) && !bench ) )
|
||||
{
|
||||
pdata[19] = nonce;
|
||||
pdata[19] = bswap_32( nonce );
|
||||
submit_solution( work, hash32, mythr );
|
||||
}
|
||||
nonce++;
|
||||
} while ( nonce < max_nonce && !(*restart) );
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce + 1;
|
||||
*hashes_done = pdata[19] - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@@ -97,7 +97,7 @@ int scanhash_x21s( struct work *work, uint32_t max_nonce,
|
||||
nonce++;
|
||||
} while ( nonce < max_nonce && !(*restart) );
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce + 1;
|
||||
*hashes_done = pdata[19] - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
@@ -563,59 +563,31 @@ void sonoa_hash( void *state, const void *input )
|
||||
}
|
||||
|
||||
int scanhash_sonoa( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
uint64_t *hashes_done, struct thr_info *mythr)
|
||||
{
|
||||
uint32_t _ALIGN(128) hash32[8];
|
||||
uint32_t _ALIGN(128) endiandata[20];
|
||||
uint32_t edata[20] __attribute__((aligned(64)));
|
||||
uint32_t hash64[8] __attribute__((aligned(64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[19];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint32_t n = pdata[19] - 1;
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
uint64_t htmax[] =
|
||||
mm128_bswap32_80( edata, pdata );
|
||||
|
||||
do
|
||||
{
|
||||
0,
|
||||
0xF,
|
||||
0xFF,
|
||||
0xFFF,
|
||||
0xFFFF,
|
||||
0x10000000
|
||||
};
|
||||
uint32_t masks[] =
|
||||
{
|
||||
0xFFFFFFFF,
|
||||
0xFFFFFFF0,
|
||||
0xFFFFFF00,
|
||||
0xFFFFF000,
|
||||
0xFFFF0000,
|
||||
0
|
||||
};
|
||||
|
||||
|
||||
// we need bigendian data...
|
||||
casti_m128i( endiandata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
|
||||
casti_m128i( endiandata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
|
||||
casti_m128i( endiandata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
|
||||
casti_m128i( endiandata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
|
||||
casti_m128i( endiandata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
|
||||
|
||||
for ( int m = 0; m < 6; m++ ) if ( Htarg <= htmax[m] )
|
||||
{
|
||||
uint32_t mask = masks[m];
|
||||
do
|
||||
edata[19] = n;
|
||||
sonoa_hash( hash64, edata );
|
||||
if ( unlikely( valid_hash( hash64, ptarget ) && !bench ) )
|
||||
{
|
||||
pdata[19] = ++n;
|
||||
be32enc(&endiandata[19], n);
|
||||
sonoa_hash(hash32, endiandata);
|
||||
if ( !( hash32[7] & mask ) )
|
||||
if ( fulltest( hash32, ptarget ) && !opt_benchmark )
|
||||
submit_solution( work, hash32, mythr );
|
||||
} while (n < max_nonce && !work_restart[thr_id].restart);
|
||||
break;
|
||||
}
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
pdata[19] = bswap_32( n );
|
||||
submit_solution( work, hash64, mythr );
|
||||
}
|
||||
n++;
|
||||
} while ( n < max_nonce && !work_restart[thr_id].restart );
|
||||
*hashes_done = n - first_nonce;
|
||||
pdata[19] = n;
|
||||
return 0;
|
||||
}
|
||||
|
@@ -74,9 +74,7 @@ void x17_8way_hash( void *state, const void *input )
|
||||
|
||||
blake512_8way_full( &ctx.blake, vhash, input, 80 );
|
||||
|
||||
bmw512_8way_init( &ctx.bmw );
|
||||
bmw512_8way_update( &ctx.bmw, vhash, 64 );
|
||||
bmw512_8way_close( &ctx.bmw, vhash );
|
||||
bmw512_8way_full( &ctx.bmw, vhash, vhash, 64 );
|
||||
|
||||
#if defined(__VAES__)
|
||||
|
||||
@@ -106,9 +104,7 @@ void x17_8way_hash( void *state, const void *input )
|
||||
|
||||
#endif
|
||||
|
||||
skein512_8way_init( &ctx.skein );
|
||||
skein512_8way_update( &ctx.skein, vhash, 64 );
|
||||
skein512_8way_close( &ctx.skein, vhash );
|
||||
skein512_8way_full( &ctx.skein, vhash, vhash, 64 );
|
||||
|
||||
jh512_8way_init( &ctx.jh );
|
||||
jh512_8way_update( &ctx.jh, vhash, 64 );
|
||||
@@ -136,30 +132,14 @@ void x17_8way_hash( void *state, const void *input )
|
||||
dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhashA );
|
||||
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhashB );
|
||||
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, hash0, 64 );
|
||||
sph_shavite512_close( &ctx.shavite, hash0 );
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, hash1, 64 );
|
||||
sph_shavite512_close( &ctx.shavite, hash1 );
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, hash2, 64 );
|
||||
sph_shavite512_close( &ctx.shavite, hash2 );
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, hash3, 64 );
|
||||
sph_shavite512_close( &ctx.shavite, hash3 );
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, hash4, 64 );
|
||||
sph_shavite512_close( &ctx.shavite, hash4 );
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, hash5, 64 );
|
||||
sph_shavite512_close( &ctx.shavite, hash5 );
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, hash6, 64 );
|
||||
sph_shavite512_close( &ctx.shavite, hash6 );
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, hash7, 64 );
|
||||
sph_shavite512_close( &ctx.shavite, hash7 );
|
||||
shavite512_full( &ctx.shavite, hash0, hash0, 64 );
|
||||
shavite512_full( &ctx.shavite, hash1, hash1, 64 );
|
||||
shavite512_full( &ctx.shavite, hash2, hash2, 64 );
|
||||
shavite512_full( &ctx.shavite, hash3, hash3, 64 );
|
||||
shavite512_full( &ctx.shavite, hash4, hash4, 64 );
|
||||
shavite512_full( &ctx.shavite, hash5, hash5, 64 );
|
||||
shavite512_full( &ctx.shavite, hash6, hash6, 64 );
|
||||
shavite512_full( &ctx.shavite, hash7, hash7, 64 );
|
||||
|
||||
intrlv_4x128_512( vhashA, hash0, hash1, hash2, hash3 );
|
||||
intrlv_4x128_512( vhashB, hash4, hash5, hash6, hash7 );
|
||||
@@ -210,30 +190,14 @@ void x17_8way_hash( void *state, const void *input )
|
||||
dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
|
||||
vhash );
|
||||
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, hash0, 64 );
|
||||
sph_fugue512_close( &ctx.fugue, hash0 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, hash1, 64 );
|
||||
sph_fugue512_close( &ctx.fugue, hash1 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, hash2, 64 );
|
||||
sph_fugue512_close( &ctx.fugue, hash2 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, hash3, 64 );
|
||||
sph_fugue512_close( &ctx.fugue, hash3 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, hash4, 64 );
|
||||
sph_fugue512_close( &ctx.fugue, hash4 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, hash5, 64 );
|
||||
sph_fugue512_close( &ctx.fugue, hash5 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, hash6, 64 );
|
||||
sph_fugue512_close( &ctx.fugue, hash6 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, hash7, 64 );
|
||||
sph_fugue512_close( &ctx.fugue, hash7 );
|
||||
sph_fugue512_full( &ctx.fugue, hash0, hash0, 64 );
|
||||
sph_fugue512_full( &ctx.fugue, hash1, hash1, 64 );
|
||||
sph_fugue512_full( &ctx.fugue, hash2, hash2, 64 );
|
||||
sph_fugue512_full( &ctx.fugue, hash3, hash3, 64 );
|
||||
sph_fugue512_full( &ctx.fugue, hash4, hash4, 64 );
|
||||
sph_fugue512_full( &ctx.fugue, hash5, hash5, 64 );
|
||||
sph_fugue512_full( &ctx.fugue, hash6, hash6, 64 );
|
||||
sph_fugue512_full( &ctx.fugue, hash7, hash7, 64 );
|
||||
|
||||
intrlv_8x32_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7 );
|
||||
@@ -245,30 +209,14 @@ void x17_8way_hash( void *state, const void *input )
|
||||
dintrlv_8x32_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
|
||||
vhash );
|
||||
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, hash0, 64 );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash0 );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, hash1, 64 );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash1 );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, hash2, 64 );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash2 );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, hash3, 64 );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash3 );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, hash4, 64 );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash4 );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, hash5, 64 );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash5 );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, hash6, 64 );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash6 );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, hash7, 64 );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash7 );
|
||||
sph_whirlpool512_full( &ctx.whirlpool, hash0, hash0, 64 );
|
||||
sph_whirlpool512_full( &ctx.whirlpool, hash1, hash1, 64 );
|
||||
sph_whirlpool512_full( &ctx.whirlpool, hash2, hash2, 64 );
|
||||
sph_whirlpool512_full( &ctx.whirlpool, hash3, hash3, 64 );
|
||||
sph_whirlpool512_full( &ctx.whirlpool, hash4, hash4, 64 );
|
||||
sph_whirlpool512_full( &ctx.whirlpool, hash5, hash5, 64 );
|
||||
sph_whirlpool512_full( &ctx.whirlpool, hash6, hash6, 64 );
|
||||
sph_whirlpool512_full( &ctx.whirlpool, hash7, hash7, 64 );
|
||||
|
||||
intrlv_8x64_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7 );
|
||||
@@ -287,18 +235,18 @@ void x17_8way_hash( void *state, const void *input )
|
||||
int scanhash_x17_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[8*16] __attribute__ ((aligned (128)));
|
||||
uint32_t vdata[24*8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash32[8*8] __attribute__ ((aligned (128)));
|
||||
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
||||
uint32_t *hash7 = &(hash[7<<3]);
|
||||
uint32_t *hash32_d7 = &(hash32[7*8]);
|
||||
uint32_t *pdata = work->data;
|
||||
const uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 8;
|
||||
__m512i *noncev = (__m512i*)vdata + 9; // aligned
|
||||
__m512i *noncev = (__m512i*)vdata + 9;
|
||||
uint32_t n = first_nonce;
|
||||
const int thr_id = mythr->id;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t targ32_d7 = ptarget[7];
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
mm512_bswap32_intrlv80_8x64( vdata, pdata );
|
||||
@@ -307,12 +255,12 @@ int scanhash_x17_8way( struct work *work, uint32_t max_nonce,
|
||||
n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
||||
do
|
||||
{
|
||||
x17_8way_hash( hash, vdata );
|
||||
x17_8way_hash( hash32, vdata );
|
||||
|
||||
for ( int lane = 0; lane < 8; lane++ )
|
||||
if ( unlikely( ( hash7[ lane ] <= Htarg ) && !bench ) )
|
||||
if ( unlikely( ( hash32_d7[ lane ] <= targ32_d7 ) && !bench ) )
|
||||
{
|
||||
extr_lane_8x32( lane_hash, hash, lane, 256 );
|
||||
extr_lane_8x32( lane_hash, hash32, lane, 256 );
|
||||
if ( likely( valid_hash( lane_hash, ptarget ) ) )
|
||||
{
|
||||
pdata[19] = bswap_32( n + lane );
|
||||
@@ -378,9 +326,7 @@ void x17_4way_hash( void *state, const void *input )
|
||||
|
||||
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
skein512_4way_init( &ctx.skein );
|
||||
skein512_4way_update( &ctx.skein, vhash, 64 );
|
||||
skein512_4way_close( &ctx.skein, vhash );
|
||||
skein512_4way_full( &ctx.skein, vhash, vhash, 64 );
|
||||
|
||||
jh512_4way_init( &ctx.jh );
|
||||
jh512_4way_update( &ctx.jh, vhash, 64 );
|
||||
@@ -424,18 +370,10 @@ void x17_4way_hash( void *state, const void *input )
|
||||
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, hash0, 64 );
|
||||
sph_fugue512_close( &ctx.fugue, hash0 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, hash1, 64 );
|
||||
sph_fugue512_close( &ctx.fugue, hash1 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, hash2, 64 );
|
||||
sph_fugue512_close( &ctx.fugue, hash2 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, hash3, 64 );
|
||||
sph_fugue512_close( &ctx.fugue, hash3 );
|
||||
sph_fugue512_full( &ctx.fugue, hash0, hash0, 64 );
|
||||
sph_fugue512_full( &ctx.fugue, hash1, hash1, 64 );
|
||||
sph_fugue512_full( &ctx.fugue, hash2, hash2, 64 );
|
||||
sph_fugue512_full( &ctx.fugue, hash3, hash3, 64 );
|
||||
|
||||
intrlv_4x32_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
@@ -445,18 +383,10 @@ void x17_4way_hash( void *state, const void *input )
|
||||
|
||||
dintrlv_4x32_512( hash0, hash1, hash2, hash3, vhash );
|
||||
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, hash0, 64 );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash0 );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, hash1, 64 );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash1 );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, hash2, 64 );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash2 );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, hash3, 64 );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash3 );
|
||||
sph_whirlpool512_full( &ctx.whirlpool, hash0, hash0, 64 );
|
||||
sph_whirlpool512_full( &ctx.whirlpool, hash1, hash1, 64 );
|
||||
sph_whirlpool512_full( &ctx.whirlpool, hash2, hash2, 64 );
|
||||
sph_whirlpool512_full( &ctx.whirlpool, hash3, hash3, 64 );
|
||||
|
||||
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
@@ -474,18 +404,18 @@ void x17_4way_hash( void *state, const void *input )
|
||||
int scanhash_x17_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[16*4] __attribute__ ((aligned (64)));
|
||||
uint32_t hash32[8*4] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
||||
uint32_t *hash7 = &(hash[7<<2]);
|
||||
uint32_t *hash32_d7 = &(hash32[ 7*4 ]);
|
||||
uint32_t *pdata = work->data;
|
||||
const uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce -4;
|
||||
__m256i *noncev = (__m256i*)vdata + 9; // aligned
|
||||
const uint32_t last_nonce = max_nonce - 4;
|
||||
__m256i *noncev = (__m256i*)vdata + 9;
|
||||
uint32_t n = first_nonce;
|
||||
const int thr_id = mythr->id;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t targ32_d7 = ptarget[7];
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
@@ -493,12 +423,12 @@ int scanhash_x17_4way( struct work *work, uint32_t max_nonce,
|
||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
||||
do
|
||||
{
|
||||
x17_4way_hash( hash, vdata );
|
||||
x17_4way_hash( hash32, vdata );
|
||||
|
||||
for ( int lane = 0; lane < 4; lane++ )
|
||||
if ( unlikely( hash7[ lane ] <= Htarg && !bench ) )
|
||||
if ( unlikely( hash32_d7[ lane ] <= targ32_d7 && !bench ) )
|
||||
{
|
||||
extr_lane_4x32( lane_hash, hash, lane, 256 );
|
||||
extr_lane_4x32( lane_hash, hash32, lane, 256 );
|
||||
if ( valid_hash( lane_hash, ptarget ) )
|
||||
{
|
||||
pdata[19] = bswap_32( n + lane );
|
||||
|
@@ -169,8 +169,8 @@ int scanhash_x17( struct work *work, uint32_t max_nonce,
|
||||
submit_solution( work, hash64, mythr );
|
||||
}
|
||||
n++;
|
||||
} while ( n < max_nonce && !work_restart[thr_id].restart);
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
} while ( n < max_nonce && !work_restart[thr_id].restart );
|
||||
*hashes_done = n - first_nonce;
|
||||
pdata[19] = n;
|
||||
return 0;
|
||||
}
|
||||
|
@@ -76,9 +76,7 @@ void xevan_8way_hash( void *output, const void *input )
|
||||
blake512_8way_full( &ctx.blake, vhash, input, 80 );
|
||||
memset( &vhash[8<<3], 0, 64<<3 );
|
||||
|
||||
bmw512_8way_init( &ctx.bmw );
|
||||
bmw512_8way_update( &ctx.bmw, vhash, dataLen );
|
||||
bmw512_8way_close( &ctx.bmw, vhash );
|
||||
bmw512_8way_full( &ctx.bmw, vhash, vhash, dataLen );
|
||||
|
||||
#if defined(__VAES__)
|
||||
|
||||
@@ -108,9 +106,7 @@ void xevan_8way_hash( void *output, const void *input )
|
||||
|
||||
#endif
|
||||
|
||||
skein512_8way_init( &ctx.skein );
|
||||
skein512_8way_update( &ctx.skein, vhash, dataLen );
|
||||
skein512_8way_close( &ctx.skein, vhash );
|
||||
skein512_8way_full( &ctx.skein, vhash, vhash, dataLen );
|
||||
|
||||
jh512_8way_init( &ctx.jh );
|
||||
jh512_8way_update( &ctx.jh, vhash, dataLen );
|
||||
@@ -138,30 +134,14 @@ void xevan_8way_hash( void *output, const void *input )
|
||||
dintrlv_4x128( hash0, hash1, hash2, hash3, vhashA, dataLen<<3 );
|
||||
dintrlv_4x128( hash4, hash5, hash6, hash7, vhashB, dataLen<<3 );
|
||||
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, hash0, dataLen );
|
||||
sph_shavite512_close( &ctx.shavite, hash0 );
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, hash1, dataLen );
|
||||
sph_shavite512_close( &ctx.shavite, hash1 );
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, hash2, dataLen );
|
||||
sph_shavite512_close( &ctx.shavite, hash2 );
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, hash3, dataLen );
|
||||
sph_shavite512_close( &ctx.shavite, hash3 );
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, hash4, dataLen );
|
||||
sph_shavite512_close( &ctx.shavite, hash4 );
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, hash5, dataLen );
|
||||
sph_shavite512_close( &ctx.shavite, hash5 );
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, hash6, dataLen );
|
||||
sph_shavite512_close( &ctx.shavite, hash6 );
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, hash7, dataLen );
|
||||
sph_shavite512_close( &ctx.shavite, hash7 );
|
||||
shavite512_full( &ctx.shavite, hash0, hash0, dataLen );
|
||||
shavite512_full( &ctx.shavite, hash1, hash1, dataLen );
|
||||
shavite512_full( &ctx.shavite, hash2, hash2, dataLen );
|
||||
shavite512_full( &ctx.shavite, hash3, hash3, dataLen );
|
||||
shavite512_full( &ctx.shavite, hash4, hash4, dataLen );
|
||||
shavite512_full( &ctx.shavite, hash5, hash5, dataLen );
|
||||
shavite512_full( &ctx.shavite, hash6, hash6, dataLen );
|
||||
shavite512_full( &ctx.shavite, hash7, hash7, dataLen );
|
||||
|
||||
intrlv_4x128( vhashA, hash0, hash1, hash2, hash3, dataLen<<3 );
|
||||
intrlv_4x128( vhashB, hash4, hash5, hash6, hash7, dataLen<<3 );
|
||||
@@ -212,30 +192,14 @@ void xevan_8way_hash( void *output, const void *input )
|
||||
dintrlv_8x64( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
|
||||
vhash, dataLen<<3 );
|
||||
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, hash0, dataLen );
|
||||
sph_fugue512_close( &ctx.fugue, hash0 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, hash1, dataLen );
|
||||
sph_fugue512_close( &ctx.fugue, hash1 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, hash2, dataLen );
|
||||
sph_fugue512_close( &ctx.fugue, hash2 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, hash3, dataLen );
|
||||
sph_fugue512_close( &ctx.fugue, hash3 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, hash4, dataLen );
|
||||
sph_fugue512_close( &ctx.fugue, hash4 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, hash5, dataLen );
|
||||
sph_fugue512_close( &ctx.fugue, hash5 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, hash6, dataLen );
|
||||
sph_fugue512_close( &ctx.fugue, hash6 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, hash7, dataLen );
|
||||
sph_fugue512_close( &ctx.fugue, hash7 );
|
||||
sph_fugue512_full( &ctx.fugue, hash0, hash0, dataLen );
|
||||
sph_fugue512_full( &ctx.fugue, hash1, hash1, dataLen );
|
||||
sph_fugue512_full( &ctx.fugue, hash2, hash2, dataLen );
|
||||
sph_fugue512_full( &ctx.fugue, hash3, hash3, dataLen );
|
||||
sph_fugue512_full( &ctx.fugue, hash4, hash4, dataLen );
|
||||
sph_fugue512_full( &ctx.fugue, hash5, hash5, dataLen );
|
||||
sph_fugue512_full( &ctx.fugue, hash6, hash6, dataLen );
|
||||
sph_fugue512_full( &ctx.fugue, hash7, hash7, dataLen );
|
||||
|
||||
intrlv_8x32( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7, dataLen<<3 );
|
||||
@@ -247,30 +211,14 @@ void xevan_8way_hash( void *output, const void *input )
|
||||
dintrlv_8x32( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
|
||||
vhash, dataLen<<3 );
|
||||
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, hash0, dataLen );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash0 );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, hash1, dataLen );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash1 );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, hash2, dataLen );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash2 );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, hash3, dataLen );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash3 );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, hash4, dataLen );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash4 );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, hash5, dataLen );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash5 );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, hash6, dataLen );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash6 );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, hash7, dataLen );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash7 );
|
||||
sph_whirlpool512_full( &ctx.whirlpool, hash0, hash0, dataLen );
|
||||
sph_whirlpool512_full( &ctx.whirlpool, hash1, hash1, dataLen );
|
||||
sph_whirlpool512_full( &ctx.whirlpool, hash2, hash2, dataLen );
|
||||
sph_whirlpool512_full( &ctx.whirlpool, hash3, hash3, dataLen );
|
||||
sph_whirlpool512_full( &ctx.whirlpool, hash4, hash4, dataLen );
|
||||
sph_whirlpool512_full( &ctx.whirlpool, hash5, hash5, dataLen );
|
||||
sph_whirlpool512_full( &ctx.whirlpool, hash6, hash6, dataLen );
|
||||
sph_whirlpool512_full( &ctx.whirlpool, hash7, hash7, dataLen );
|
||||
|
||||
intrlv_8x64( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7, dataLen<<3 );
|
||||
@@ -291,9 +239,7 @@ void xevan_8way_hash( void *output, const void *input )
|
||||
|
||||
blake512_8way_full( &ctx.blake, vhash, vhash, dataLen );
|
||||
|
||||
bmw512_8way_init( &ctx.bmw );
|
||||
bmw512_8way_update( &ctx.bmw, vhash, dataLen );
|
||||
bmw512_8way_close( &ctx.bmw, vhash );
|
||||
bmw512_8way_full( &ctx.bmw, vhash, vhash, dataLen );
|
||||
|
||||
#if defined(__VAES__)
|
||||
|
||||
@@ -323,9 +269,7 @@ void xevan_8way_hash( void *output, const void *input )
|
||||
|
||||
#endif
|
||||
|
||||
skein512_8way_init( &ctx.skein );
|
||||
skein512_8way_update( &ctx.skein, vhash, dataLen );
|
||||
skein512_8way_close( &ctx.skein, vhash );
|
||||
skein512_8way_full( &ctx.skein, vhash, vhash, dataLen );
|
||||
|
||||
jh512_8way_init( &ctx.jh );
|
||||
jh512_8way_update( &ctx.jh, vhash, dataLen );
|
||||
@@ -353,30 +297,14 @@ void xevan_8way_hash( void *output, const void *input )
|
||||
dintrlv_4x128( hash0, hash1, hash2, hash3, vhashA, dataLen<<3 );
|
||||
dintrlv_4x128( hash4, hash5, hash6, hash7, vhashB, dataLen<<3 );
|
||||
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, hash0, dataLen );
|
||||
sph_shavite512_close( &ctx.shavite, hash0 );
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, hash1, dataLen );
|
||||
sph_shavite512_close( &ctx.shavite, hash1 );
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, hash2, dataLen );
|
||||
sph_shavite512_close( &ctx.shavite, hash2 );
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, hash3, dataLen );
|
||||
sph_shavite512_close( &ctx.shavite, hash3 );
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, hash4, dataLen );
|
||||
sph_shavite512_close( &ctx.shavite, hash4 );
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, hash5, dataLen );
|
||||
sph_shavite512_close( &ctx.shavite, hash5 );
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, hash6, dataLen );
|
||||
sph_shavite512_close( &ctx.shavite, hash6 );
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, hash7, dataLen );
|
||||
sph_shavite512_close( &ctx.shavite, hash7 );
|
||||
shavite512_full( &ctx.shavite, hash0, hash0, dataLen );
|
||||
shavite512_full( &ctx.shavite, hash1, hash1, dataLen );
|
||||
shavite512_full( &ctx.shavite, hash2, hash2, dataLen );
|
||||
shavite512_full( &ctx.shavite, hash3, hash3, dataLen );
|
||||
shavite512_full( &ctx.shavite, hash4, hash4, dataLen );
|
||||
shavite512_full( &ctx.shavite, hash5, hash5, dataLen );
|
||||
shavite512_full( &ctx.shavite, hash6, hash6, dataLen );
|
||||
shavite512_full( &ctx.shavite, hash7, hash7, dataLen );
|
||||
|
||||
intrlv_4x128( vhashA, hash0, hash1, hash2, hash3, dataLen<<3 );
|
||||
intrlv_4x128( vhashB, hash4, hash5, hash6, hash7, dataLen<<3 );
|
||||
@@ -427,30 +355,14 @@ void xevan_8way_hash( void *output, const void *input )
|
||||
dintrlv_8x64( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
|
||||
vhash, dataLen<<3 );
|
||||
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, hash0, dataLen );
|
||||
sph_fugue512_close( &ctx.fugue, hash0 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, hash1, dataLen );
|
||||
sph_fugue512_close( &ctx.fugue, hash1 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, hash2, dataLen );
|
||||
sph_fugue512_close( &ctx.fugue, hash2 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, hash3, dataLen );
|
||||
sph_fugue512_close( &ctx.fugue, hash3 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, hash4, dataLen );
|
||||
sph_fugue512_close( &ctx.fugue, hash4 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, hash5, dataLen );
|
||||
sph_fugue512_close( &ctx.fugue, hash5 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, hash6, dataLen );
|
||||
sph_fugue512_close( &ctx.fugue, hash6 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, hash7, dataLen );
|
||||
sph_fugue512_close( &ctx.fugue, hash7 );
|
||||
sph_fugue512_full( &ctx.fugue, hash0, hash0, dataLen );
|
||||
sph_fugue512_full( &ctx.fugue, hash1, hash1, dataLen );
|
||||
sph_fugue512_full( &ctx.fugue, hash2, hash2, dataLen );
|
||||
sph_fugue512_full( &ctx.fugue, hash3, hash3, dataLen );
|
||||
sph_fugue512_full( &ctx.fugue, hash4, hash4, dataLen );
|
||||
sph_fugue512_full( &ctx.fugue, hash5, hash5, dataLen );
|
||||
sph_fugue512_full( &ctx.fugue, hash6, hash6, dataLen );
|
||||
sph_fugue512_full( &ctx.fugue, hash7, hash7, dataLen );
|
||||
|
||||
intrlv_8x32( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7, dataLen<<3 );
|
||||
@@ -462,30 +374,14 @@ void xevan_8way_hash( void *output, const void *input )
|
||||
dintrlv_8x32( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
|
||||
vhash, dataLen<<3 );
|
||||
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, hash0, dataLen );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash0 );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, hash1, dataLen );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash1 );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, hash2, dataLen );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash2 );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, hash3, dataLen );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash3 );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, hash4, dataLen );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash4 );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, hash5, dataLen );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash5 );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, hash6, dataLen );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash6 );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, hash7, dataLen );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash7 );
|
||||
sph_whirlpool512_full( &ctx.whirlpool, hash0, hash0, dataLen );
|
||||
sph_whirlpool512_full( &ctx.whirlpool, hash1, hash1, dataLen );
|
||||
sph_whirlpool512_full( &ctx.whirlpool, hash2, hash2, dataLen );
|
||||
sph_whirlpool512_full( &ctx.whirlpool, hash3, hash3, dataLen );
|
||||
sph_whirlpool512_full( &ctx.whirlpool, hash4, hash4, dataLen );
|
||||
sph_whirlpool512_full( &ctx.whirlpool, hash5, hash5, dataLen );
|
||||
sph_whirlpool512_full( &ctx.whirlpool, hash6, hash6, dataLen );
|
||||
sph_whirlpool512_full( &ctx.whirlpool, hash7, hash7, dataLen );
|
||||
|
||||
intrlv_8x64( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7, dataLen<<3 );
|
||||
@@ -504,40 +400,43 @@ void xevan_8way_hash( void *output, const void *input )
|
||||
int scanhash_xevan_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[8*16] __attribute__ ((aligned (128)));
|
||||
uint32_t vdata[24*8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[8*8] __attribute__ ((aligned (128)));
|
||||
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
||||
uint32_t *hash7 = &(hash[7<<3]);
|
||||
uint32_t *hashd7 = &(hash[7*8]);
|
||||
uint32_t *pdata = work->data;
|
||||
const uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 8;
|
||||
__m512i *noncev = (__m512i*)vdata + 9; // aligned
|
||||
__m512i *noncev = (__m512i*)vdata + 9;
|
||||
uint32_t n = first_nonce;
|
||||
const int thr_id = mythr->id;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t targ32 = ptarget[7];
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
mm512_bswap32_intrlv80_8x64( vdata, pdata );
|
||||
*noncev = mm512_intrlv_blend_32(
|
||||
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
|
||||
n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
||||
do
|
||||
{
|
||||
*noncev = mm512_intrlv_blend_32( mm512_bswap_32(
|
||||
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
|
||||
n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
|
||||
xevan_8way_hash( hash, vdata );
|
||||
|
||||
for ( int lane = 0; lane < 8; lane++ )
|
||||
if unlikely( ( hash7[ lane ] <= Htarg ) )
|
||||
if ( unlikely( ( hashd7[ lane ] <= targ32 ) && !bench ) )
|
||||
{
|
||||
extr_lane_8x32( lane_hash, hash, lane, 256 );
|
||||
if ( likely( fulltest( lane_hash, ptarget ) && !opt_benchmark ) )
|
||||
if ( likely( valid_hash( lane_hash, ptarget ) ) )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
pdata[19] = bswap_32( n + lane );
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
}
|
||||
}
|
||||
*noncev = _mm512_add_epi32( *noncev,
|
||||
m512_const1_64( 0x0000000800000000 ) );
|
||||
n += 8;
|
||||
} while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) );
|
||||
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
@@ -578,8 +477,6 @@ void xevan_4way_hash( void *output, const void *input )
|
||||
const int dataLen = 128;
|
||||
xevan_4way_context_overlay ctx __attribute__ ((aligned (64)));
|
||||
|
||||
// parallel 4 way
|
||||
|
||||
blake512_4way_full( &ctx.blake, vhash, input, 80 );
|
||||
memset( &vhash[8<<2], 0, 64<<2 );
|
||||
|
||||
@@ -598,9 +495,7 @@ void xevan_4way_hash( void *output, const void *input )
|
||||
// Parallel 4way
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );
|
||||
|
||||
skein512_4way_init( &ctx.skein );
|
||||
skein512_4way_update( &ctx.skein, vhash, dataLen );
|
||||
skein512_4way_close( &ctx.skein, vhash );
|
||||
skein512_4way_full( &ctx.skein, vhash, vhash, dataLen );
|
||||
|
||||
jh512_4way_init( &ctx.jh );
|
||||
jh512_4way_update( &ctx.jh, vhash, dataLen );
|
||||
@@ -618,15 +513,11 @@ void xevan_4way_hash( void *output, const void *input )
|
||||
cube_2way_full( &ctx.cube, vhashA, 512, vhashA, dataLen );
|
||||
cube_2way_full( &ctx.cube, vhashB, 512, vhashB, dataLen );
|
||||
|
||||
shavite512_2way_init( &ctx.shavite );
|
||||
shavite512_2way_update_close( &ctx.shavite, vhashA, vhashA, dataLen );
|
||||
shavite512_2way_init( &ctx.shavite );
|
||||
shavite512_2way_update_close( &ctx.shavite, vhashB, vhashB, dataLen );
|
||||
shavite512_2way_full( &ctx.shavite, vhashA, vhashA, dataLen );
|
||||
shavite512_2way_full( &ctx.shavite, vhashB, vhashB, dataLen );
|
||||
|
||||
simd_2way_init( &ctx.simd, 512 );
|
||||
simd_2way_update_close( &ctx.simd, vhashA, vhashA, dataLen<<3 );
|
||||
simd_2way_init( &ctx.simd, 512 );
|
||||
simd_2way_update_close( &ctx.simd, vhashB, vhashB, dataLen<<3 );
|
||||
simd512_2way_full( &ctx.simd, vhashA, vhashA, dataLen );
|
||||
simd512_2way_full( &ctx.simd, vhashB, vhashB, dataLen );
|
||||
|
||||
dintrlv_2x128( hash0, hash1, vhashA, dataLen<<3 );
|
||||
dintrlv_2x128( hash2, hash3, vhashB, dataLen<<3 );
|
||||
@@ -649,18 +540,10 @@ void xevan_4way_hash( void *output, const void *input )
|
||||
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, dataLen<<3 );
|
||||
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, hash0, dataLen );
|
||||
sph_fugue512_close( &ctx.fugue, hash0 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, hash1, dataLen );
|
||||
sph_fugue512_close( &ctx.fugue, hash1 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, hash2, dataLen );
|
||||
sph_fugue512_close( &ctx.fugue, hash2 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, hash3, dataLen );
|
||||
sph_fugue512_close( &ctx.fugue, hash3 );
|
||||
sph_fugue512_full( &ctx.fugue, hash0, hash0, 64 );
|
||||
sph_fugue512_full( &ctx.fugue, hash1, hash1, 64 );
|
||||
sph_fugue512_full( &ctx.fugue, hash2, hash2, 64 );
|
||||
sph_fugue512_full( &ctx.fugue, hash3, hash3, 64 );
|
||||
|
||||
// Parallel 4way 32 bit
|
||||
intrlv_4x32( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );
|
||||
@@ -672,18 +555,10 @@ void xevan_4way_hash( void *output, const void *input )
|
||||
dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, dataLen<<3 );
|
||||
|
||||
// Serial
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, hash0, dataLen );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash0 );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, hash1, dataLen );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash1 );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, hash2, dataLen );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash2 );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, hash3, dataLen );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash3 );
|
||||
sph_whirlpool512_full( &ctx.whirlpool, hash0, hash0, dataLen );
|
||||
sph_whirlpool512_full( &ctx.whirlpool, hash1, hash1, dataLen );
|
||||
sph_whirlpool512_full( &ctx.whirlpool, hash2, hash2, dataLen );
|
||||
sph_whirlpool512_full( &ctx.whirlpool, hash3, hash3, dataLen );
|
||||
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );
|
||||
|
||||
@@ -718,9 +593,7 @@ void xevan_4way_hash( void *output, const void *input )
|
||||
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );
|
||||
|
||||
skein512_4way_init( &ctx.skein );
|
||||
skein512_4way_update( &ctx.skein, vhash, dataLen );
|
||||
skein512_4way_close( &ctx.skein, vhash );
|
||||
skein512_4way_full( &ctx.skein, vhash, vhash, dataLen );
|
||||
|
||||
jh512_4way_init( &ctx.jh );
|
||||
jh512_4way_update( &ctx.jh, vhash, dataLen );
|
||||
@@ -738,15 +611,11 @@ void xevan_4way_hash( void *output, const void *input )
|
||||
cube_2way_full( &ctx.cube, vhashA, 512, vhashA, dataLen );
|
||||
cube_2way_full( &ctx.cube, vhashB, 512, vhashB, dataLen );
|
||||
|
||||
shavite512_2way_init( &ctx.shavite );
|
||||
shavite512_2way_update_close( &ctx.shavite, vhashA, vhashA, dataLen );
|
||||
shavite512_2way_init( &ctx.shavite );
|
||||
shavite512_2way_update_close( &ctx.shavite, vhashB, vhashB, dataLen );
|
||||
shavite512_2way_full( &ctx.shavite, vhashA, vhashA, dataLen );
|
||||
shavite512_2way_full( &ctx.shavite, vhashB, vhashB, dataLen );
|
||||
|
||||
simd_2way_init( &ctx.simd, 512 );
|
||||
simd_2way_update_close( &ctx.simd, vhashA, vhashA, dataLen<<3 );
|
||||
simd_2way_init( &ctx.simd, 512 );
|
||||
simd_2way_update_close( &ctx.simd, vhashB, vhashB, dataLen<<3 );
|
||||
simd512_2way_full( &ctx.simd, vhashA, vhashA, dataLen );
|
||||
simd512_2way_full( &ctx.simd, vhashB, vhashB, dataLen );
|
||||
|
||||
dintrlv_2x128( hash0, hash1, vhashA, dataLen<<3 );
|
||||
dintrlv_2x128( hash2, hash3, vhashB, dataLen<<3 );
|
||||
@@ -768,18 +637,10 @@ void xevan_4way_hash( void *output, const void *input )
|
||||
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, dataLen<<3 );
|
||||
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, hash0, dataLen );
|
||||
sph_fugue512_close( &ctx.fugue, hash0 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, hash1, dataLen );
|
||||
sph_fugue512_close( &ctx.fugue, hash1 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, hash2, dataLen );
|
||||
sph_fugue512_close( &ctx.fugue, hash2 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, hash3, dataLen );
|
||||
sph_fugue512_close( &ctx.fugue, hash3 );
|
||||
sph_fugue512_full( &ctx.fugue, hash0, hash0, 64 );
|
||||
sph_fugue512_full( &ctx.fugue, hash1, hash1, 64 );
|
||||
sph_fugue512_full( &ctx.fugue, hash2, hash2, 64 );
|
||||
sph_fugue512_full( &ctx.fugue, hash3, hash3, 64 );
|
||||
|
||||
intrlv_4x32( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );
|
||||
|
||||
@@ -789,18 +650,10 @@ void xevan_4way_hash( void *output, const void *input )
|
||||
|
||||
dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, dataLen<<3 );
|
||||
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, hash0, dataLen );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash0 );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, hash1, dataLen );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash1 );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, hash2, dataLen );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash2 );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, hash3, dataLen );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash3 );
|
||||
sph_whirlpool512_full( &ctx.whirlpool, hash0, hash0, dataLen );
|
||||
sph_whirlpool512_full( &ctx.whirlpool, hash1, hash1, dataLen );
|
||||
sph_whirlpool512_full( &ctx.whirlpool, hash2, hash2, dataLen );
|
||||
sph_whirlpool512_full( &ctx.whirlpool, hash3, hash3, dataLen );
|
||||
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );
|
||||
|
||||
@@ -818,41 +671,43 @@ void xevan_4way_hash( void *output, const void *input )
|
||||
int scanhash_xevan_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[4*16] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||
uint32_t *hash7 = &(hash[7<<2]);
|
||||
uint32_t hash[16*4] __attribute__ ((aligned (128)));
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
||||
uint32_t *hashd7 = &(hash[7<<2]);
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
int thr_id = mythr->id;
|
||||
__m256i *noncev = (__m256i*)vdata + 9; // aligned
|
||||
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
__m256i *noncev = (__m256i*)vdata + 9;
|
||||
const uint32_t targ32 = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 4;
|
||||
uint32_t n = first_nonce;
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
if ( opt_benchmark )
|
||||
ptarget[7] = 0x0cff;
|
||||
if ( bench ) ptarget[7] = 0x0cff;
|
||||
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
*noncev = mm256_intrlv_blend_32(
|
||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
||||
do {
|
||||
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
|
||||
_mm256_set_epi32( n+3, 0,n+2, 0,n+1, 0, n, 0 ) ), *noncev );
|
||||
|
||||
xevan_4way_hash( hash, vdata );
|
||||
for ( int lane = 0; lane < 4; lane++ )
|
||||
if ( hash7[ lane ] <= Htarg )
|
||||
if ( unlikely( hashd7[ lane ] <= targ32 ) && ! bench )
|
||||
{
|
||||
extr_lane_4x32( lane_hash, hash, lane, 256 );
|
||||
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||
if ( valid_hash( lane_hash, ptarget ) )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
pdata[19] = bswap_32( n + lane );
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
}
|
||||
}
|
||||
*noncev = _mm256_add_epi32( *noncev,
|
||||
m256_const1_64( 0x0000000400000000 ) );
|
||||
n += 4;
|
||||
} while ( ( n < max_nonce-4 ) && !work_restart[thr_id].restart );
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
} while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) );
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
128
algo/x17/xevan.c
128
algo/x17/xevan.c
@@ -56,8 +56,6 @@ typedef struct {
|
||||
} xevan_ctx_holder;
|
||||
|
||||
xevan_ctx_holder xevan_ctx __attribute__ ((aligned (64)));
|
||||
static __thread sph_blake512_context xevan_blake_mid
|
||||
__attribute__ ((aligned (64)));
|
||||
|
||||
void init_xevan_ctx()
|
||||
{
|
||||
@@ -85,34 +83,23 @@ void init_xevan_ctx()
|
||||
#endif
|
||||
};
|
||||
|
||||
void xevan_blake512_midstate( const void* input )
|
||||
{
|
||||
memcpy( &xevan_blake_mid, &xevan_ctx.blake, sizeof xevan_blake_mid );
|
||||
sph_blake512( &xevan_blake_mid, input, 64 );
|
||||
}
|
||||
|
||||
void xevan_hash(void *output, const void *input)
|
||||
{
|
||||
uint32_t _ALIGN(64) hash[32]; // 128 bytes required
|
||||
uint32_t _ALIGN(64) hash[32]; // 128 bytes required
|
||||
const int dataLen = 128;
|
||||
xevan_ctx_holder ctx __attribute__ ((aligned (64)));
|
||||
memcpy( &ctx, &xevan_ctx, sizeof(xevan_ctx) );
|
||||
|
||||
const int midlen = 64; // bytes
|
||||
const int tail = 80 - midlen; // 16
|
||||
|
||||
memcpy( &ctx.blake, &xevan_blake_mid, sizeof xevan_blake_mid );
|
||||
sph_blake512( &ctx.blake, input + midlen, tail );
|
||||
sph_blake512_close(&ctx.blake, hash);
|
||||
xevan_ctx_holder ctx __attribute__ ((aligned (64)));
|
||||
memcpy( &ctx, &xevan_ctx, sizeof(xevan_ctx) );
|
||||
|
||||
sph_blake512( &ctx.blake, input, 80 );
|
||||
sph_blake512_close( &ctx.blake, hash );
|
||||
memset(&hash[16], 0, 64);
|
||||
|
||||
sph_bmw512(&ctx.bmw, hash, dataLen);
|
||||
sph_bmw512_close(&ctx.bmw, hash);
|
||||
|
||||
#if defined(__AES__)
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash,
|
||||
(const char*)hash, dataLen*8 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash,
|
||||
(const char*)hash, dataLen*8 );
|
||||
#else
|
||||
sph_groestl512(&ctx.groestl, hash, dataLen);
|
||||
sph_groestl512_close(&ctx.groestl, hash);
|
||||
@@ -127,20 +114,20 @@ void xevan_hash(void *output, const void *input)
|
||||
sph_keccak512(&ctx.keccak, hash, dataLen);
|
||||
sph_keccak512_close(&ctx.keccak, hash);
|
||||
|
||||
update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
|
||||
(const BitSequence*)hash, dataLen );
|
||||
update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
|
||||
(const BitSequence*)hash, dataLen );
|
||||
|
||||
cubehashUpdateDigest( &ctx.cubehash, (byte*)hash,
|
||||
(const byte*) hash, dataLen );
|
||||
cubehashUpdateDigest( &ctx.cubehash, (byte*)hash,
|
||||
(const byte*) hash, dataLen );
|
||||
|
||||
sph_shavite512(&ctx.shavite, hash, dataLen);
|
||||
sph_shavite512_close(&ctx.shavite, hash);
|
||||
|
||||
update_final_sd( &ctx.simd, (BitSequence *)hash,
|
||||
update_final_sd( &ctx.simd, (BitSequence *)hash,
|
||||
(const BitSequence *)hash, dataLen*8 );
|
||||
|
||||
#if defined(__AES__)
|
||||
update_final_echo( &ctx.echo, (BitSequence *) hash,
|
||||
update_final_echo( &ctx.echo, (BitSequence *) hash,
|
||||
(const BitSequence *) hash, dataLen*8 );
|
||||
#else
|
||||
sph_echo512(&ctx.echo, hash, dataLen);
|
||||
@@ -159,15 +146,15 @@ void xevan_hash(void *output, const void *input)
|
||||
sph_whirlpool(&ctx.whirlpool, hash, dataLen);
|
||||
sph_whirlpool_close(&ctx.whirlpool, hash);
|
||||
|
||||
SHA512_Update( &ctx.sha512, hash, dataLen );
|
||||
SHA512_Final( (unsigned char*) hash, &ctx.sha512 );
|
||||
SHA512_Update( &ctx.sha512, hash, dataLen );
|
||||
SHA512_Final( (unsigned char*) hash, &ctx.sha512 );
|
||||
|
||||
sph_haval256_5(&ctx.haval,(const void*) hash, dataLen);
|
||||
sph_haval256_5_close(&ctx.haval, hash);
|
||||
|
||||
memset(&hash[8], 0, dataLen - 32);
|
||||
|
||||
memcpy( &ctx, &xevan_ctx, sizeof(xevan_ctx) );
|
||||
memcpy( &ctx, &xevan_ctx, sizeof(xevan_ctx) );
|
||||
|
||||
sph_blake512(&ctx.blake, hash, dataLen);
|
||||
sph_blake512_close(&ctx.blake, hash);
|
||||
@@ -176,11 +163,11 @@ void xevan_hash(void *output, const void *input)
|
||||
sph_bmw512_close(&ctx.bmw, hash);
|
||||
|
||||
#if defined(__AES__)
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash,
|
||||
(const BitSequence*)hash, dataLen*8 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash,
|
||||
(const BitSequence*)hash, dataLen*8 );
|
||||
#else
|
||||
sph_groestl512(&ctx.groestl, hash, dataLen);
|
||||
sph_groestl512_close(&ctx.groestl, hash);
|
||||
sph_groestl512_close(&ctx.groestl, hash);
|
||||
#endif
|
||||
|
||||
sph_skein512(&ctx.skein, hash, dataLen);
|
||||
@@ -191,24 +178,25 @@ void xevan_hash(void *output, const void *input)
|
||||
|
||||
sph_keccak512(&ctx.keccak, hash, dataLen);
|
||||
sph_keccak512_close(&ctx.keccak, hash);
|
||||
update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
|
||||
(const BitSequence*)hash, dataLen );
|
||||
|
||||
cubehashUpdateDigest( &ctx.cubehash, (byte*)hash,
|
||||
(const byte*) hash, dataLen );
|
||||
update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
|
||||
(const BitSequence*)hash, dataLen );
|
||||
|
||||
cubehashUpdateDigest( &ctx.cubehash, (byte*)hash,
|
||||
(const byte*) hash, dataLen );
|
||||
|
||||
sph_shavite512(&ctx.shavite, hash, dataLen);
|
||||
sph_shavite512_close(&ctx.shavite, hash);
|
||||
|
||||
update_final_sd( &ctx.simd, (BitSequence *)hash,
|
||||
update_final_sd( &ctx.simd, (BitSequence *)hash,
|
||||
(const BitSequence *)hash, dataLen*8 );
|
||||
|
||||
#if defined(__AES__)
|
||||
update_final_echo( &ctx.echo, (BitSequence *) hash,
|
||||
update_final_echo( &ctx.echo, (BitSequence *) hash,
|
||||
(const BitSequence *) hash, dataLen*8 );
|
||||
#else
|
||||
sph_echo512(&ctx.echo, hash, dataLen);
|
||||
sph_echo512_close(&ctx.echo, hash);
|
||||
sph_echo512(&ctx.echo, hash, dataLen);
|
||||
sph_echo512_close(&ctx.echo, hash);
|
||||
#endif
|
||||
|
||||
sph_hamsi512(&ctx.hamsi, hash, dataLen);
|
||||
@@ -223,8 +211,8 @@ void xevan_hash(void *output, const void *input)
|
||||
sph_whirlpool(&ctx.whirlpool, hash, dataLen);
|
||||
sph_whirlpool_close(&ctx.whirlpool, hash);
|
||||
|
||||
SHA512_Update( &ctx.sha512, hash, dataLen );
|
||||
SHA512_Final( (unsigned char*) hash, &ctx.sha512 );
|
||||
SHA512_Update( &ctx.sha512, hash, dataLen );
|
||||
SHA512_Final( (unsigned char*) hash, &ctx.sha512 );
|
||||
|
||||
sph_haval256_5(&ctx.haval,(const void*) hash, dataLen);
|
||||
sph_haval256_5_close(&ctx.haval, hash);
|
||||
@@ -233,41 +221,33 @@ void xevan_hash(void *output, const void *input)
|
||||
}
|
||||
|
||||
int scanhash_xevan( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
uint64_t *hashes_done, struct thr_info *mythr)
|
||||
{
|
||||
uint32_t _ALIGN(64) hash[8];
|
||||
uint32_t _ALIGN(64) endiandata[20];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t nonce = first_nonce;
|
||||
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
||||
uint32_t edata[20] __attribute__((aligned(64)));
|
||||
uint32_t hash64[8] __attribute__((aligned(64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[19];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
if (opt_benchmark)
|
||||
ptarget[7] = 0x0cff;
|
||||
mm128_bswap32_80( edata, pdata );
|
||||
|
||||
for (int k=0; k < 19; k++)
|
||||
be32enc(&endiandata[k], pdata[k]);
|
||||
|
||||
xevan_blake512_midstate( endiandata );
|
||||
do {
|
||||
be32enc(&endiandata[19], nonce);
|
||||
xevan_hash(hash, endiandata);
|
||||
|
||||
if (hash[7] <= Htarg )
|
||||
if ( fulltest( hash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = nonce;
|
||||
submit_solution( work, hash, mythr );
|
||||
}
|
||||
nonce++;
|
||||
} while ( nonce < max_nonce && !(*restart) );
|
||||
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce + 1;
|
||||
return 0;
|
||||
do
|
||||
{
|
||||
edata[19] = n;
|
||||
xevan_hash( hash64, edata );
|
||||
if ( unlikely( valid_hash( hash64, ptarget ) && !bench ) )
|
||||
{
|
||||
pdata[19] = bswap_32( n );
|
||||
submit_solution( work, hash64, mythr );
|
||||
}
|
||||
n++;
|
||||
} while ( n < max_nonce && !work_restart[thr_id].restart );
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@@ -87,64 +87,40 @@ void x22i_8way_hash( void *output, const void *input )
|
||||
unsigned char hashA7[64] __attribute__((aligned(32))) = {0};
|
||||
x22i_8way_ctx_overlay ctx;
|
||||
|
||||
blake512_8way_init( &ctx.blake );
|
||||
blake512_8way_update( &ctx.blake, input, 80 );
|
||||
blake512_8way_close( &ctx.blake, vhash );
|
||||
blake512_8way_full( &ctx.blake, vhash, input, 80 );
|
||||
|
||||
bmw512_8way_init( &ctx.bmw );
|
||||
bmw512_8way_update( &ctx.bmw, vhash, 64 );
|
||||
bmw512_8way_close( &ctx.bmw, vhash );
|
||||
bmw512_8way_full( &ctx.bmw, vhash, vhash, 64 );
|
||||
|
||||
#if defined(__VAES__)
|
||||
|
||||
rintrlv_8x64_4x128( vhashA, vhashB, vhash, 512 );
|
||||
rintrlv_8x64_4x128( vhashA, vhashB, vhash, 512 );
|
||||
|
||||
groestl512_4way_init( &ctx.groestl, 64 );
|
||||
groestl512_4way_update_close( &ctx.groestl, vhashA, vhashA, 512 );
|
||||
groestl512_4way_init( &ctx.groestl, 64 );
|
||||
groestl512_4way_update_close( &ctx.groestl, vhashB, vhashB, 512 );
|
||||
groestl512_4way_full( &ctx.groestl, vhashA, vhashA, 64 );
|
||||
groestl512_4way_full( &ctx.groestl, vhashB, vhashB, 64 );
|
||||
|
||||
rintrlv_4x128_8x64( vhash, vhashA, vhashB, 512 );
|
||||
rintrlv_4x128_8x64( vhash, vhashA, vhashB, 512 );
|
||||
|
||||
#else
|
||||
|
||||
dintrlv_8x64_512( hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7, vhash );
|
||||
dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
|
||||
vhash );
|
||||
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash0,
|
||||
(const char*)hash0, 512 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash1,
|
||||
(const char*)hash1, 512 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash2,
|
||||
(const char*)hash2, 512 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash3,
|
||||
(const char*)hash3, 512 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash4,
|
||||
(const char*)hash4, 512 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash5,
|
||||
(const char*)hash5, 512 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash6,
|
||||
(const char*)hash6, 512 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash7,
|
||||
(const char*)hash7, 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash1, (char*)hash1, 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash2, (char*)hash2, 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash4, (char*)hash4, 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash5, (char*)hash5, 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash6, (char*)hash6, 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash7, (char*)hash7, 512 );
|
||||
|
||||
intrlv_8x64_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7 );
|
||||
|
||||
intrlv_8x64_512( vhash, hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7 );
|
||||
|
||||
#endif
|
||||
|
||||
skein512_8way_init( &ctx.skein );
|
||||
skein512_8way_update( &ctx.skein, vhash, 64 );
|
||||
skein512_8way_close( &ctx.skein, vhash );
|
||||
|
||||
skein512_8way_full( &ctx.skein, vhash, vhash, 64 );
|
||||
|
||||
jh512_8way_init( &ctx.jh );
|
||||
jh512_8way_update( &ctx.jh, vhash, 64 );
|
||||
jh512_8way_close( &ctx.jh, vhash );
|
||||
@@ -155,22 +131,16 @@ void x22i_8way_hash( void *output, const void *input )
|
||||
|
||||
rintrlv_8x64_4x128( vhashA, vhashB, vhash, 512 );
|
||||
|
||||
luffa_4way_init( &ctx.luffa, 512 );
|
||||
luffa_4way_update_close( &ctx.luffa, vhashA, vhashA, 64 );
|
||||
luffa_4way_init( &ctx.luffa, 512 );
|
||||
luffa_4way_update_close( &ctx.luffa, vhashB, vhashB, 64 );
|
||||
luffa512_4way_full( &ctx.luffa, vhashA, vhashA, 64 );
|
||||
luffa512_4way_full( &ctx.luffa, vhashB, vhashB, 64 );
|
||||
|
||||
cube_4way_init( &ctx.cube, 512, 16, 32 );
|
||||
cube_4way_update_close( &ctx.cube, vhashA, vhashA, 64 );
|
||||
cube_4way_init( &ctx.cube, 512, 16, 32 );
|
||||
cube_4way_update_close( &ctx.cube, vhashB, vhashB, 64 );
|
||||
cube_4way_full( &ctx.cube, vhashA, 512, vhashA, 64 );
|
||||
cube_4way_full( &ctx.cube, vhashB, 512, vhashB, 64 );
|
||||
|
||||
#if defined(__VAES__)
|
||||
|
||||
shavite512_4way_init( &ctx.shavite );
|
||||
shavite512_4way_update_close( &ctx.shavite, vhashA, vhashA, 64 );
|
||||
shavite512_4way_init( &ctx.shavite );
|
||||
shavite512_4way_update_close( &ctx.shavite, vhashB, vhashB, 64 );
|
||||
shavite512_4way_full( &ctx.shavite, vhashA, vhashA, 64 );
|
||||
shavite512_4way_full( &ctx.shavite, vhashB, vhashB, 64 );
|
||||
|
||||
#else
|
||||
|
||||
@@ -207,17 +177,13 @@ void x22i_8way_hash( void *output, const void *input )
|
||||
|
||||
#endif
|
||||
|
||||
simd_4way_init( &ctx.simd, 512 );
|
||||
simd_4way_update_close( &ctx.simd, vhashA, vhashA, 512 );
|
||||
simd_4way_init( &ctx.simd, 512 );
|
||||
simd_4way_update_close( &ctx.simd, vhashB, vhashB, 512 );
|
||||
simd512_4way_full( &ctx.simd, vhashA, vhashA, 64 );
|
||||
simd512_4way_full( &ctx.simd, vhashB, vhashB, 64 );
|
||||
|
||||
#if defined(__VAES__)
|
||||
|
||||
echo_4way_init( &ctx.echo, 512 );
|
||||
echo_4way_update_close( &ctx.echo, vhashA, vhashA, 512 );
|
||||
echo_4way_init( &ctx.echo, 512 );
|
||||
echo_4way_update_close( &ctx.echo, vhashB, vhashB, 512 );
|
||||
echo_4way_full( &ctx.echo, vhashA, 512, vhashA, 64 );
|
||||
echo_4way_full( &ctx.echo, vhashB, 512, vhashB, 64 );
|
||||
|
||||
rintrlv_4x128_8x64( vhash, vhashA, vhashB, 512 );
|
||||
|
||||
@@ -226,30 +192,22 @@ void x22i_8way_hash( void *output, const void *input )
|
||||
dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhashA );
|
||||
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhashB );
|
||||
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence*)hash0,
|
||||
(const BitSequence*)hash0, 512 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence*)hash1,
|
||||
(const BitSequence*)hash1, 512 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence*)hash2,
|
||||
(const BitSequence*)hash2, 512 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence*)hash3,
|
||||
(const BitSequence*)hash3, 512 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence*)hash4,
|
||||
(const BitSequence*)hash4, 512 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence*)hash5,
|
||||
(const BitSequence*)hash5, 512 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence*)hash6,
|
||||
(const BitSequence*)hash6, 512 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence*)hash7,
|
||||
(const BitSequence*)hash7, 512 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash0, 512,
|
||||
(const BitSequence *)hash0, 64 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash1, 512,
|
||||
(const BitSequence *)hash1, 64 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash2, 512,
|
||||
(const BitSequence *)hash2, 64 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash3, 512,
|
||||
(const BitSequence *)hash3, 64 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash4, 512,
|
||||
(const BitSequence *)hash4, 64 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash5, 512,
|
||||
(const BitSequence *)hash5, 64 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash6, 512,
|
||||
(const BitSequence *)hash6, 64 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash7, 512,
|
||||
(const BitSequence *)hash7, 64 );
|
||||
|
||||
intrlv_8x64_512( vhash, hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7 );
|
||||
@@ -443,6 +401,55 @@ void x22i_8way_hash( void *output, const void *input )
|
||||
sha256_8way_close( &ctx.sha256, output );
|
||||
}
|
||||
|
||||
int scanhash_x22i_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[8*8] __attribute__ ((aligned (128)));
|
||||
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
||||
uint32_t *hashd7 = &(hash[7*8]);
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 8;
|
||||
__m512i *noncev = (__m512i*)vdata + 9;
|
||||
uint32_t n = first_nonce;
|
||||
const int thr_id = mythr->id;
|
||||
const uint32_t targ32 = ptarget[7];
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
if ( bench ) ptarget[7] = 0x08ff;
|
||||
|
||||
InitializeSWIFFTX();
|
||||
|
||||
mm512_bswap32_intrlv80_8x64( vdata, pdata );
|
||||
*noncev = mm512_intrlv_blend_32(
|
||||
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
|
||||
n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
||||
do
|
||||
{
|
||||
x22i_8way_hash( hash, vdata );
|
||||
|
||||
for ( int lane = 0; lane < 8; lane++ )
|
||||
if ( unlikely( ( hashd7[ lane ] <= targ32 ) && !bench ) )
|
||||
{
|
||||
extr_lane_8x32( lane_hash, hash, lane, 256 );
|
||||
if ( likely( valid_hash( lane_hash, ptarget ) ) )
|
||||
{
|
||||
pdata[19] = bswap_32( n + lane );
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
}
|
||||
}
|
||||
*noncev = _mm512_add_epi32( *noncev,
|
||||
m512_const1_64( 0x0000000800000000 ) );
|
||||
n += 8;
|
||||
} while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) );
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
int scanhash_x22i_8way( struct work* work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
@@ -488,6 +495,7 @@ int scanhash_x22i_8way( struct work* work, uint32_t max_nonce,
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
*/
|
||||
|
||||
#elif defined(X22I_4WAY)
|
||||
|
||||
@@ -531,33 +539,21 @@ void x22i_4way_hash( void *output, const void *input )
|
||||
unsigned char hashA3[64] __attribute__((aligned(32))) = {0};
|
||||
x22i_ctx_overlay ctx;
|
||||
|
||||
blake512_4way_init( &ctx.blake );
|
||||
blake512_4way_update( &ctx.blake, input, 80 );
|
||||
blake512_4way_close( &ctx.blake, vhash );
|
||||
blake512_4way_full( &ctx.blake, vhash, input, 80 );
|
||||
|
||||
bmw512_4way_init( &ctx.bmw );
|
||||
bmw512_4way_update( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash0,
|
||||
(const char*)hash0, 512 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash1,
|
||||
(const char*)hash1, 512 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash2,
|
||||
(const char*)hash2, 512 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash3,
|
||||
(const char*)hash3, 512 );
|
||||
|
||||
groestl512_full( &ctx.groestl, (char*)hash0, (const char*)hash0, 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash1, (const char*)hash1, 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash2, (const char*)hash2, 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash3, (const char*)hash3, 512 );
|
||||
|
||||
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
skein512_4way_init( &ctx.skein );
|
||||
skein512_4way_update( &ctx.skein, vhash, 64 );
|
||||
skein512_4way_close( &ctx.skein, vhash );
|
||||
skein512_4way_full( &ctx.skein, vhash, vhash, 64 );
|
||||
|
||||
jh512_4way_init( &ctx.jh );
|
||||
jh512_4way_update( &ctx.jh, vhash, 64 );
|
||||
@@ -569,41 +565,29 @@ void x22i_4way_hash( void *output, const void *input )
|
||||
|
||||
rintrlv_4x64_2x128( vhashA, vhashB, vhash, 512 );
|
||||
|
||||
luffa_2way_init( &ctx.luffa, 512 );
|
||||
luffa_2way_update_close( &ctx.luffa, vhashA, vhashA, 64 );
|
||||
luffa_2way_init( &ctx.luffa, 512 );
|
||||
luffa_2way_update_close( &ctx.luffa, vhashB, vhashB, 64 );
|
||||
luffa512_2way_full( &ctx.luffa, vhashA, vhashA, 64 );
|
||||
luffa512_2way_full( &ctx.luffa, vhashB, vhashB, 64 );
|
||||
|
||||
cube_2way_init( &ctx.cube, 512, 16, 32 );
|
||||
cube_2way_update_close( &ctx.cube, vhashA, vhashA, 64 );
|
||||
cube_2way_init( &ctx.cube, 512, 16, 32 );
|
||||
cube_2way_update_close( &ctx.cube, vhashB, vhashB, 64 );
|
||||
cube_2way_full( &ctx.cube, vhashA, 512, vhashA, 64 );
|
||||
cube_2way_full( &ctx.cube, vhashB, 512, vhashB, 64 );
|
||||
|
||||
shavite512_2way_full( &ctx.shavite, vhashA, vhashA, 64 );
|
||||
shavite512_2way_full( &ctx.shavite, vhashB, vhashB, 64 );
|
||||
|
||||
shavite512_2way_init( &ctx.shavite );
|
||||
shavite512_2way_update_close( &ctx.shavite, vhashA, vhashA, 64 );
|
||||
shavite512_2way_init( &ctx.shavite );
|
||||
shavite512_2way_update_close( &ctx.shavite, vhashB, vhashB, 64 );
|
||||
|
||||
simd_2way_init( &ctx.simd, 512 );
|
||||
simd_2way_update_close( &ctx.simd, vhashA, vhashA, 512 );
|
||||
simd_2way_init( &ctx.simd, 512 );
|
||||
simd_2way_update_close( &ctx.simd, vhashB, vhashB, 512 );
|
||||
simd512_2way_full( &ctx.simd, vhashA, vhashA, 64 );
|
||||
simd512_2way_full( &ctx.simd, vhashB, vhashB, 64 );
|
||||
|
||||
dintrlv_2x128_512( hash0, hash1, vhashA );
|
||||
dintrlv_2x128_512( hash2, hash3, vhashB );
|
||||
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence*)hash0,
|
||||
(const BitSequence*)hash0, 512 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence*)hash1,
|
||||
(const BitSequence*)hash1, 512 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence*)hash2,
|
||||
(const BitSequence*)hash2, 512 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence*)hash3,
|
||||
(const BitSequence*)hash3, 512 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash0, 512,
|
||||
(const BitSequence *)hash0, 64 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash1, 512,
|
||||
(const BitSequence *)hash1, 64 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash2, 512,
|
||||
(const BitSequence *)hash2, 64 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash3, 512,
|
||||
(const BitSequence *)hash3, 64 );
|
||||
|
||||
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
@@ -722,44 +706,47 @@ void x22i_4way_hash( void *output, const void *input )
|
||||
int scanhash_x22i_4way( struct work* work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[4*16] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||
uint32_t *hash7 = &(hash[7<<2]);
|
||||
uint32_t hash[8*4] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
||||
uint32_t *hashd7 = &(hash[ 7*4 ]);
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
__m256i *noncev = (__m256i*)vdata + 9; // aligned
|
||||
const uint32_t last_nonce = max_nonce - 4;
|
||||
__m256i *noncev = (__m256i*)vdata + 9;
|
||||
uint32_t n = first_nonce;
|
||||
const int thr_id = mythr->id;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t targ32 = ptarget[7];
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
if ( bench ) ptarget[7] = 0x08ff;
|
||||
|
||||
if (opt_benchmark)
|
||||
((uint32_t*)ptarget)[7] = 0x08ff;
|
||||
|
||||
InitializeSWIFFTX();
|
||||
|
||||
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
*noncev = mm256_intrlv_blend_32(
|
||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
||||
do
|
||||
{
|
||||
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
|
||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
|
||||
x22i_4way_hash( hash, vdata );
|
||||
|
||||
for ( int lane = 0; lane < 4; lane++ )
|
||||
if unlikely( ( hash7[ lane ] <= Htarg ) )
|
||||
if ( unlikely( hashd7[ lane ] <= targ32 && !bench ) )
|
||||
{
|
||||
extr_lane_4x32( lane_hash, hash, lane, 256 );
|
||||
if ( likely( fulltest( lane_hash, ptarget ) && !opt_benchmark ) )
|
||||
if ( valid_hash( lane_hash, ptarget ) )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
pdata[19] = bswap_32( n + lane );
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
}
|
||||
}
|
||||
*noncev = _mm256_add_epi32( *noncev,
|
||||
m256_const1_64( 0x0000000400000000 ) );
|
||||
n += 4;
|
||||
} while ( likely( ( n < max_nonce - 4 ) && !work_restart[thr_id].restart ) );
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
} while ( likely( ( n <= last_nonce ) && !work_restart[thr_id].restart ) );
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@@ -167,40 +167,38 @@ void x22i_hash( void *output, const void *input )
|
||||
memcpy(output, hash, 32);
|
||||
}
|
||||
|
||||
int scanhash_x22i( struct work* work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
int scanhash_x22i( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr)
|
||||
{
|
||||
uint32_t endiandata[20] __attribute__((aligned(64)));
|
||||
uint32_t hash[8] __attribute__((aligned(64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint32_t n = first_nonce;
|
||||
uint32_t edata[20] __attribute__((aligned(64)));
|
||||
uint32_t hash64[8] __attribute__((aligned(64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[19];
|
||||
const uint32_t first_nonce = n;
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
if (opt_benchmark)
|
||||
((uint32_t*)ptarget)[7] = 0x08ff;
|
||||
|
||||
for (int k=0; k < 20; k++)
|
||||
be32enc(&endiandata[k], pdata[k]);
|
||||
if ( bench ) ptarget[7] = 0x08ff;
|
||||
|
||||
mm128_bswap32_80( edata, pdata );
|
||||
|
||||
InitializeSWIFFTX();
|
||||
|
||||
|
||||
do
|
||||
{
|
||||
pdata[19] = ++n;
|
||||
be32enc( &endiandata[19], n );
|
||||
|
||||
x22i_hash( hash, endiandata );
|
||||
|
||||
if ( hash[7] < Htarg )
|
||||
if ( fulltest( hash, ptarget ) && !opt_benchmark )
|
||||
submit_solution( work, hash, mythr );
|
||||
} while ( n < max_nonce && !work_restart[thr_id].restart );
|
||||
|
||||
*hashes_done = pdata[19] - first_nonce;
|
||||
return 0;
|
||||
edata[19] = n;
|
||||
x22i_hash( hash64, edata );
|
||||
if ( unlikely( valid_hash( hash64, ptarget ) && !bench ) )
|
||||
{
|
||||
pdata[19] = bswap_32( n );
|
||||
submit_solution( work, hash64, mythr );
|
||||
}
|
||||
n++;
|
||||
} while ( n < max_nonce && !work_restart[thr_id].restart );
|
||||
*hashes_done = n - first_nonce;
|
||||
pdata[19] = n;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@@ -530,6 +530,55 @@ void x25x_8way_hash( void *output, const void *input )
|
||||
blake2s_8way_full_blocks( &ctx.blake2s, output, vhashX, 64*24 );
|
||||
}
|
||||
|
||||
int scanhash_x25x_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[8*8] __attribute__ ((aligned (128)));
|
||||
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
||||
uint32_t *hashd7 = &(hash[7*8]);
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 8;
|
||||
__m512i *noncev = (__m512i*)vdata + 9;
|
||||
uint32_t n = first_nonce;
|
||||
const int thr_id = mythr->id;
|
||||
const uint32_t targ32 = ptarget[7];
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
if ( bench ) ptarget[7] = 0x08ff;
|
||||
|
||||
InitializeSWIFFTX();
|
||||
|
||||
mm512_bswap32_intrlv80_8x64( vdata, pdata );
|
||||
*noncev = mm512_intrlv_blend_32(
|
||||
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
|
||||
n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
||||
do
|
||||
{
|
||||
x25x_8way_hash( hash, vdata );
|
||||
|
||||
for ( int lane = 0; lane < 8; lane++ )
|
||||
if ( unlikely( ( hashd7[ lane ] <= targ32 ) && !bench ) )
|
||||
{
|
||||
extr_lane_8x32( lane_hash, hash, lane, 256 );
|
||||
if ( likely( valid_hash( lane_hash, ptarget ) ) )
|
||||
{
|
||||
pdata[19] = bswap_32( n + lane );
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
}
|
||||
}
|
||||
*noncev = _mm512_add_epi32( *noncev,
|
||||
m512_const1_64( 0x0000000800000000 ) );
|
||||
n += 8;
|
||||
} while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) );
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
int scanhash_x25x_8way( struct work* work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
@@ -574,6 +623,7 @@ int scanhash_x25x_8way( struct work* work, uint32_t max_nonce,
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
*/
|
||||
|
||||
#elif defined(X25X_4WAY)
|
||||
|
||||
@@ -614,9 +664,7 @@ void x25x_4way_hash( void *output, const void *input )
|
||||
unsigned char vhashX[24][64*4] __attribute__ ((aligned (64)));
|
||||
x25x_4way_ctx_overlay ctx __attribute__ ((aligned (64)));
|
||||
|
||||
blake512_4way_init( &ctx.blake );
|
||||
blake512_4way_update( &ctx.blake, input, 80 );
|
||||
blake512_4way_close( &ctx.blake, vhash );
|
||||
blake512_4way_full( &ctx.blake, vhash, input, 80 );
|
||||
dintrlv_4x64_512( hash0[0], hash1[0], hash2[0], hash3[0], vhash );
|
||||
|
||||
bmw512_4way_init( &ctx.bmw );
|
||||
@@ -624,24 +672,13 @@ void x25x_4way_hash( void *output, const void *input )
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
dintrlv_4x64_512( hash0[1], hash1[1], hash2[1], hash3[1], vhash );
|
||||
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash0[2],
|
||||
(const char*)hash0[1], 512 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash1[2],
|
||||
(const char*)hash1[1], 512 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash2[2],
|
||||
(const char*)hash2[1], 512 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash3[2],
|
||||
(const char*)hash3[1], 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash0[2], (const char*)hash0[1], 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash1[2], (const char*)hash1[1], 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash2[2], (const char*)hash2[1], 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash3[2], (const char*)hash3[1], 512 );
|
||||
|
||||
intrlv_4x64_512( vhash, hash0[2], hash1[2], hash2[2], hash3[2] );
|
||||
|
||||
skein512_4way_init( &ctx.skein );
|
||||
skein512_4way_update( &ctx.skein, vhash, 64 );
|
||||
skein512_4way_close( &ctx.skein, vhash );
|
||||
skein512_4way_full( &ctx.skein, vhash, vhash, 64 );
|
||||
dintrlv_4x64_512( hash0[3], hash1[3], hash2[3], hash3[3], vhash );
|
||||
|
||||
jh512_4way_init( &ctx.jh );
|
||||
@@ -654,32 +691,20 @@ void x25x_4way_hash( void *output, const void *input )
|
||||
keccak512_4way_close( &ctx.keccak, vhash );
|
||||
dintrlv_4x64_512( hash0[5], hash1[5], hash2[5], hash3[5], vhash );
|
||||
|
||||
init_luffa( &ctx.luffa, 512 );
|
||||
update_and_final_luffa( &ctx.luffa, (BitSequence*)hash0[6],
|
||||
(const BitSequence*)hash0[5], 64 );
|
||||
init_luffa( &ctx.luffa, 512 );
|
||||
update_and_final_luffa( &ctx.luffa, (BitSequence*)hash1[6],
|
||||
(const BitSequence*)hash1[5], 64 );
|
||||
init_luffa( &ctx.luffa, 512 );
|
||||
update_and_final_luffa( &ctx.luffa, (BitSequence*)hash2[6],
|
||||
(const BitSequence*)hash2[5], 64 );
|
||||
init_luffa( &ctx.luffa, 512 );
|
||||
update_and_final_luffa( &ctx.luffa, (BitSequence*)hash3[6],
|
||||
(const BitSequence*)hash3[5], 64 );
|
||||
|
||||
cubehashInit( &ctx.cube, 512, 16, 32 );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*) hash0[7],
|
||||
(const byte*)hash0[6], 64 );
|
||||
cubehashInit( &ctx.cube, 512, 16, 32 );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*) hash1[7],
|
||||
(const byte*)hash1[6], 64 );
|
||||
cubehashInit( &ctx.cube, 512, 16, 32 );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*) hash2[7],
|
||||
(const byte*)hash2[6], 64 );
|
||||
cubehashInit( &ctx.cube, 512, 16, 32 );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*) hash3[7],
|
||||
(const byte*)hash3[6], 64 );
|
||||
luffa_full( &ctx.luffa, (BitSequence*)hash0[6], 512,
|
||||
(const BitSequence*)hash0[5], 64 );
|
||||
luffa_full( &ctx.luffa, (BitSequence*)hash1[6], 512,
|
||||
(const BitSequence*)hash1[5], 64 );
|
||||
luffa_full( &ctx.luffa, (BitSequence*)hash2[6], 512,
|
||||
(const BitSequence*)hash2[5], 64 );
|
||||
luffa_full( &ctx.luffa, (BitSequence*)hash3[6], 512,
|
||||
(const BitSequence*)hash3[5], 64 );
|
||||
|
||||
cubehash_full( &ctx.cube, (byte*)hash0[7], 512, (const byte*)hash0[6], 64 );
|
||||
cubehash_full( &ctx.cube, (byte*)hash1[7], 512, (const byte*)hash1[6], 64 );
|
||||
cubehash_full( &ctx.cube, (byte*)hash2[7], 512, (const byte*)hash2[6], 64 );
|
||||
cubehash_full( &ctx.cube, (byte*)hash3[7], 512, (const byte*)hash3[6], 64 );
|
||||
|
||||
sph_shavite512_init(&ctx.shavite);
|
||||
sph_shavite512(&ctx.shavite, (const void*) hash0[7], 64);
|
||||
sph_shavite512_close(&ctx.shavite, hash0[8]);
|
||||
@@ -693,31 +718,23 @@ void x25x_4way_hash( void *output, const void *input )
|
||||
sph_shavite512(&ctx.shavite, (const void*) hash3[7], 64);
|
||||
sph_shavite512_close(&ctx.shavite, hash3[8]);
|
||||
|
||||
init_sd( &ctx.simd, 512 );
|
||||
update_final_sd( &ctx.simd, (BitSequence*)hash0[9],
|
||||
(const BitSequence*)hash0[8], 512 );
|
||||
init_sd( &ctx.simd, 512 );
|
||||
update_final_sd( &ctx.simd, (BitSequence*)hash1[9],
|
||||
(const BitSequence*)hash1[8], 512 );
|
||||
init_sd( &ctx.simd, 512 );
|
||||
update_final_sd( &ctx.simd, (BitSequence*)hash2[9],
|
||||
(const BitSequence*)hash2[8], 512 );
|
||||
init_sd( &ctx.simd, 512 );
|
||||
update_final_sd( &ctx.simd, (BitSequence*)hash3[9],
|
||||
(const BitSequence*)hash3[8], 512 );
|
||||
simd_full( &ctx.simd, (BitSequence*)hash0[9],
|
||||
(const BitSequence*)hash0[8], 512 );
|
||||
simd_full( &ctx.simd, (BitSequence*)hash1[9],
|
||||
(const BitSequence*)hash1[8], 512 );
|
||||
simd_full( &ctx.simd, (BitSequence*)hash2[9],
|
||||
(const BitSequence*)hash2[8], 512 );
|
||||
simd_full( &ctx.simd, (BitSequence*)hash3[9],
|
||||
(const BitSequence*)hash3[8], 512 );
|
||||
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence*)hash0[10],
|
||||
(const BitSequence*)hash0[9], 512 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence*)hash1[10],
|
||||
(const BitSequence*)hash1[9], 512 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence*)hash2[10],
|
||||
(const BitSequence*)hash2[9], 512 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence*)hash3[10],
|
||||
(const BitSequence*)hash3[9], 512 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash0[10], 512,
|
||||
(const BitSequence *)hash0[ 9], 64 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash1[10], 512,
|
||||
(const BitSequence *)hash1[ 9], 64 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash2[10], 512,
|
||||
(const BitSequence *)hash2[ 9], 64 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash3[10], 512,
|
||||
(const BitSequence *)hash3[ 9], 64 );
|
||||
|
||||
intrlv_4x64_512( vhash, hash0[10], hash1[10], hash2[10], hash3[10] );
|
||||
|
||||
@@ -870,43 +887,46 @@ void x25x_4way_hash( void *output, const void *input )
|
||||
int scanhash_x25x_4way( struct work* work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[16*4] __attribute__ ((aligned (128)));
|
||||
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||
uint32_t *hash7 = &(hash[7<<2]);
|
||||
uint32_t hash[8*4] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
||||
uint32_t *hashd7 = &(hash[ 7*4 ]);
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
__m256i *noncev = (__m256i*)vdata + 9; // aligned
|
||||
uint32_t n = first_nonce;
|
||||
const uint32_t last_nonce = max_nonce - 4;
|
||||
__m256i *noncev = (__m256i*)vdata + 9;
|
||||
uint32_t n = first_nonce;
|
||||
const int thr_id = mythr->id;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t targ32 = ptarget[7];
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
if (opt_benchmark)
|
||||
((uint32_t*)ptarget)[7] = 0x08ff;
|
||||
if ( bench ) ptarget[7] = 0x08ff;
|
||||
|
||||
InitializeSWIFFTX();
|
||||
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
*noncev = mm256_intrlv_blend_32(
|
||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
||||
do
|
||||
{
|
||||
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
|
||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
|
||||
x25x_4way_hash( hash, vdata );
|
||||
|
||||
for ( int lane = 0; lane < 4; lane++ ) if ( hash7[lane] <= Htarg )
|
||||
for ( int lane = 0; lane < 4; lane++ )
|
||||
if ( unlikely( hashd7[ lane ] <= targ32 && !bench ) )
|
||||
{
|
||||
extr_lane_4x32( lane_hash, hash, lane, 256 );
|
||||
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||
if ( valid_hash( lane_hash, ptarget ) )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
pdata[19] = bswap_32( n + lane );
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
}
|
||||
}
|
||||
*noncev = _mm256_add_epi32( *noncev,
|
||||
m256_const1_64( 0x0000000400000000 ) );
|
||||
n += 4;
|
||||
} while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) );
|
||||
|
||||
} while ( likely( ( n <= last_nonce ) && !work_restart[thr_id].restart ) );
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
@@ -201,42 +201,38 @@ void x25x_hash( void *output, const void *input )
|
||||
memcpy(output, &hash[24], 32);
|
||||
}
|
||||
|
||||
int scanhash_x25x( struct work* work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
int scanhash_x25x( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr)
|
||||
{
|
||||
uint32_t edata[20] __attribute__((aligned(64)));
|
||||
uint32_t hash[8] __attribute__((aligned(64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint32_t n = first_nonce;
|
||||
uint32_t hash64[8] __attribute__((aligned(64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[19];
|
||||
const uint32_t first_nonce = n;
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
if (opt_benchmark)
|
||||
((uint32_t*)ptarget)[7] = 0x08ff;
|
||||
if ( bench ) ptarget[7] = 0x08ff;
|
||||
|
||||
mm128_bswap32_80( edata, pdata );
|
||||
|
||||
for (int k=0; k < 20; k++)
|
||||
be32enc(&edata[k], pdata[k]);
|
||||
|
||||
InitializeSWIFFTX();
|
||||
|
||||
do
|
||||
{
|
||||
pdata[19] = ++n;
|
||||
be32enc( &edata[19], n );
|
||||
|
||||
x25x_hash( hash, edata );
|
||||
|
||||
if ( hash[7] < Htarg )
|
||||
if ( fulltest( hash, ptarget ) && !opt_benchmark )
|
||||
submit_solution( work, hash, mythr );
|
||||
} while ( n < max_nonce && !work_restart[thr_id].restart );
|
||||
|
||||
*hashes_done = pdata[19] - first_nonce;
|
||||
return 0;
|
||||
edata[19] = n;
|
||||
x25x_hash( hash64, edata );
|
||||
if ( unlikely( valid_hash( hash64, ptarget ) && !bench ) )
|
||||
{
|
||||
pdata[19] = bswap_32( n );
|
||||
submit_solution( work, hash64, mythr );
|
||||
}
|
||||
n++;
|
||||
} while ( n < max_nonce && !work_restart[thr_id].restart );
|
||||
*hashes_done = n - first_nonce;
|
||||
pdata[19] = n;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@@ -73,6 +73,7 @@ bool register_yescryptr8g_algo( algo_gate_t* gate )
|
||||
gate->optimizations = SSE2_OPT | SHA_OPT;
|
||||
gate->scanhash = (void*)&scanhash_yespower_r8g;
|
||||
gate->hash = (void*)&yespower_tls;
|
||||
pk_buffer_size = 26;
|
||||
opt_sapling = true;
|
||||
opt_target_factor = 65536.0;
|
||||
return true;
|
||||
|
20
configure
vendored
20
configure
vendored
@@ -1,6 +1,6 @@
|
||||
#! /bin/sh
|
||||
# Guess values for system-dependent variables and create Makefiles.
|
||||
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.12.0.1.
|
||||
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.12.5.
|
||||
#
|
||||
#
|
||||
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
|
||||
@@ -577,8 +577,8 @@ MAKEFLAGS=
|
||||
# Identity of this package.
|
||||
PACKAGE_NAME='cpuminer-opt'
|
||||
PACKAGE_TARNAME='cpuminer-opt'
|
||||
PACKAGE_VERSION='3.12.0.1'
|
||||
PACKAGE_STRING='cpuminer-opt 3.12.0.1'
|
||||
PACKAGE_VERSION='3.12.5'
|
||||
PACKAGE_STRING='cpuminer-opt 3.12.5'
|
||||
PACKAGE_BUGREPORT=''
|
||||
PACKAGE_URL=''
|
||||
|
||||
@@ -1332,7 +1332,7 @@ if test "$ac_init_help" = "long"; then
|
||||
# Omit some internal or obsolete options to make the list less imposing.
|
||||
# This message is too long to be a string in the A/UX 3.1 sh.
|
||||
cat <<_ACEOF
|
||||
\`configure' configures cpuminer-opt 3.12.0.1 to adapt to many kinds of systems.
|
||||
\`configure' configures cpuminer-opt 3.12.5 to adapt to many kinds of systems.
|
||||
|
||||
Usage: $0 [OPTION]... [VAR=VALUE]...
|
||||
|
||||
@@ -1404,7 +1404,7 @@ fi
|
||||
|
||||
if test -n "$ac_init_help"; then
|
||||
case $ac_init_help in
|
||||
short | recursive ) echo "Configuration of cpuminer-opt 3.12.0.1:";;
|
||||
short | recursive ) echo "Configuration of cpuminer-opt 3.12.5:";;
|
||||
esac
|
||||
cat <<\_ACEOF
|
||||
|
||||
@@ -1509,7 +1509,7 @@ fi
|
||||
test -n "$ac_init_help" && exit $ac_status
|
||||
if $ac_init_version; then
|
||||
cat <<\_ACEOF
|
||||
cpuminer-opt configure 3.12.0.1
|
||||
cpuminer-opt configure 3.12.5
|
||||
generated by GNU Autoconf 2.69
|
||||
|
||||
Copyright (C) 2012 Free Software Foundation, Inc.
|
||||
@@ -2012,7 +2012,7 @@ cat >config.log <<_ACEOF
|
||||
This file contains any messages produced by compilers while
|
||||
running configure, to aid debugging if configure makes a mistake.
|
||||
|
||||
It was created by cpuminer-opt $as_me 3.12.0.1, which was
|
||||
It was created by cpuminer-opt $as_me 3.12.5, which was
|
||||
generated by GNU Autoconf 2.69. Invocation command line was
|
||||
|
||||
$ $0 $@
|
||||
@@ -2993,7 +2993,7 @@ fi
|
||||
|
||||
# Define the identity of the package.
|
||||
PACKAGE='cpuminer-opt'
|
||||
VERSION='3.12.0.1'
|
||||
VERSION='3.12.5'
|
||||
|
||||
|
||||
cat >>confdefs.h <<_ACEOF
|
||||
@@ -6690,7 +6690,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
|
||||
# report actual input values of CONFIG_FILES etc. instead of their
|
||||
# values after options handling.
|
||||
ac_log="
|
||||
This file was extended by cpuminer-opt $as_me 3.12.0.1, which was
|
||||
This file was extended by cpuminer-opt $as_me 3.12.5, which was
|
||||
generated by GNU Autoconf 2.69. Invocation command line was
|
||||
|
||||
CONFIG_FILES = $CONFIG_FILES
|
||||
@@ -6756,7 +6756,7 @@ _ACEOF
|
||||
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
|
||||
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
|
||||
ac_cs_version="\\
|
||||
cpuminer-opt config.status 3.12.0.1
|
||||
cpuminer-opt config.status 3.12.5
|
||||
configured by $0, generated by GNU Autoconf 2.69,
|
||||
with options \\"\$ac_cs_config\\"
|
||||
|
||||
|
@@ -1,4 +1,4 @@
|
||||
AC_INIT([cpuminer-opt], [3.12.0.1])
|
||||
AC_INIT([cpuminer-opt], [3.12.5])
|
||||
|
||||
AC_PREREQ([2.59c])
|
||||
AC_CANONICAL_SYSTEM
|
||||
|
744
cpu-miner.c
744
cpu-miner.c
File diff suppressed because it is too large
Load Diff
27
miner.h
27
miner.h
@@ -312,6 +312,19 @@ int varint_encode( unsigned char *p, uint64_t n );
|
||||
size_t address_to_script( unsigned char *out, size_t outsz, const char *addr );
|
||||
int timeval_subtract( struct timeval *result, struct timeval *x,
|
||||
struct timeval *y);
|
||||
|
||||
// Bitcoin formula for converting difficulty to an equivalent
|
||||
// number of hashes.
|
||||
//
|
||||
// https://en.bitcoin.it/wiki/Difficulty
|
||||
//
|
||||
// hash = diff * 2**32
|
||||
//
|
||||
// diff_to_hash = 2**32 = 0x100000000 = 4294967296 = exp32;
|
||||
|
||||
const double exp32; // 2**32
|
||||
const double exp64; // 2**64
|
||||
|
||||
bool fulltest( const uint32_t *hash, const uint32_t *target );
|
||||
bool valid_hash( const void*, const void* );
|
||||
|
||||
@@ -332,11 +345,12 @@ struct thr_info {
|
||||
|
||||
//struct thr_info *thr_info;
|
||||
|
||||
bool submit_solution( struct work *work, const void *hash,
|
||||
struct thr_info *thr );
|
||||
bool submit_lane_solution( struct work *work, const void *hash,
|
||||
struct thr_info *thr, const int lane );
|
||||
bool submit_solution( struct work *work, const void *hash,
|
||||
struct thr_info *thr );
|
||||
|
||||
// deprecated
|
||||
bool submit_lane_solution( struct work *work, const void *hash,
|
||||
struct thr_info *thr, const int lane );
|
||||
|
||||
bool submit_work( struct thr_info *thr, const struct work *work_in );
|
||||
|
||||
@@ -378,6 +392,7 @@ struct work {
|
||||
size_t xnonce2_len;
|
||||
unsigned char *xnonce2;
|
||||
bool sapling;
|
||||
bool stale;
|
||||
|
||||
// x16rt
|
||||
uint32_t merkleroothash[8];
|
||||
@@ -754,6 +769,8 @@ extern uint32_t solved_block_count;
|
||||
extern pthread_mutex_t applog_lock;
|
||||
extern pthread_mutex_t stats_lock;
|
||||
extern bool opt_sapling;
|
||||
extern const int pk_buffer_size_max;
|
||||
extern int pk_buffer_size;
|
||||
|
||||
static char const usage[] = "\
|
||||
Usage: " PACKAGE_NAME " [OPTIONS]\n\
|
||||
@@ -852,7 +869,7 @@ Options:\n\
|
||||
zr5 Ziftr\n\
|
||||
-N, --param-n N parameter for scrypt based algos\n\
|
||||
-R, --patam-r R parameter for scrypt based algos\n\
|
||||
-K, --param-key Key parameter for algos that use it\n\
|
||||
-K, --param-key Key (pers) parameter for algos that use it\n\
|
||||
-o, --url=URL URL of mining server\n\
|
||||
-O, --userpass=U:P username:password pair for mining server\n\
|
||||
-u, --user=USERNAME username for mining server\n\
|
||||
|
@@ -120,14 +120,26 @@ do { \
|
||||
} while(0)
|
||||
|
||||
|
||||
// Horizontal vector testing
|
||||
// Bytewise test of all 256 bits
|
||||
#define mm256_all0_8( a ) \
|
||||
( _mm256_movemask_epi8( a ) == 0 )
|
||||
|
||||
#define mm256_allbits0( a ) _mm256_testz_si256( a, a )
|
||||
#define mm256_allbits1( a ) _mm256_testc_si256( a, m256_neg1 )
|
||||
//broken
|
||||
//#define mm256_allbitsne( a ) _mm256_testnzc_si256( a, m256_neg1 )
|
||||
#define mm256_anybits0( a ) !mm256_allbits1( a )
|
||||
#define mm256_anybits1( a ) !mm256_allbits0( a )
|
||||
#define mm256_all1_8( a ) \
|
||||
( _mm256_movemask_epi8( a ) == -1 )
|
||||
|
||||
|
||||
#define mm256_anybits0( a ) \
|
||||
( _mm256_movemask_epi8( a ) & 0xffffffff )
|
||||
|
||||
#define mm256_anybits1( a ) \
|
||||
( ( _mm256_movemask_epi8( a ) & 0xffffffff ) != 0xffffffff )
|
||||
|
||||
|
||||
// Bitwise test of all 256 bits
|
||||
#define mm256_allbits0( a ) _mm256_testc_si256( a, m256_neg1 )
|
||||
#define mm256_allbits1( a ) _mm256_testc_si256( m256_zero, a )
|
||||
//#define mm256_anybits0( a ) !mm256_allbits1( a )
|
||||
//#define mm256_anybits1( a ) !mm256_allbits0( a )
|
||||
|
||||
|
||||
// Parallel AES, for when x is expected to be in a 256 bit register.
|
||||
|
115
util.c
115
util.c
@@ -159,8 +159,6 @@ void applog2( int prio, const char *fmt, ... )
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
void applog(int prio, const char *fmt, ...)
|
||||
{
|
||||
va_list ap;
|
||||
@@ -921,25 +919,28 @@ bool jobj_binary(const json_t *obj, const char *key, void *buf, size_t buflen)
|
||||
return true;
|
||||
}
|
||||
|
||||
size_t address_to_script(unsigned char *out, size_t outsz, const char *addr)
|
||||
size_t address_to_script( unsigned char *out, size_t outsz, const char *addr )
|
||||
{
|
||||
unsigned char addrbin[26];
|
||||
unsigned char addrbin[ pk_buffer_size_max ];
|
||||
int addrver;
|
||||
size_t rv;
|
||||
|
||||
if (!b58dec(addrbin, sizeof(addrbin), addr))
|
||||
if ( !b58dec( addrbin, outsz, addr ) )
|
||||
return 0;
|
||||
addrver = b58check(addrbin, sizeof(addrbin), addr);
|
||||
if (addrver < 0)
|
||||
|
||||
addrver = b58check( addrbin, outsz, addr );
|
||||
if ( addrver < 0 )
|
||||
return 0;
|
||||
switch (addrver) {
|
||||
|
||||
switch ( addrver )
|
||||
{
|
||||
case 5: /* Bitcoin script hash */
|
||||
case 196: /* Testnet script hash */
|
||||
if (outsz < (rv = 23))
|
||||
if ( outsz < ( rv = 23 ) )
|
||||
return rv;
|
||||
out[ 0] = 0xa9; /* OP_HASH160 */
|
||||
out[ 1] = 0x14; /* push 20 bytes */
|
||||
memcpy(&out[2], &addrbin[1], 20);
|
||||
memcpy( &out[2], &addrbin[1], 20 );
|
||||
out[22] = 0x87; /* OP_EQUAL */
|
||||
return rv;
|
||||
default:
|
||||
@@ -948,7 +949,7 @@ size_t address_to_script(unsigned char *out, size_t outsz, const char *addr)
|
||||
out[ 0] = 0x76; /* OP_DUP */
|
||||
out[ 1] = 0xa9; /* OP_HASH160 */
|
||||
out[ 2] = 0x14; /* push 20 bytes */
|
||||
memcpy(&out[3], &addrbin[1], 20);
|
||||
memcpy( &out[3], &addrbin[1], 20 );
|
||||
out[23] = 0x88; /* OP_EQUALVERIFY */
|
||||
out[24] = 0xac; /* OP_CHECKSIG */
|
||||
return rv;
|
||||
@@ -982,6 +983,7 @@ int timeval_subtract(struct timeval *result, struct timeval *x,
|
||||
return x->tv_sec < y->tv_sec;
|
||||
}
|
||||
|
||||
// deprecated, use test_hash_and_submit
|
||||
// Use this when deinterleaved
|
||||
// do 64 bit test 4 iterations
|
||||
inline bool valid_hash( const void *hash, const void *target )
|
||||
@@ -998,6 +1000,7 @@ inline bool valid_hash( const void *hash, const void *target )
|
||||
return true;
|
||||
}
|
||||
|
||||
// deprecated, use test_hash_and_submit
|
||||
bool fulltest( const uint32_t *hash, const uint32_t *target )
|
||||
{
|
||||
int i;
|
||||
@@ -1040,35 +1043,45 @@ bool fulltest( const uint32_t *hash, const uint32_t *target )
|
||||
|
||||
void diff_to_target(uint32_t *target, double diff)
|
||||
{
|
||||
uint64_t m;
|
||||
int k;
|
||||
|
||||
const double exp64 = (double)0xffffffffffffffff + 1.;
|
||||
for ( k = 3; k > 0 && diff > 1.0; k-- )
|
||||
diff /= exp64;
|
||||
uint64_t m;
|
||||
int k;
|
||||
|
||||
// for (k = 6; k > 0 && diff > 1.0; k--)
|
||||
// diff /= 4294967296.0;
|
||||
m = (uint64_t)( 0xffff0000 / diff );
|
||||
if unlikely( m == 0 && k == 3 )
|
||||
memset( target, 0xff, 32 );
|
||||
else
|
||||
{
|
||||
memset( target, 0, 32 );
|
||||
((uint64_t*)target)[k] = m;
|
||||
// target[k] = (uint32_t)m;
|
||||
// target[k + 1] = (uint32_t)(m >> 32);
|
||||
}
|
||||
for (k = 6; k > 0 && diff > 1.0; k--)
|
||||
diff /= exp32;
|
||||
|
||||
// diff /= 4294967296.0;
|
||||
|
||||
// m = (uint64_t)(4294901760.0 / diff);
|
||||
|
||||
m = (uint64_t)(exp32 / diff);
|
||||
|
||||
if (m == 0 && k == 6)
|
||||
memset(target, 0xff, 32);
|
||||
else {
|
||||
memset(target, 0, 32);
|
||||
target[k] = (uint32_t)m;
|
||||
target[k + 1] = (uint32_t)(m >> 32);
|
||||
}
|
||||
}
|
||||
|
||||
// Only used by stratum pools
|
||||
|
||||
// deprecated
|
||||
void work_set_target(struct work* work, double diff)
|
||||
{
|
||||
diff_to_target( work->target, diff );
|
||||
work->targetdiff = diff;
|
||||
}
|
||||
|
||||
// Only used by longpoll pools
|
||||
double target_to_diff( uint32_t* target )
|
||||
{
|
||||
uint64_t *targ = (uint64_t*)target;
|
||||
return target ? 1. / ( ( (double)targ[3] / exp32 )
|
||||
+ ( (double)targ[2] )
|
||||
+ ( (double)targ[1] * exp32 )
|
||||
+ ( (double)targ[0] * exp64 ) )
|
||||
: 0.;
|
||||
}
|
||||
/*
|
||||
double target_to_diff(uint32_t* target)
|
||||
{
|
||||
uchar* tgt = (uchar*) target;
|
||||
@@ -1087,6 +1100,7 @@ double target_to_diff(uint32_t* target)
|
||||
else
|
||||
return (double)0x0000ffff00000000/m;
|
||||
}
|
||||
*/
|
||||
|
||||
#ifdef WIN32
|
||||
#define socket_blocks() (WSAGetLastError() == WSAEWOULDBLOCK)
|
||||
@@ -1545,35 +1559,44 @@ bool stratum_authorize(struct stratum_ctx *sctx, const char *user, const char *p
|
||||
|
||||
ret = true;
|
||||
|
||||
if (!opt_extranonce)
|
||||
if ( !opt_extranonce )
|
||||
goto out;
|
||||
|
||||
// subscribe to extranonce (optional)
|
||||
sprintf(s, "{\"id\": 3, \"method\": \"mining.extranonce.subscribe\", \"params\": []}");
|
||||
|
||||
if (!stratum_send_line(sctx, s))
|
||||
if ( !stratum_send_line( sctx, s ) )
|
||||
goto out;
|
||||
|
||||
if (!socket_full(sctx->sock, 3)) {
|
||||
applog(LOG_WARNING, "stratum extranonce subscribe timed out");
|
||||
goto out;
|
||||
if ( !socket_full( sctx->sock, 3 ) )
|
||||
{
|
||||
applog( LOG_WARNING, "Extranonce disabled, subscribe timed out" );
|
||||
opt_extranonce = false;
|
||||
goto out;
|
||||
}
|
||||
if ( !opt_quiet )
|
||||
applog( LOG_INFO, "Extranonce subscription enabled" );
|
||||
|
||||
sret = stratum_recv_line(sctx);
|
||||
if (sret) {
|
||||
json_t *extra = JSON_LOADS(sret, &err);
|
||||
if (!extra) {
|
||||
sret = stratum_recv_line( sctx );
|
||||
if ( sret )
|
||||
{
|
||||
json_t *extra = JSON_LOADS( sret, &err );
|
||||
if ( !extra )
|
||||
{
|
||||
applog(LOG_WARNING, "JSON decode failed(%d): %s", err.line, err.text);
|
||||
} else {
|
||||
if (json_integer_value(json_object_get(extra, "id")) != 3) {
|
||||
}
|
||||
else
|
||||
{
|
||||
if ( json_integer_value(json_object_get( extra, "id" ) ) != 3 )
|
||||
{
|
||||
// we receive a standard method if extranonce is ignored
|
||||
if (!stratum_handle_method(sctx, sret))
|
||||
applog(LOG_WARNING, "Stratum answer id is not correct!");
|
||||
if ( !stratum_handle_method( sctx, sret ) )
|
||||
applog( LOG_WARNING, "Stratum answer id is not correct!" );
|
||||
}
|
||||
res_val = json_object_get(extra, "result");
|
||||
res_val = json_object_get( extra, "result" );
|
||||
// if (opt_debug && (!res_val || json_is_false(res_val)))
|
||||
// applog(LOG_DEBUG, "extranonce subscribe not supported");
|
||||
json_decref(extra);
|
||||
json_decref( extra );
|
||||
}
|
||||
free(sret);
|
||||
}
|
||||
|
Reference in New Issue
Block a user