diff --git a/Makefile.am b/Makefile.am index 99b2fee..d282ae8 100644 --- a/Makefile.am +++ b/Makefile.am @@ -102,9 +102,6 @@ cpuminer_SOURCES = \ algo/hamsi/hamsi-hash-4way.c \ algo/haval/haval.c \ algo/haval/haval-hash-4way.c \ - algo/heavy/sph_hefty1.c \ - algo/heavy/heavy.c \ - algo/heavy/bastion.c \ algo/hodl/aes.c \ algo/hodl/hodl-gate.c \ algo/hodl/hodl-wolf.c \ @@ -123,8 +120,6 @@ cpuminer_SOURCES = \ algo/keccak/sha3d-4way.c \ algo/keccak/sha3d.c \ algo/lanehash/lane.c \ - algo/luffa/sph_luffa.c \ - algo/luffa/luffa.c \ algo/luffa/luffa_for_sse2.c \ algo/luffa/luffa-hash-2way.c \ algo/lyra2/lyra2.c \ @@ -153,7 +148,6 @@ cpuminer_SOURCES = \ algo/nist5/zr5.c \ algo/panama/panama-hash-4way.c \ algo/panama/sph_panama.c \ - algo/radiogatun/sph_radiogatun.c \ algo/quark/quark-gate.c \ algo/quark/quark.c \ algo/quark/quark-4way.c \ @@ -176,7 +170,6 @@ cpuminer_SOURCES = \ algo/ripemd/lbry-4way.c \ algo/scrypt/scrypt.c \ algo/scrypt/neoscrypt.c \ - algo/scrypt/pluck.c \ algo/sha/sph_sha2.c \ algo/sha/sph_sha2big.c \ algo/sha/sha256-hash-4way.c \ @@ -195,7 +188,6 @@ cpuminer_SOURCES = \ algo/shavite/shavite-hash-2way.c \ algo/shavite/shavite-hash-4way.c \ algo/shavite/shavite.c \ - algo/simd/sph_simd.c \ algo/simd/nist.c \ algo/simd/vector.c \ algo/simd/simd-hash-2way.c \ @@ -233,7 +225,6 @@ cpuminer_SOURCES = \ algo/x11/timetravel10-gate.c \ algo/x11/timetravel10.c \ algo/x11/timetravel10-4way.c \ - algo/x11/fresh.c \ algo/x11/x11evo.c \ algo/x11/x11evo-4way.c \ algo/x11/x11evo-gate.c \ @@ -252,7 +243,6 @@ cpuminer_SOURCES = \ algo/x13/skunk-gate.c \ algo/x13/skunk-4way.c \ algo/x13/skunk.c \ - algo/x13/drop.c \ algo/x13/x13bcd-4way.c \ algo/x13/x13bcd.c \ algo/x14/x14-gate.c \ @@ -287,7 +277,6 @@ cpuminer_SOURCES = \ algo/x17/sonoa-gate.c \ algo/x17/sonoa-4way.c \ algo/x17/sonoa.c \ - algo/x20/x20r.c \ algo/x22/x22i-4way.c \ algo/x22/x22i.c \ algo/x22/x22i-gate.c \ diff --git a/README.md b/README.md index 0707fb7..fe38d37 100644 --- a/README.md +++ b/README.md @@ 
-53,7 +53,6 @@ Supported Algorithms argon2d500 argon2d-dyn, Dynamic (DYN) argon2d4096 argon2d-uis, Unitus, (UIS) axiom Shabal-256 MemoHash - bastion blake Blake-256 (SFR) blake2b Blake2b 256 blake2s Blake-2 S @@ -64,10 +63,7 @@ Supported Algorithms decred deep Deepcoin (DCN) dmd-gr Diamond-Groestl - drop Dropcoin - fresh Fresh groestl Groestl coin - heavy Heavy hex x16r-hex hmq1725 Espers hodl Hodlcoin diff --git a/RELEASE_NOTES b/RELEASE_NOTES index 1a90761..75699e9 100644 --- a/RELEASE_NOTES +++ b/RELEASE_NOTES @@ -65,6 +65,20 @@ If not what makes it happen or not happen? Change Log ---------- +v3.11.9 + +Fixed x16r invalid shares when Luffa was first in hash order. + +New startup message for status of stratum connection, API & extranonce. + +New log report for CPU temperature, frequency of fastest and slowest cores. + +Compile time is a little shorter and binary file size a little smaller +using conditional compilation. + +Removed code for Bastion, Drop, Heavy, Luffa and Pluck algos and other unused +code. + v3.11.8 Fixed network hashrate showing incorrect data, should be close now. 
diff --git a/algo-gate-api.c b/algo-gate-api.c index 5f31b93..abca7e2 100644 --- a/algo-gate-api.c +++ b/algo-gate-api.c @@ -162,7 +162,6 @@ bool register_algo_gate( int algo, algo_gate_t *gate ) case ALGO_ARGON2D500: register_argon2d_dyn_algo ( gate ); break; case ALGO_ARGON2D4096: register_argon2d4096_algo ( gate ); break; case ALGO_AXIOM: register_axiom_algo ( gate ); break; - case ALGO_BASTION: register_bastion_algo ( gate ); break; case ALGO_BLAKE: register_blake_algo ( gate ); break; case ALGO_BLAKE2B: register_blake2b_algo ( gate ); break; case ALGO_BLAKE2S: register_blake2s_algo ( gate ); break; @@ -175,10 +174,7 @@ bool register_algo_gate( int algo, algo_gate_t *gate ) case ALGO_DECRED: register_decred_algo ( gate ); break; case ALGO_DEEP: register_deep_algo ( gate ); break; case ALGO_DMD_GR: register_dmd_gr_algo ( gate ); break; - case ALGO_DROP: register_drop_algo ( gate ); break; - case ALGO_FRESH: register_fresh_algo ( gate ); break; case ALGO_GROESTL: register_groestl_algo ( gate ); break; - case ALGO_HEAVY: register_heavy_algo ( gate ); break; case ALGO_HEX: register_hex_algo ( gate ); break; case ALGO_HMQ1725: register_hmq1725_algo ( gate ); break; case ALGO_HODL: register_hodl_algo ( gate ); break; @@ -186,7 +182,6 @@ bool register_algo_gate( int algo, algo_gate_t *gate ) case ALGO_KECCAK: register_keccak_algo ( gate ); break; case ALGO_KECCAKC: register_keccakc_algo ( gate ); break; case ALGO_LBRY: register_lbry_algo ( gate ); break; - case ALGO_LUFFA: register_luffa_algo ( gate ); break; case ALGO_LYRA2H: register_lyra2h_algo ( gate ); break; case ALGO_LYRA2RE: register_lyra2re_algo ( gate ); break; case ALGO_LYRA2REV2: register_lyra2rev2_algo ( gate ); break; @@ -200,7 +195,6 @@ bool register_algo_gate( int algo, algo_gate_t *gate ) case ALGO_PENTABLAKE: register_pentablake_algo ( gate ); break; case ALGO_PHI1612: register_phi1612_algo ( gate ); break; case ALGO_PHI2: register_phi2_algo ( gate ); break; - case ALGO_PLUCK: register_pluck_algo ( 
gate ); break; case ALGO_POLYTIMOS: register_polytimos_algo ( gate ); break; case ALGO_POWER2B: register_power2b_algo ( gate ); break; case ALGO_QUARK: register_quark_algo ( gate ); break; @@ -275,10 +269,6 @@ bool register_algo_gate( int algo, algo_gate_t *gate ) // override std defaults with jr2 defaults bool register_json_rpc2( algo_gate_t *gate ) { - applog(LOG_WARNING,"\nCryptonight algorithm and variants are no longer"); - applog(LOG_WARNING,"supported by cpuminer-opt. Shares submitted will"); - applog(LOG_WARNING,"likely be rejected. Proceed at your own risk.\n"); - // gate->wait_for_diff = (void*)&do_nothing; gate->get_new_work = (void*)&jr2_get_new_work; gate->get_nonceptr = (void*)&jr2_get_nonceptr; @@ -360,7 +350,7 @@ void get_algo_alias( char** algo_or_alias ) if ( !strcasecmp( *algo_or_alias, algo_alias_map[i][ ALIAS ] ) ) { // found valid alias, return proper name - *algo_or_alias = (char* const)( algo_alias_map[i][ PROPER ] ); + *algo_or_alias = (const char*)( algo_alias_map[i][ PROPER ] ); return; } } diff --git a/algo/argon2/argon2d/argon2d-gate.c b/algo/argon2/argon2d/argon2d-gate.c index 300bf57..cd41a32 100644 --- a/algo/argon2/argon2d/argon2d-gate.c +++ b/algo/argon2/argon2d/argon2d-gate.c @@ -1,4 +1,5 @@ #include "argon2d-gate.h" +#include "simd-utils.h" #include "argon2d/argon2.h" static const size_t INPUT_BYTES = 80; // Lenth of a block header in bytes. 
Input Length = Salt Length (salt = input) @@ -36,7 +37,7 @@ void argon2d_crds_hash( void *output, const void *input ) int scanhash_argon2d_crds( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) { - uint32_t _ALIGN(64) endiandata[20]; + uint32_t _ALIGN(64) edata[20]; uint32_t _ALIGN(64) hash[8]; uint32_t *pdata = work->data; uint32_t *ptarget = work->target; @@ -45,11 +46,11 @@ int scanhash_argon2d_crds( struct work *work, uint32_t max_nonce, const uint32_t Htarg = ptarget[7]; uint32_t nonce = first_nonce; - swab32_array( endiandata, pdata, 20 ); + swab32_array( edata, pdata, 20 ); do { - be32enc(&endiandata[19], nonce); - argon2d_crds_hash( hash, endiandata ); + be32enc(&edata[19], nonce); + argon2d_crds_hash( hash, edata ); if ( hash[7] <= Htarg && fulltest( hash, ptarget ) && !opt_benchmark ) { pdata[19] = nonce; @@ -103,31 +104,32 @@ void argon2d_dyn_hash( void *output, const void *input ) int scanhash_argon2d_dyn( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) { - uint32_t _ALIGN(64) endiandata[20]; + uint32_t _ALIGN(64) edata[20]; uint32_t _ALIGN(64) hash[8]; uint32_t *pdata = work->data; uint32_t *ptarget = work->target; - int thr_id = mythr->id; // thr_id arg is deprecated - const uint32_t first_nonce = pdata[19]; - const uint32_t Htarg = ptarget[7]; + const int thr_id = mythr->id; + const uint32_t first_nonce = (const uint32_t)pdata[19]; + const uint32_t last_nonce = (const uint32_t)max_nonce; uint32_t nonce = first_nonce; + const bool bench = opt_benchmark; - swab32_array( endiandata, pdata, 20 ); - + mm128_bswap32_80( edata, pdata ); do { - be32enc(&endiandata[19], nonce); - argon2d_dyn_hash( hash, endiandata ); - if ( hash[7] <= Htarg && fulltest( hash, ptarget ) && !opt_benchmark ) + edata[19] = nonce; + argon2d_dyn_hash( hash, edata ); + if ( unlikely( valid_hash( (uint64_t*)hash, (uint64_t*)ptarget ) + && !bench ) ) { - pdata[19] = nonce; + pdata[19] = bswap_32( nonce );; 
submit_solution( work, hash, mythr ); } nonce++; - } while (nonce < max_nonce && !work_restart[thr_id].restart); + } while ( likely( nonce < last_nonce && !work_restart[thr_id].restart ) ); pdata[19] = nonce; - *hashes_done = pdata[19] - first_nonce + 1; + *hashes_done = pdata[19] - first_nonce; return 0; } @@ -146,36 +148,34 @@ int scanhash_argon2d4096( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t _ALIGN(64) vhash[8]; - uint32_t _ALIGN(64) endiandata[20]; + uint32_t _ALIGN(64) edata[20]; uint32_t *pdata = work->data; uint32_t *ptarget = work->target; - const uint32_t Htarg = ptarget[7]; const uint32_t first_nonce = pdata[19]; + const uint32_t last_nonce = (const uint32_t)max_nonce; uint32_t n = first_nonce; - int thr_id = mythr->id; // thr_id arg is deprecated + const int thr_id = mythr->id; // thr_id arg is deprecated uint32_t t_cost = 1; // 1 iteration uint32_t m_cost = 4096; // use 4MB uint32_t parallelism = 1; // 1 thread, 2 lanes + const bool bench = opt_benchmark; - for ( int i = 0; i < 19; i++ ) - be32enc( &endiandata[i], pdata[i] ); + mm128_bswap32_80( edata, pdata ); do { - be32enc( &endiandata[19], n ); - argon2d_hash_raw( t_cost, m_cost, parallelism, (char*) endiandata, 80, - (char*) endiandata, 80, (char*) vhash, 32, ARGON2_VERSION_13 ); - if ( vhash[7] < Htarg && fulltest( vhash, ptarget ) && !opt_benchmark ) + edata[19] = n; + argon2d_hash_raw( t_cost, m_cost, parallelism, (char*) edata, 80, + (char*) edata, 80, (char*) vhash, 32, ARGON2_VERSION_13 ); + if ( unlikely( valid_hash( vhash, ptarget ) && !bench ) ) { - pdata[19] = n; + be32enc( &pdata[19], n ); submit_solution( work, vhash, mythr ); } n++; + } while ( likely( n < last_nonce && !work_restart[thr_id].restart ) ); - } while (n < max_nonce && !work_restart[thr_id].restart); - - *hashes_done = n - first_nonce + 1; + *hashes_done = n - first_nonce; pdata[19] = n; - return 0; } diff --git a/algo/blake/blake2b-hash-4way.c 
b/algo/blake/blake2b-hash-4way.c index 246716f..d9853c2 100644 --- a/algo/blake/blake2b-hash-4way.c +++ b/algo/blake/blake2b-hash-4way.c @@ -33,6 +33,8 @@ #include "blake2b-hash-4way.h" +#if defined(__AVX2__) + static const uint8_t sigma[12][16] = { { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, @@ -203,9 +205,9 @@ void blake2b_8way_final( blake2b_8way_ctx *ctx, void *out ) casti_m512i( out, 3 ) = ctx->h[3]; } -#endif +#endif // AVX512 -#if defined(__AVX2__) +// AVX2 // G Mixing function. @@ -369,4 +371,4 @@ void blake2b_4way_final( blake2b_4way_ctx *ctx, void *out ) casti_m256i( out, 3 ) = ctx->h[3]; } -#endif +#endif // AVX2 diff --git a/algo/blake/blake2b.c b/algo/blake/blake2b.c index 63ee8eb..e2dedaa 100644 --- a/algo/blake/blake2b.c +++ b/algo/blake/blake2b.c @@ -4,6 +4,9 @@ */ #include "blake2b-gate.h" + +#if !defined(BLAKE2B_8WAY) && !defined(BLAKE2B_4WAY) + #include #include #include "algo/blake/sph_blake2b.h" @@ -58,3 +61,4 @@ int scanhash_blake2b( struct work *work, uint32_t max_nonce, return 0; } +#endif diff --git a/algo/blake/blake2s.c b/algo/blake/blake2s.c index 442859c..86d4f77 100644 --- a/algo/blake/blake2s.c +++ b/algo/blake/blake2s.c @@ -1,5 +1,7 @@ #include "blake2s-gate.h" +#if !defined(BLAKE2S_16WAY) && !defined(BLAKE2S_8WAY) && !defined(BLAKE2S) + #include #include @@ -70,3 +72,4 @@ int scanhash_blake2s( struct work *work, return 0; } +#endif diff --git a/algo/blake/blakecoin.c b/algo/blake/blakecoin.c index 3cea5d9..b8484ec 100644 --- a/algo/blake/blakecoin.c +++ b/algo/blake/blakecoin.c @@ -1,4 +1,7 @@ #include "blakecoin-gate.h" + +#if !defined(BLAKECOIN_8WAY) && !defined(BLAKECOIN_4WAY) + #define BLAKE32_ROUNDS 8 #include "sph_blake.h" @@ -93,3 +96,4 @@ int scanhash_blakecoin( struct work *work, uint32_t max_nonce, return 0; } +#endif diff --git a/algo/blake/decred.c b/algo/blake/decred.c index e6a0eb5..d60b32b 100644 --- a/algo/blake/decred.c +++ b/algo/blake/decred.c @@ -1,4 +1,7 @@ #include "decred-gate.h" + +#if 
!defined(DECRED_8WAY) && !defined(DECRED_4WAY) + #include "sph_blake.h" #include @@ -275,3 +278,5 @@ bool register_decred_algo( algo_gate_t* gate ) return true; } */ + +#endif diff --git a/algo/blake/pentablake.c b/algo/blake/pentablake.c index 55c874c..8bb8dc5 100644 --- a/algo/blake/pentablake.c +++ b/algo/blake/pentablake.c @@ -1,4 +1,7 @@ #include "pentablake-gate.h" + +#if !defined(PENTABLAKE_8WAY) && !defined(PENTABLAKE_4WAY) + #include #include #include @@ -111,3 +114,4 @@ int scanhash_pentablake( struct work *work, uint32_t max_nonce, return 0; } +#endif diff --git a/algo/bmw/bmw512.c b/algo/bmw/bmw512.c index 16620e1..77c22cc 100644 --- a/algo/bmw/bmw512.c +++ b/algo/bmw/bmw512.c @@ -1,5 +1,7 @@ #include "algo-gate-api.h" +#if !defined(BMW512_8WAY) && !defined(BMW512_4WAY) + #include #include #include @@ -50,4 +52,4 @@ int scanhash_bmw512( struct work *work, uint32_t max_nonce, pdata[19] = n; return 0; } - +#endif diff --git a/algo/bmw/sph_bmw.c b/algo/bmw/sph_bmw.c index a61ac65..6c5a6df 100644 --- a/algo/bmw/sph_bmw.c +++ b/algo/bmw/sph_bmw.c @@ -48,6 +48,8 @@ extern "C"{ #pragma warning (disable: 4146) #endif +#if !defined(__AVX2__) + static const sph_u32 IV224[] = { SPH_C32(0x00010203), SPH_C32(0x04050607), SPH_C32(0x08090A0B), SPH_C32(0x0C0D0E0F), @@ -70,6 +72,8 @@ static const sph_u32 IV256[] = { SPH_C32(0x78797A7B), SPH_C32(0x7C7D7E7F) }; +#endif // !AVX2 + #if SPH_64 static const sph_u64 IV384[] = { @@ -135,6 +139,8 @@ static const sph_u64 IV512[] = { #define M16_30 14, 15, 1, 2, 5, 8, 9 #define M16_31 15, 16, 2, 3, 6, 9, 10 +#if !defined(__AVX2__) + #define ss0(x) (((x) >> 1) ^ SPH_T32((x) << 3) \ ^ SPH_ROTL32(x, 4) ^ SPH_ROTL32(x, 19)) #define ss1(x) (((x) >> 1) ^ SPH_T32((x) << 2) \ @@ -189,6 +195,8 @@ static const sph_u64 IV512[] = { #define expand2s_(qf, mf, hf, i16, ix, iy) \ expand2s_inner LPAR qf, mf, hf, i16, ix, iy) +#endif // !AVX2 + #if SPH_64 #define sb0(x) (((x) >> 1) ^ SPH_T64((x) << 3) \ @@ -291,6 +299,8 @@ static const sph_u64 
Kb_tab[] = { tt((M(i0) ^ H(i0)) op01 (M(i1) ^ H(i1)) op12 (M(i2) ^ H(i2)) \ op23 (M(i3) ^ H(i3)) op34 (M(i4) ^ H(i4))) +#if !defined(__AVX2__) + #define Ws0 MAKE_W(SPH_T32, 5, -, 7, +, 10, +, 13, +, 14) #define Ws1 MAKE_W(SPH_T32, 6, -, 8, +, 11, +, 14, -, 15) #define Ws2 MAKE_W(SPH_T32, 0, +, 7, +, 9, -, 12, +, 15) @@ -407,6 +417,8 @@ static const sph_u64 Kb_tab[] = { #define Qs(j) (qt[j]) +#endif // !AVX2 + #if SPH_64 #define Wb0 MAKE_W(SPH_T64, 5, -, 7, +, 10, +, 13, +, 14) @@ -557,7 +569,6 @@ static const sph_u64 Kb_tab[] = { + ((xl >> 2) ^ qf(22) ^ qf(15))); \ } while (0) -#define FOLDs FOLD(sph_u32, MAKE_Qs, SPH_T32, SPH_ROTL32, M, Qs, dH) #if SPH_64 @@ -565,6 +576,10 @@ static const sph_u64 Kb_tab[] = { #endif +#if !defined(__AVX2__) + +#define FOLDs FOLD(sph_u32, MAKE_Qs, SPH_T32, SPH_ROTL32, M, Qs, dH) + static void compress_small(const unsigned char *data, const sph_u32 h[16], sph_u32 dh[16]) { @@ -711,6 +726,8 @@ bmw32_close(sph_bmw_small_context *sc, unsigned ub, unsigned n, sph_enc32le(out + 4 * u, h1[v]); } +#endif // !AVX2 + #if SPH_64 static void @@ -840,6 +857,8 @@ bmw64_close(sph_bmw_big_context *sc, unsigned ub, unsigned n, #endif +#if !defined(__AVX2__) + /* see sph_bmw.h */ void sph_bmw224_init(void *cc) @@ -898,6 +917,8 @@ sph_bmw256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst) // sph_bmw256_init(cc); } +#endif // !AVX2 + #if SPH_64 /* see sph_bmw.h */ diff --git a/algo/bmw/sph_bmw.h b/algo/bmw/sph_bmw.h index b10071a..f53dd27 100644 --- a/algo/bmw/sph_bmw.h +++ b/algo/bmw/sph_bmw.h @@ -77,6 +77,9 @@ extern "C"{ * computation can be cloned by copying the context (e.g. with a simple * memcpy()). 
*/ + +#if !defined(__AVX2__) + typedef struct { #ifndef DOXYGEN_IGNORE unsigned char buf[64]; /* first field, for alignment */ @@ -102,6 +105,8 @@ typedef sph_bmw_small_context sph_bmw224_context; */ typedef sph_bmw_small_context sph_bmw256_context; +#endif // !AVX2 + #if SPH_64 /** @@ -137,6 +142,8 @@ typedef sph_bmw_big_context sph_bmw512_context; #endif +#if !defined(__AVX2__) + /** * Initialize a BMW-224 context. This process performs no memory allocation. * @@ -227,6 +234,8 @@ void sph_bmw256_close(void *cc, void *dst); void sph_bmw256_addbits_and_close( void *cc, unsigned ub, unsigned n, void *dst); +#endif // !AVX2 + #if SPH_64 /** diff --git a/algo/cryptonight/cryptolight.c b/algo/cryptonight/cryptolight.c index ca2923d..bf3d575 100644 --- a/algo/cryptonight/cryptolight.c +++ b/algo/cryptonight/cryptolight.c @@ -358,6 +358,9 @@ int scanhash_cryptolight( struct work *work, bool register_cryptolight_algo( algo_gate_t* gate ) { + applog(LOG_WARNING,"Cryptonight algorithm and variants are no longer"); + applog(LOG_WARNING,"supported by cpuminer-opt. Shares submitted will"); + applog(LOG_WARNING,"likely be rejected. Proceed at your own risk.\n"); register_json_rpc2( gate ); gate->optimizations = SSE2_OPT | AES_OPT; gate->scanhash = (void*)&scanhash_cryptolight; diff --git a/algo/cryptonight/cryptonight-common.c b/algo/cryptonight/cryptonight-common.c index 2a5146f..e55837a 100644 --- a/algo/cryptonight/cryptonight-common.c +++ b/algo/cryptonight/cryptonight-common.c @@ -105,6 +105,9 @@ int scanhash_cryptonight( struct work *work, uint32_t max_nonce, bool register_cryptonight_algo( algo_gate_t* gate ) { + applog(LOG_WARNING,"Cryptonight algorithm and variants are no longer"); + applog(LOG_WARNING,"supported by cpuminer-opt. Shares submitted will"); + applog(LOG_WARNING,"likely be rejected. 
Proceed at your own risk.\n"); cryptonightV7 = false; register_json_rpc2( gate ); gate->optimizations = SSE2_OPT | AES_OPT; @@ -116,6 +119,9 @@ bool register_cryptonight_algo( algo_gate_t* gate ) bool register_cryptonightv7_algo( algo_gate_t* gate ) { + applog(LOG_WARNING,"Cryptonight algorithm and variants are no longer"); + applog(LOG_WARNING,"supported by cpuminer-opt. Shares submitted will"); + applog(LOG_WARNING,"likely be rejected. Proceed at your own risk.\n"); cryptonightV7 = true; register_json_rpc2( gate ); gate->optimizations = SSE2_OPT | AES_OPT; diff --git a/algo/echo/sph_echo.c b/algo/echo/sph_echo.c index 69c8f2f..99e7dac 100644 --- a/algo/echo/sph_echo.c +++ b/algo/echo/sph_echo.c @@ -36,6 +36,8 @@ #include "sph_echo.h" +#if !defined(__AES__) + #ifdef __cplusplus extern "C"{ #endif @@ -1028,4 +1030,5 @@ sph_echo512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst) } #ifdef __cplusplus } -#endif +#endif +#endif // !AES diff --git a/algo/echo/sph_echo.h b/algo/echo/sph_echo.h index ad5441e..ae5a350 100644 --- a/algo/echo/sph_echo.h +++ b/algo/echo/sph_echo.h @@ -36,6 +36,8 @@ #ifndef SPH_ECHO_H__ #define SPH_ECHO_H__ +#if !defined(__AES__) + #ifdef __cplusplus extern "C"{ #endif @@ -316,5 +318,5 @@ void sph_echo512_addbits_and_close( #ifdef __cplusplus } #endif - +#endif // !AES #endif diff --git a/algo/groestl/aes_ni/groestl-intr-aes.h b/algo/groestl/aes_ni/groestl-intr-aes.h index 3c3e740..e09e8de 100644 --- a/algo/groestl/aes_ni/groestl-intr-aes.h +++ b/algo/groestl/aes_ni/groestl-intr-aes.h @@ -1,3 +1,6 @@ +#if !defined GROESTL_INTR_AES_H__ +#define GROESTL_INTR_AES_H__ + /* groestl-intr-aes.h Aug 2011 * * Groestl implementation with intrinsics using ssse3, sse4.1, and aes @@ -11,6 +14,52 @@ #include #include "hash-groestl.h" +static const __m128i round_const_p[] __attribute__ ((aligned (64))) = +{ + { 0x7060504030201000, 0xf0e0d0c0b0a09080 }, + { 0x7161514131211101, 0xf1e1d1c1b1a19181 }, + { 0x7262524232221202, 0xf2e2d2c2b2a29282 }, 
+ { 0x7363534333231303, 0xf3e3d3c3b3a39383 }, + { 0x7464544434241404, 0xf4e4d4c4b4a49484 }, + { 0x7565554535251505, 0xf5e5d5c5b5a59585 }, + { 0x7666564636261606, 0xf6e6d6c6b6a69686 }, + { 0x7767574737271707, 0xf7e7d7c7b7a79787 }, + { 0x7868584838281808, 0xf8e8d8c8b8a89888 }, + { 0x7969594939291909, 0xf9e9d9c9b9a99989 }, + { 0x7a6a5a4a3a2a1a0a, 0xfaeadacabaaa9a8a }, + { 0x7b6b5b4b3b2b1b0b, 0xfbebdbcbbbab9b8b }, + { 0x7c6c5c4c3c2c1c0c, 0xfcecdcccbcac9c8c }, + { 0x7d6d5d4d3d2d1d0d, 0xfdedddcdbdad9d8d } +}; + +static const __m128i round_const_q[] __attribute__ ((aligned (64))) = +{ + { 0x8f9fafbfcfdfefff, 0x0f1f2f3f4f5f6f7f }, + { 0x8e9eaebecedeeefe, 0x0e1e2e3e4e5e6e7e }, + { 0x8d9dadbdcdddedfd, 0x0d1d2d3d4d5d6d7d }, + { 0x8c9cacbcccdcecfc, 0x0c1c2c3c4c5c6c7c }, + { 0x8b9babbbcbdbebfb, 0x0b1b2b3b4b5b6b7b }, + { 0x8a9aaabacadaeafa, 0x0a1a2a3a4a5a6a7a }, + { 0x8999a9b9c9d9e9f9, 0x0919293949596979 }, + { 0x8898a8b8c8d8e8f8, 0x0818283848586878 }, + { 0x8797a7b7c7d7e7f7, 0x0717273747576777 }, + { 0x8696a6b6c6d6e6f6, 0x0616263646566676 }, + { 0x8595a5b5c5d5e5f5, 0x0515253545556575 }, + { 0x8494a4b4c4d4e4f4, 0x0414243444546474 }, + { 0x8393a3b3c3d3e3f3, 0x0313233343536373 }, + { 0x8292a2b2c2d2e2f2, 0x0212223242526272 } +}; + +static const __m128i TRANSP_MASK = { 0x0d0509010c040800, 0x0f070b030e060a02 }; +static const __m128i SUBSH_MASK0 = { 0x0b0e0104070a0d00, 0x0306090c0f020508 }; +static const __m128i SUBSH_MASK1 = { 0x0c0f0205080b0e01, 0x04070a0d00030609 }; +static const __m128i SUBSH_MASK2 = { 0x0d000306090c0f02, 0x05080b0e0104070a }; +static const __m128i SUBSH_MASK3 = { 0x0e0104070a0d0003, 0x06090c0f0205080b }; +static const __m128i SUBSH_MASK4 = { 0x0f0205080b0e0104, 0x070a0d000306090c }; +static const __m128i SUBSH_MASK5 = { 0x000306090c0f0205, 0x080b0e0104070a0d }; +static const __m128i SUBSH_MASK6 = { 0x0104070a0d000306, 0x090c0f0205080b0e }; +static const __m128i SUBSH_MASK7 = { 0x06090c0f0205080b, 0x0e0104070a0d0003 }; + #define tos(a) #a #define tostr(a) tos(a) 
@@ -141,42 +190,6 @@ }/*MixBytes*/ -static const uint64_t round_const_p[] __attribute__ ((aligned (64))) = -{ - 0x7060504030201000, 0xf0e0d0c0b0a09080, - 0x7161514131211101, 0xf1e1d1c1b1a19181, - 0x7262524232221202, 0xf2e2d2c2b2a29282, - 0x7363534333231303, 0xf3e3d3c3b3a39383, - 0x7464544434241404, 0xf4e4d4c4b4a49484, - 0x7565554535251505, 0xf5e5d5c5b5a59585, - 0x7666564636261606, 0xf6e6d6c6b6a69686, - 0x7767574737271707, 0xf7e7d7c7b7a79787, - 0x7868584838281808, 0xf8e8d8c8b8a89888, - 0x7969594939291909, 0xf9e9d9c9b9a99989, - 0x7a6a5a4a3a2a1a0a, 0xfaeadacabaaa9a8a, - 0x7b6b5b4b3b2b1b0b, 0xfbebdbcbbbab9b8b, - 0x7c6c5c4c3c2c1c0c, 0xfcecdcccbcac9c8c, - 0x7d6d5d4d3d2d1d0d, 0xfdedddcdbdad9d8d -}; - -static const uint64_t round_const_q[] __attribute__ ((aligned (64))) = -{ - 0x8f9fafbfcfdfefff, 0x0f1f2f3f4f5f6f7f, - 0x8e9eaebecedeeefe, 0x0e1e2e3e4e5e6e7e, - 0x8d9dadbdcdddedfd, 0x0d1d2d3d4d5d6d7d, - 0x8c9cacbcccdcecfc, 0x0c1c2c3c4c5c6c7c, - 0x8b9babbbcbdbebfb, 0x0b1b2b3b4b5b6b7b, - 0x8a9aaabacadaeafa, 0x0a1a2a3a4a5a6a7a, - 0x8999a9b9c9d9e9f9, 0x0919293949596979, - 0x8898a8b8c8d8e8f8, 0x0818283848586878, - 0x8797a7b7c7d7e7f7, 0x0717273747576777, - 0x8696a6b6c6d6e6f6, 0x0616263646566676, - 0x8595a5b5c5d5e5f5, 0x0515253545556575, - 0x8494a4b4c4d4e4f4, 0x0414243444546474, - 0x8393a3b3c3d3e3f3, 0x0313233343536373, - 0x8292a2b2c2d2e2f2, 0x0212223242526272 -}; - /* one round * a0-a7 = input rows * b0-b7 = output rows @@ -203,22 +216,14 @@ static const uint64_t round_const_q[] __attribute__ ((aligned (64))) = xmm8 = _mm_xor_si128( xmm8, \ casti_m128i( round_const_p, round_counter ) ); \ /* ShiftBytes P1024 + pre-AESENCLAST */\ - xmm8 = _mm_shuffle_epi8( xmm8, m128_const_64( 0x0306090c0f020508, \ - 0x0b0e0104070a0d00 ) ); \ - xmm9 = _mm_shuffle_epi8( xmm9, m128_const_64( 0x04070a0d00030609, \ - 0x0c0f0205080b0e01 ) ); \ - xmm10 = _mm_shuffle_epi8( xmm10, m128_const_64( 0x05080b0e0104070a, \ - 0x0d000306090c0f02 ) ); \ - xmm11 = _mm_shuffle_epi8( xmm11, m128_const_64( 
0x06090c0f0205080b, \ - 0x0e0104070a0d0003 ) ); \ - xmm12 = _mm_shuffle_epi8( xmm12, m128_const_64( 0x070a0d000306090c, \ - 0x0f0205080b0e0104 ) ); \ - xmm13 = _mm_shuffle_epi8( xmm13, m128_const_64( 0x080b0e0104070a0d, \ - 0x000306090c0f0205 ) ); \ - xmm14 = _mm_shuffle_epi8( xmm14, m128_const_64( 0x090c0f0205080b0e, \ - 0x0104070a0d000306 ) ); \ - xmm15 = _mm_shuffle_epi8( xmm15, m128_const_64( 0x0e0104070a0d0003, \ - 0x06090c0f0205080b ) ); \ + xmm8 = _mm_shuffle_epi8( xmm8, SUBSH_MASK0 ); \ + xmm9 = _mm_shuffle_epi8( xmm9, SUBSH_MASK1 ); \ + xmm10 = _mm_shuffle_epi8( xmm10, SUBSH_MASK2 ); \ + xmm11 = _mm_shuffle_epi8( xmm11, SUBSH_MASK3 ); \ + xmm12 = _mm_shuffle_epi8( xmm12, SUBSH_MASK4 ); \ + xmm13 = _mm_shuffle_epi8( xmm13, SUBSH_MASK5 ); \ + xmm14 = _mm_shuffle_epi8( xmm14, SUBSH_MASK6 ); \ + xmm15 = _mm_shuffle_epi8( xmm15, SUBSH_MASK7 ); \ /* SubBytes + MixBytes */\ SUBMIX( xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, \ xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 ); \ @@ -226,22 +231,14 @@ static const uint64_t round_const_q[] __attribute__ ((aligned (64))) = /* AddRoundConstant P1024 */\ xmm0 = _mm_xor_si128( xmm0, \ casti_m128i( round_const_p, round_counter+1 ) ); \ - xmm0 = _mm_shuffle_epi8( xmm0, m128_const_64( 0x0306090c0f020508, \ - 0x0b0e0104070a0d00 ) ); \ - xmm1 = _mm_shuffle_epi8( xmm1, m128_const_64( 0x04070a0d00030609, \ - 0x0c0f0205080b0e01 ) ); \ - xmm2 = _mm_shuffle_epi8( xmm2, m128_const_64( 0x05080b0e0104070a, \ - 0x0d000306090c0f02 ) ); \ - xmm3 = _mm_shuffle_epi8( xmm3, m128_const_64( 0x06090c0f0205080b, \ - 0x0e0104070a0d0003 ) ); \ - xmm4 = _mm_shuffle_epi8( xmm4, m128_const_64( 0x070a0d000306090c, \ - 0x0f0205080b0e0104 ) ); \ - xmm5 = _mm_shuffle_epi8( xmm5, m128_const_64( 0x080b0e0104070a0d, \ - 0x000306090c0f0205 ) ); \ - xmm6 = _mm_shuffle_epi8( xmm6, m128_const_64( 0x090c0f0205080b0e, \ - 0x0104070a0d000306 ) ); \ - xmm7 = _mm_shuffle_epi8( xmm7, m128_const_64( 0x0e0104070a0d0003, \ - 0x06090c0f0205080b ) ); \ + xmm0 
= _mm_shuffle_epi8( xmm0, SUBSH_MASK0 ); \ + xmm1 = _mm_shuffle_epi8( xmm1, SUBSH_MASK1 ); \ + xmm2 = _mm_shuffle_epi8( xmm2, SUBSH_MASK2 ); \ + xmm3 = _mm_shuffle_epi8( xmm3, SUBSH_MASK3 ); \ + xmm4 = _mm_shuffle_epi8( xmm4, SUBSH_MASK4 ); \ + xmm5 = _mm_shuffle_epi8( xmm5, SUBSH_MASK5 ); \ + xmm6 = _mm_shuffle_epi8( xmm6, SUBSH_MASK6 ); \ + xmm7 = _mm_shuffle_epi8( xmm7, SUBSH_MASK7 ); \ SUBMIX( xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, \ xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15 ); \ }\ @@ -262,22 +259,14 @@ static const uint64_t round_const_q[] __attribute__ ((aligned (64))) = xmm15 = _mm_xor_si128( xmm15, \ casti_m128i( round_const_q, round_counter ) ); \ /* ShiftBytes Q1024 + pre-AESENCLAST */\ - xmm8 = _mm_shuffle_epi8( xmm8, m128_const_64( 0x04070a0d00030609, \ - 0x0c0f0205080b0e01 ) ); \ - xmm9 = _mm_shuffle_epi8( xmm9, m128_const_64( 0x06090c0f0205080b, \ - 0x0e0104070a0d0003 ) ); \ - xmm10 = _mm_shuffle_epi8( xmm10, m128_const_64( 0x080b0e0104070a0d, \ - 0x000306090c0f0205 ) ); \ - xmm11 = _mm_shuffle_epi8( xmm11, m128_const_64( 0x0e0104070a0d0003, \ - 0x06090c0f0205080b ) ); \ - xmm12 = _mm_shuffle_epi8( xmm12, m128_const_64( 0x0306090c0f020508, \ - 0x0b0e0104070a0d00 ) ); \ - xmm13 = _mm_shuffle_epi8( xmm13, m128_const_64( 0x05080b0e0104070a, \ - 0x0d000306090c0f02 ) ); \ - xmm14 = _mm_shuffle_epi8( xmm14, m128_const_64( 0x070a0d000306090c, \ - 0x0f0205080b0e0104 ) ); \ - xmm15 = _mm_shuffle_epi8( xmm15, m128_const_64( 0x090c0f0205080b0e, \ - 0x0104070a0d000306 ) ); \ + xmm8 = _mm_shuffle_epi8( xmm8, SUBSH_MASK1 ); \ + xmm9 = _mm_shuffle_epi8( xmm9, SUBSH_MASK3 ); \ + xmm10 = _mm_shuffle_epi8( xmm10, SUBSH_MASK5 ); \ + xmm11 = _mm_shuffle_epi8( xmm11, SUBSH_MASK7 ); \ + xmm12 = _mm_shuffle_epi8( xmm12, SUBSH_MASK0 ); \ + xmm13 = _mm_shuffle_epi8( xmm13, SUBSH_MASK2 ); \ + xmm14 = _mm_shuffle_epi8( xmm14, SUBSH_MASK4 ); \ + xmm15 = _mm_shuffle_epi8( xmm15, SUBSH_MASK6 ); \ /* SubBytes + MixBytes */\ SUBMIX( xmm8, xmm9, xmm10, xmm11, 
xmm12, xmm13, xmm14, xmm15, \ xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6 , xmm7 ); \ @@ -294,22 +283,14 @@ static const uint64_t round_const_q[] __attribute__ ((aligned (64))) = xmm7 = _mm_xor_si128( xmm7, \ casti_m128i( round_const_q, round_counter+1 ) ); \ /* ShiftBytes Q1024 + pre-AESENCLAST */\ - xmm0 = _mm_shuffle_epi8( xmm0, m128_const_64( 0x04070a0d00030609, \ - 0x0c0f0205080b0e01 ) ); \ - xmm1 = _mm_shuffle_epi8( xmm1, m128_const_64( 0x06090c0f0205080b, \ - 0x0e0104070a0d0003 ) ); \ - xmm2 = _mm_shuffle_epi8( xmm2, m128_const_64( 0x080b0e0104070a0d, \ - 0x000306090c0f0205 ) ); \ - xmm3 = _mm_shuffle_epi8( xmm3, m128_const_64( 0x0e0104070a0d0003, \ - 0x06090c0f0205080b ) ); \ - xmm4 = _mm_shuffle_epi8( xmm4, m128_const_64( 0x0306090c0f020508, \ - 0x0b0e0104070a0d00 ) ); \ - xmm5 = _mm_shuffle_epi8( xmm5, m128_const_64( 0x05080b0e0104070a, \ - 0x0d000306090c0f02 ) ); \ - xmm6 = _mm_shuffle_epi8( xmm6, m128_const_64( 0x070a0d000306090c, \ - 0x0f0205080b0e0104 ) ); \ - xmm7 = _mm_shuffle_epi8( xmm7, m128_const_64( 0x090c0f0205080b0e, \ - 0x0104070a0d000306 ) ); \ + xmm0 = _mm_shuffle_epi8( xmm0, SUBSH_MASK1 ); \ + xmm1 = _mm_shuffle_epi8( xmm1, SUBSH_MASK3 ); \ + xmm2 = _mm_shuffle_epi8( xmm2, SUBSH_MASK5 ); \ + xmm3 = _mm_shuffle_epi8( xmm3, SUBSH_MASK7 ); \ + xmm4 = _mm_shuffle_epi8( xmm4, SUBSH_MASK0 ); \ + xmm5 = _mm_shuffle_epi8( xmm5, SUBSH_MASK2 ); \ + xmm6 = _mm_shuffle_epi8( xmm6, SUBSH_MASK4 ); \ + xmm7 = _mm_shuffle_epi8( xmm7, SUBSH_MASK6 ); \ /* SubBytes + MixBytes */\ SUBMIX( xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, \ xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15 ); \ @@ -324,7 +305,7 @@ static const uint64_t round_const_q[] __attribute__ ((aligned (64))) = * clobbers: t0-t7 */ #define Matrix_Transpose(i0, i1, i2, i3, i4, i5, i6, i7, t0, t1, t2, t3, t4, t5, t6, t7){\ - t0 = m128_const_64( 0x0f070b030e060a02, 0x0d0509010c040800 );\ + t0 = TRANSP_MASK; \ \ i6 = _mm_shuffle_epi8(i6, t0);\ i0 = _mm_shuffle_epi8(i0, t0);\ @@ -412,7 +393,7 
@@ static const uint64_t round_const_q[] __attribute__ ((aligned (64))) = i4 = _mm_unpacklo_epi64(i4, i5);\ t1 = _mm_unpackhi_epi64(t1, i5);\ t2 = i6;\ - o0 = m128_const_64( 0x0f070b030e060a02, 0x0d0509010c040800 ); \ + o0 = TRANSP_MASK; \ i6 = _mm_unpacklo_epi64(i6, i7);\ t2 = _mm_unpackhi_epi64(t2, i7);\ /* load transpose mask into a register, because it will be used 8 times */\ @@ -653,3 +634,4 @@ void OF1024( __m128i* chaining ) return; } +#endif diff --git a/algo/groestl/aes_ni/groestl256-intr-aes.h b/algo/groestl/aes_ni/groestl256-intr-aes.h index 15517cf..61c1b7b 100644 --- a/algo/groestl/aes_ni/groestl256-intr-aes.h +++ b/algo/groestl/aes_ni/groestl256-intr-aes.h @@ -11,6 +11,45 @@ #include #include "hash-groestl256.h" +static const __m128i round_const_l0[] __attribute__ ((aligned (64))) = +{ + { 0x7060504030201000, 0xffffffffffffffff }, + { 0x7161514131211101, 0xffffffffffffffff }, + { 0x7262524232221202, 0xffffffffffffffff }, + { 0x7363534333231303, 0xffffffffffffffff }, + { 0x7464544434241404, 0xffffffffffffffff }, + { 0x7565554535251505, 0xffffffffffffffff }, + { 0x7666564636261606, 0xffffffffffffffff }, + { 0x7767574737271707, 0xffffffffffffffff }, + { 0x7868584838281808, 0xffffffffffffffff }, + { 0x7969594939291909, 0xffffffffffffffff } +}; + +static const __m128i round_const_l7[] __attribute__ ((aligned (64))) = +{ + { 0x0000000000000000, 0x8f9fafbfcfdfefff }, + { 0x0000000000000000, 0x8e9eaebecedeeefe }, + { 0x0000000000000000, 0x8d9dadbdcdddedfd }, + { 0x0000000000000000, 0x8c9cacbcccdcecfc }, + { 0x0000000000000000, 0x8b9babbbcbdbebfb }, + { 0x0000000000000000, 0x8a9aaabacadaeafa }, + { 0x0000000000000000, 0x8999a9b9c9d9e9f9 }, + { 0x0000000000000000, 0x8898a8b8c8d8e8f8 }, + { 0x0000000000000000, 0x8797a7b7c7d7e7f7 }, + { 0x0000000000000000, 0x8696a6b6c6d6e6f6 } +}; + +static const __m128i TRANSP_MASK = { 0x0d0509010c040800, 0x0f070b030e060a02 }; + +static const __m128i SUBSH_MASK0 = { 0x0c0f0104070b0e00, 0x03060a0d08020509 }; +static const 
__m128i SUBSH_MASK1 = { 0x0e090205000d0801, 0x04070c0f0a03060b }; +static const __m128i SUBSH_MASK2 = { 0x080b0306010f0a02, 0x05000e090c04070d }; +static const __m128i SUBSH_MASK3 = { 0x0a0d040702090c03, 0x0601080b0e05000f }; +static const __m128i SUBSH_MASK4 = { 0x0b0e0500030a0d04, 0x0702090c0f060108 }; +static const __m128i SUBSH_MASK5 = { 0x0d080601040c0f05, 0x00030b0e0907020a }; +static const __m128i SUBSH_MASK6 = { 0x0f0a0702050e0906, 0x01040d080b00030c }; +static const __m128i SUBSH_MASK7 = { 0x090c000306080b07, 0x02050f0a0d01040e }; + #define tos(a) #a #define tostr(a) tos(a) @@ -26,8 +65,6 @@ i = _mm_xor_si128(i, j);\ } - /**/ - /* Yet another implementation of MixBytes. This time we use the formulae (3) from the paper "Byte Slicing Groestl". Input: a0, ..., a7 @@ -141,36 +178,6 @@ b1 = _mm_xor_si128(b1, a4);\ }/*MixBytes*/ - -static const uint64_t round_const_l0[] __attribute__ ((aligned (64))) = -{ - 0x7060504030201000, 0xffffffffffffffff, - 0x7161514131211101, 0xffffffffffffffff, - 0x7262524232221202, 0xffffffffffffffff, - 0x7363534333231303, 0xffffffffffffffff, - 0x7464544434241404, 0xffffffffffffffff, - 0x7565554535251505, 0xffffffffffffffff, - 0x7666564636261606, 0xffffffffffffffff, - 0x7767574737271707, 0xffffffffffffffff, - 0x7868584838281808, 0xffffffffffffffff, - 0x7969594939291909, 0xffffffffffffffff -}; - -static const uint64_t round_const_l7[] __attribute__ ((aligned (64))) = -{ -0x0000000000000000, 0x8f9fafbfcfdfefff, -0x0000000000000000, 0x8e9eaebecedeeefe, -0x0000000000000000, 0x8d9dadbdcdddedfd, -0x0000000000000000, 0x8c9cacbcccdcecfc, -0x0000000000000000, 0x8b9babbbcbdbebfb, -0x0000000000000000, 0x8a9aaabacadaeafa, -0x0000000000000000, 0x8999a9b9c9d9e9f9, -0x0000000000000000, 0x8898a8b8c8d8e8f8, -0x0000000000000000, 0x8797a7b7c7d7e7f7, -0x0000000000000000, 0x8696a6b6c6d6e6f6 -}; - - /* one round * i = round number * a0-a7 = input rows @@ -190,29 +197,21 @@ static const uint64_t round_const_l7[] __attribute__ ((aligned (64))) = \ /* 
ShiftBytes + SubBytes (interleaved) */\ b0 = _mm_xor_si128(b0, b0);\ - a0 = _mm_shuffle_epi8( a0, m128_const_64( 0x03060a0d08020509, \ - 0x0c0f0104070b0e00 ) ); \ + a0 = _mm_shuffle_epi8( a0, SUBSH_MASK0 ); \ a0 = _mm_aesenclast_si128( a0, b0 );\ - a1 = _mm_shuffle_epi8( a1, m128_const_64( 0x04070c0f0a03060b, \ - 0x0e090205000d0801 ) ); \ + a1 = _mm_shuffle_epi8( a1, SUBSH_MASK1 ); \ a1 = _mm_aesenclast_si128( a1, b0 );\ - a2 = _mm_shuffle_epi8( a2, m128_const_64( 0x05000e090c04070d, \ - 0x080b0306010f0a02 ) ); \ + a2 = _mm_shuffle_epi8( a2, SUBSH_MASK2 ); \ a2 = _mm_aesenclast_si128( a2, b0 );\ - a3 = _mm_shuffle_epi8( a3, m128_const_64( 0x0601080b0e05000f, \ - 0x0a0d040702090c03 ) ); \ + a3 = _mm_shuffle_epi8( a3, SUBSH_MASK3 ); \ a3 = _mm_aesenclast_si128( a3, b0 );\ - a4 = _mm_shuffle_epi8( a4, m128_const_64( 0x0702090c0f060108, \ - 0x0b0e0500030a0d04 ) ); \ + a4 = _mm_shuffle_epi8( a4, SUBSH_MASK4 ); \ a4 = _mm_aesenclast_si128( a4, b0 );\ - a5 = _mm_shuffle_epi8( a5, m128_const_64( 0x00030b0e0907020a, \ - 0x0d080601040c0f05 ) ); \ + a5 = _mm_shuffle_epi8( a5, SUBSH_MASK5 ); \ a5 = _mm_aesenclast_si128( a5, b0 );\ - a6 = _mm_shuffle_epi8( a6, m128_const_64( 0x01040d080b00030c, \ - 0x0f0a0702050e0906 ) ); \ + a6 = _mm_shuffle_epi8( a6, SUBSH_MASK6 ); \ a6 = _mm_aesenclast_si128( a6, b0 );\ - a7 = _mm_shuffle_epi8( a7, m128_const_64( 0x02050f0a0d01040e, \ - 0x090c000306080b07 ) ); \ + a7 = _mm_shuffle_epi8( a7, SUBSH_MASK7 ); \ a7 = _mm_aesenclast_si128( a7, b0 );\ \ /* MixBytes */\ @@ -241,8 +240,9 @@ static const uint64_t round_const_l7[] __attribute__ ((aligned (64))) = * outputs: i0, o1-o3 * clobbers: t0 */ + #define Matrix_Transpose_A(i0, i1, i2, i3, o1, o2, o3, t0){\ - t0 = m128_const_64( 0x0f070b030e060a02, 0x0d0509010c040800 ); \ + t0 = TRANSP_MASK; \ \ i0 = _mm_shuffle_epi8(i0, t0);\ i1 = _mm_shuffle_epi8(i1, t0);\ diff --git a/algo/groestl/aes_ni/hash-groestl256.c b/algo/groestl/aes_ni/hash-groestl256.c index 34a37b1..53f45a6 100644 --- 
a/algo/groestl/aes_ni/hash-groestl256.c +++ b/algo/groestl/aes_ni/hash-groestl256.c @@ -214,6 +214,98 @@ HashReturn_gr update_and_final_groestl256( hashState_groestl256* ctx, return SUCCESS_GR; } +int groestl256_full( hashState_groestl256* ctx, + void* output, const void* input, DataLength_gr databitlen ) +{ + int i; + ctx->hashlen = 32; + for ( i = 0; i < SIZE256; i++ ) + { + ctx->chaining[i] = _mm_setzero_si128(); + ctx->buffer[i] = _mm_setzero_si128(); + } + ((u64*)ctx->chaining)[COLS-1] = U64BIG((u64)LENGTH); + INIT256( ctx->chaining ); + ctx->buf_ptr = 0; + ctx->rem_ptr = 0; + + const int len = (int)databitlen / 128; + const int hashlen_m128i = ctx->hashlen / 16; // bytes to __m128i + const int hash_offset = SIZE256 - hashlen_m128i; + int rem = ctx->rem_ptr; + int blocks = len / SIZE256; + __m128i* in = (__m128i*)input; + + // --- update --- + + // digest any full blocks, process directly from input + for ( i = 0; i < blocks; i++ ) + TF512( ctx->chaining, &in[ i * SIZE256 ] ); + ctx->buf_ptr = blocks * SIZE256; + + // cryptonight has 200 byte input, an odd number of __m128i + // remainder is only 8 bytes, ie u64.
+ if ( databitlen % 128 !=0 ) + { + // must be cryptonight, copy 64 bits of data + *(uint64_t*)(ctx->buffer) = *(uint64_t*)(&in[ ctx->buf_ptr ] ); + i = -1; // signal for odd length + } + else + { + // Copy any remaining data to buffer for final transform + for ( i = 0; i < len % SIZE256; i++ ) + ctx->buffer[ rem + i ] = in[ ctx->buf_ptr + i ]; + i += rem; // use i as rem_ptr in final + } + + //--- final --- + + // adjust for final block + blocks++; + + if ( i == SIZE256 - 1 ) /* i indexes within the final block, not the input: only the last vector is free, so the 0x80 marker and the block count share it */ + { + // all padding at once + ctx->buffer[i] = _mm_set_epi8( blocks,blocks>>8,0,0, 0,0,0,0, + 0, 0,0,0, 0,0,0,0x80 ); + } + else + { + if ( i == -1 ) + { + // cryptonight odd length + ((uint64_t*)ctx->buffer)[ 1 ] = 0x80ull; + // finish the block with zero and length padding as normal + i = 0; + } + else + { + // add first padding + ctx->buffer[i] = _mm_set_epi8( 0,0,0,0, 0,0,0,0, + 0,0,0,0, 0,0,0,0x80 ); + } + // add zero padding + for ( i += 1; i < SIZE256 - 1; i++ ) + ctx->buffer[i] = _mm_setzero_si128(); + // add length padding + // cheat since we know the block count is trivial, good if block < 256 + ctx->buffer[i] = _mm_set_epi8( blocks,blocks>>8,0,0, 0,0,0,0, + 0, 0,0,0, 0,0,0,0 ); + } + + // digest final padding block and do output transform + TF512( ctx->chaining, ctx->buffer ); + OF512( ctx->chaining ); + + // store hash result in output + for ( i = 0; i < hashlen_m128i; i++ ) + casti_m128i( output, i ) = ctx->chaining[ hash_offset + i ]; + + return SUCCESS_GR; +} + + /* hash bit sequence */ HashReturn_gr hash_groestl256(int hashbitlen, const BitSequence_gr* data, diff --git a/algo/groestl/aes_ni/hash-groestl256.h b/algo/groestl/aes_ni/hash-groestl256.h index b228d3f..9410266 100644 --- a/algo/groestl/aes_ni/hash-groestl256.h +++ b/algo/groestl/aes_ni/hash-groestl256.h @@ -115,4 +115,7 @@ HashReturn_gr hash_groestli256( int, const BitSequence_gr*, DataLength_gr, HashReturn_gr update_and_final_groestl256( hashState_groestl256*, void*, const void*, DataLength_gr ); +int groestl256_full(
hashState_groestl256* ctx, + void* output, const void* input, DataLength_gr databitlen ); + #endif /* __hash_h */ diff --git a/algo/groestl/groestl.c b/algo/groestl/groestl.c index d4fc2b7..0ab31cc 100644 --- a/algo/groestl/groestl.c +++ b/algo/groestl/groestl.c @@ -1,4 +1,7 @@ #include "groestl-gate.h" + +#if !defined(GROESTL_8WAY) && !defined(GROESTLX16R_4WAY) + #include #include #include @@ -88,4 +91,4 @@ int scanhash_groestl( struct work *work, uint32_t max_nonce, *hashes_done = pdata[19] - first_nonce + 1; return 0; } - +#endif diff --git a/algo/groestl/groestl256-hash-4way.c b/algo/groestl/groestl256-hash-4way.c index 48c39bf..ef296fd 100644 --- a/algo/groestl/groestl256-hash-4way.c +++ b/algo/groestl/groestl256-hash-4way.c @@ -23,7 +23,6 @@ int groestl256_4way_init( groestl256_4way_context* ctx, uint64_t hashlen ) int i; ctx->hashlen = hashlen; - SET_CONSTANTS(); if (ctx->chaining == NULL || ctx->buffer == NULL) return 1; @@ -36,9 +35,6 @@ int groestl256_4way_init( groestl256_4way_context* ctx, uint64_t hashlen ) // The only non-zero in the IV is len. It can be hard coded. 
ctx->chaining[ 3 ] = m512_const2_64( 0, 0x0100000000000000 ); -// uint64_t len = U64BIG((uint64_t)LENGTH); -// ctx->chaining[ COLS/2 -1 ] = _mm512_set4_epi64( len, 0, len, 0 ); -// INIT256_4way(ctx->chaining); ctx->buf_ptr = 0; ctx->rem_ptr = 0; @@ -46,6 +42,77 @@ int groestl256_4way_init( groestl256_4way_context* ctx, uint64_t hashlen ) return 0; } +int groestl256_4way_full( groestl256_4way_context* ctx, void* output, + const void* input, uint64_t databitlen ) +{ + const int len = (int)databitlen / 128; + const int hashlen_m128i = 32 / 16; // bytes to __m128i + const int hash_offset = SIZE256 - hashlen_m128i; + const int rem = 0; /* the context is reset below, so never read the caller's possibly stale or uninitialized rem_ptr */ + int blocks = len / SIZE256; + __m512i* in = (__m512i*)input; + int i; + + if (ctx->chaining == NULL || ctx->buffer == NULL) + return 1; + + for ( i = 0; i < SIZE256; i++ ) + { + ctx->chaining[i] = m512_zero; + ctx->buffer[i] = m512_zero; + } + + // The only non-zero in the IV is len. It can be hard coded. + ctx->chaining[ 3 ] = m512_const2_64( 0, 0x0100000000000000 ); + ctx->buf_ptr = 0; + ctx->rem_ptr = 0; + + // --- update --- + + // digest any full blocks, process directly from input + for ( i = 0; i < blocks; i++ ) + TF512_4way( ctx->chaining, &in[ i * SIZE256 ] ); + ctx->buf_ptr = blocks * SIZE256; + + // copy any remaining data to buffer; the context was reset above, + // so nothing is carried over from a previous update + for ( i = 0; i < len % SIZE256; i++ ) + ctx->buffer[ rem + i ] = in[ ctx->buf_ptr + i ]; + i += rem; // use i as rem_ptr in final + + //--- final --- + + blocks++; // adjust for final block + + if ( i == SIZE256 - 1 ) + { + // only 1 vector left in buffer, all padding at once + ctx->buffer[i] = m512_const2_64( (uint64_t)blocks << 56, 0x80 ); + } + else + { + // add first padding + ctx->buffer[i] = m512_const4_64( 0, 0x80, 0, 0x80 ); + // add zero padding + for ( i += 1; i < SIZE256 - 1; i++ ) + ctx->buffer[i] = m512_zero; + + // add length padding, second last byte is zero unless blocks > 255 + ctx->buffer[i] =
m512_const2_64( (uint64_t)blocks << 56, 0 ); + } + +// digest final padding block and do output transform + TF512_4way( ctx->chaining, ctx->buffer ); + + OF512_4way( ctx->chaining ); + + // store hash result in output + for ( i = 0; i < hashlen_m128i; i++ ) + casti_m512i( output, i ) = ctx->chaining[ hash_offset + i ]; + + return 0; +} + int groestl256_4way_update_close( groestl256_4way_context* ctx, void* output, const void* input, uint64_t databitlen ) { @@ -75,11 +142,11 @@ int groestl256_4way_update_close( groestl256_4way_context* ctx, void* output, blocks++; // adjust for final block if ( i == SIZE256 - 1 ) - { + { // only 1 vector left in buffer, all padding at once ctx->buffer[i] = m512_const1_128( _mm_set_epi8( blocks, blocks>>8,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0x80 ) ); - } + } else { // add first padding diff --git a/algo/groestl/groestl256-hash-4way.h b/algo/groestl/groestl256-hash-4way.h index 7e49b21..907a64b 100644 --- a/algo/groestl/groestl256-hash-4way.h +++ b/algo/groestl/groestl256-hash-4way.h @@ -71,5 +71,8 @@ int groestl256_4way_init( groestl256_4way_context*, uint64_t ); int groestl256_4way_update_close( groestl256_4way_context*, void*, const void*, uint64_t ); +int groestl256_4way_full( groestl256_4way_context*, void*, + const void*, uint64_t ); + #endif #endif diff --git a/algo/groestl/groestl256-intr-4way.h b/algo/groestl/groestl256-intr-4way.h index ef7719c..32f642b 100644 --- a/algo/groestl/groestl256-intr-4way.h +++ b/algo/groestl/groestl256-intr-4way.h @@ -14,17 +14,78 @@ #include "groestl256-hash-4way.h" #if defined(__VAES__) +static const __m128i round_const_l0[] __attribute__ ((aligned (64))) = +{ + { 0x7060504030201000, 0xffffffffffffffff }, + { 0x7161514131211101, 0xffffffffffffffff }, + { 0x7262524232221202, 0xffffffffffffffff }, + { 0x7363534333231303, 0xffffffffffffffff }, + { 0x7464544434241404, 0xffffffffffffffff }, + { 0x7565554535251505, 0xffffffffffffffff }, + { 0x7666564636261606, 0xffffffffffffffff }, + { 0x7767574737271707,
0xffffffffffffffff }, + { 0x7868584838281808, 0xffffffffffffffff }, + { 0x7969594939291909, 0xffffffffffffffff } +}; -/* global constants */ -__m512i ROUND_CONST_Lx; -__m512i ROUND_CONST_L0[ROUNDS512]; -__m512i ROUND_CONST_L7[ROUNDS512]; -//__m512i ROUND_CONST_P[ROUNDS1024]; -//__m512i ROUND_CONST_Q[ROUNDS1024]; -__m512i TRANSP_MASK; -__m512i SUBSH_MASK[8]; -__m512i ALL_1B; -__m512i ALL_FF; +static const __m128i round_const_l7[] __attribute__ ((aligned (64))) = +{ + { 0x0000000000000000, 0x8f9fafbfcfdfefff }, + { 0x0000000000000000, 0x8e9eaebecedeeefe }, + { 0x0000000000000000, 0x8d9dadbdcdddedfd }, + { 0x0000000000000000, 0x8c9cacbcccdcecfc }, + { 0x0000000000000000, 0x8b9babbbcbdbebfb }, + { 0x0000000000000000, 0x8a9aaabacadaeafa }, + { 0x0000000000000000, 0x8999a9b9c9d9e9f9 }, + { 0x0000000000000000, 0x8898a8b8c8d8e8f8 }, + { 0x0000000000000000, 0x8797a7b7c7d7e7f7 }, + { 0x0000000000000000, 0x8696a6b6c6d6e6f6 } +}; + +static const __m512i TRANSP_MASK = { 0x0d0509010c040800, 0x0f070b030e060a02, + 0x1d1519111c141810, 0x1f171b131e161a12, + 0x2d2529212c242820, 0x2f272b232e262a22, + 0x3d3539313c343830, 0x3f373b333e363a32 }; + +static const __m512i SUBSH_MASK0 = { 0x0c0f0104070b0e00, 0x03060a0d08020509, + 0x1c1f1114171b1e10, 0x13161a1d18121519, + 0x2c2f2124272b2e20, 0x23262a2d28222529, + 0x3c3f3134373b3e30, 0x33363a3d38323539 }; + +static const __m512i SUBSH_MASK1 = { 0x0e090205000d0801, 0x04070c0f0a03060b, + 0x1e191215101d1801, 0x14171c1f1a13161b, + 0x2e292225202d2821, 0x24272c2f2a23262b, + 0x3e393235303d3831, 0x34373c3f3a33363b }; + +static const __m512i SUBSH_MASK2 = { 0x080b0306010f0a02, 0x05000e090c04070d, + 0x181b1316111f1a12, 0x15101e191c14171d, + 0x282b2326212f2a22, 0x25202e292c24272d, + 0x383b3336313f3a32, 0x35303e393c34373d }; + +static const __m512i SUBSH_MASK3 = { 0x0a0d040702090c03, 0x0601080b0e05000f, + 0x1a1d141712191c13, 0x1611181b1e15101f, + 0x2a2d242722292c23, 0x2621282b2e25202f, + 0x3a3d343732393c33, 0x3631383b3e35303f }; + +static const __m512i 
SUBSH_MASK4 = { 0x0b0e0500030a0d04, 0x0702090c0f060108, + 0x1b1e1510131a1d14, 0x1712191c1f161118, + 0x2b2e2520232a2d24, 0x2722292c2f262128, + 0x3b3e3530333a3d34, 0x3732393c3f363138 }; + +static const __m512i SUBSH_MASK5 = { 0x0d080601040c0f05, 0x00030b0e0907020a, + 0x1d181611141c1f15, 0x10131b1e1917121a, + 0x2d282621242c2f25, 0x20232b2e2927222a, + 0x3d383631343c3f35, 0x30333b3e3937323a }; + +static const __m512i SUBSH_MASK6 = { 0x0f0a0702050e0906, 0x01040d080b00030c, + 0x1f1a1712151e1916, 0x11141d181b10131c, + 0x2f2a2722252e2926, 0x21242d282b20232c, + 0x3f3a3732353e3936, 0x31343d383b30333c }; + +static const __m512i SUBSH_MASK7 = { 0x090c000306080b07, 0x02050f0a0d01040e, + 0x191c101316181b17, 0x12151f1a1d11141e, + 0x292c202326282b27, 0x22252f2a2d21242e, + 0x393c303336383b37, 0x32353f3a3d31343e }; #define tos(a) #a #define tostr(a) tos(a) @@ -40,8 +101,6 @@ __m512i ALL_FF; i = _mm512_xor_si512(i, j);\ } - /**/ - /* Yet another implementation of MixBytes. This time we use the formulae (3) from the paper "Byte Slicing Groestl". 
Input: a0, ..., a7 @@ -155,95 +214,36 @@ __m512i ALL_FF; b1 = _mm512_xor_si512(b1, a4);\ }/*MixBytes*/ -// calculate the round constants seperately and load at startup - -#define SET_CONSTANTS(){\ - ALL_1B = _mm512_set1_epi32( 0x1b1b1b1b );\ - TRANSP_MASK = _mm512_set_epi32( \ - 0x3f373b33, 0x3e363a32, 0x3d353931, 0x3c343830, \ - 0x2f272b23, 0x2e262a22, 0x2d252921, 0x2c242820, \ - 0x1f171b13, 0x1e161a12, 0x1d151911, 0x1c141810, \ - 0x0f070b03, 0x0e060a02, 0x0d050901, 0x0c040800 ); \ - SUBSH_MASK[0] = _mm512_set_epi32( \ - 0x33363a3d, 0x38323539, 0x3c3f3134, 0x373b3e30, \ - 0x23262a2d, 0x28222529, 0x2c2f2124, 0x272b2e20, \ - 0x13161a1d, 0x18121519, 0x1c1f1114, 0x171b1e10, \ - 0x03060a0d, 0x08020509, 0x0c0f0104, 0x070b0e00 ); \ - SUBSH_MASK[1] = _mm512_set_epi32( \ - 0x34373c3f, 0x3a33363b, 0x3e393235, 0x303d3831, \ - 0x24272c2f, 0x2a23262b, 0x2e292225, 0x202d2821, \ - 0x14171c1f, 0x1a13161b, 0x1e191215, 0x101d1801, \ - 0x04070c0f, 0x0a03060b, 0x0e090205, 0x000d0801 );\ - SUBSH_MASK[2] = _mm512_set_epi32( \ - 0x35303e39, 0x3c34373d, 0x383b3336, 0x313f3a32, \ - 0x25202e29, 0x2c24272d, 0x282b2326, 0x212f2a22, \ - 0x15101e19, 0x1c14171d, 0x181b1316, 0x111f1a12, \ - 0x05000e09, 0x0c04070d, 0x080b0306, 0x010f0a02 );\ - SUBSH_MASK[3] = _mm512_set_epi32( \ - 0x3631383b, 0x3e35303f, 0x3a3d3437, 0x32393c33, \ - 0x2621282b, 0x2e25202f, 0x2a2d2427, 0x22292c23, \ - 0x1611181b, 0x1e15101f, 0x1a1d1417, 0x12191c13, \ - 0x0601080b, 0x0e05000f, 0x0a0d0407, 0x02090c03 );\ - SUBSH_MASK[4] = _mm512_set_epi32( \ - 0x3732393c, 0x3f363138, 0x3b3e3530, 0x333a3d34, \ - 0x2722292c, 0x2f262128, 0x2b2e2520, 0x232a2d24, \ - 0x1712191c, 0x1f161118, 0x1b1e1510, 0x131a1d14, \ - 0x0702090c, 0x0f060108, 0x0b0e0500, 0x030a0d04 );\ - SUBSH_MASK[5] = _mm512_set_epi32( \ - 0x30333b3e, 0x3937323a, 0x3d383631, 0x343c3f35, \ - 0x20232b2e, 0x2927222a, 0x2d282621, 0x242c2f25, \ - 0x10131b1e, 0x1917121a, 0x1d181611, 0x141c1f15, \ - 0x00030b0e, 0x0907020a, 0x0d080601, 0x040c0f05 );\ - SUBSH_MASK[6] = 
_mm512_set_epi32( \ - 0x31343d38, 0x3b30333c, 0x3f3a3732, 0x353e3936, \ - 0x21242d28, 0x2b20232c, 0x2f2a2722, 0x252e2926, \ - 0x11141d18, 0x1b10131c, 0x1f1a1712, 0x151e1916, \ - 0x01040d08, 0x0b00030c, 0x0f0a0702, 0x050e0906 );\ - SUBSH_MASK[7] = _mm512_set_epi32( \ - 0x32353f3a, 0x3d31343e, 0x393c3033, 0x36383b37, \ - 0x22252f2a, 0x2d21242e, 0x292c2023, 0x26282b27, \ - 0x12151f1a, 0x1d11141e, 0x191c1013, 0x16181b17, \ - 0x02050f0a, 0x0d01040e, 0x090c0003, 0x06080b07 );\ - for ( i = 0; i < ROUNDS512; i++ ) \ - {\ - ROUND_CONST_L0[i] = _mm512_set4_epi32( 0xffffffff, 0xffffffff, \ - 0x70605040 ^ ( i * 0x01010101 ), 0x30201000 ^ ( i * 0x01010101 ) ); \ - ROUND_CONST_L7[i] = _mm512_set4_epi32( 0x8f9fafbf ^ ( i * 0x01010101 ), \ - 0xcfdfefff ^ ( i * 0x01010101 ), 0x00000000, 0x00000000 ); \ - }\ - ROUND_CONST_Lx = _mm512_set4_epi32( 0xffffffff, 0xffffffff, \ - 0x00000000, 0x00000000 ); \ -}while(0);\ #define ROUND(i, a0, a1, a2, a3, a4, a5, a6, a7, b0, b1, b2, b3, b4, b5, b6, b7){\ /* AddRoundConstant */\ - b1 = ROUND_CONST_Lx;\ - a0 = _mm512_xor_si512( a0, (ROUND_CONST_L0[i]) );\ + b1 = m512_const2_64( 0xffffffffffffffff, 0 ); \ + a0 = _mm512_xor_si512( a0, m512_const1_128( round_const_l0[i] ) );\ a1 = _mm512_xor_si512( a1, b1 );\ a2 = _mm512_xor_si512( a2, b1 );\ a3 = _mm512_xor_si512( a3, b1 );\ a4 = _mm512_xor_si512( a4, b1 );\ a5 = _mm512_xor_si512( a5, b1 );\ a6 = _mm512_xor_si512( a6, b1 );\ - a7 = _mm512_xor_si512( a7, (ROUND_CONST_L7[i]) );\ + a7 = _mm512_xor_si512( a7, m512_const1_128( round_const_l7[i] ) );\ \ /* ShiftBytes + SubBytes (interleaved) */\ b0 = _mm512_xor_si512( b0, b0 );\ - a0 = _mm512_shuffle_epi8( a0, (SUBSH_MASK[0]) );\ + a0 = _mm512_shuffle_epi8( a0, SUBSH_MASK0 );\ a0 = _mm512_aesenclast_epi128(a0, b0 );\ - a1 = _mm512_shuffle_epi8( a1, (SUBSH_MASK[1]) );\ + a1 = _mm512_shuffle_epi8( a1, SUBSH_MASK1 );\ a1 = _mm512_aesenclast_epi128(a1, b0 );\ - a2 = _mm512_shuffle_epi8( a2, (SUBSH_MASK[2]) );\ + a2 = _mm512_shuffle_epi8( a2, SUBSH_MASK2 
);\ a2 = _mm512_aesenclast_epi128(a2, b0 );\ - a3 = _mm512_shuffle_epi8( a3, (SUBSH_MASK[3]) );\ + a3 = _mm512_shuffle_epi8( a3, SUBSH_MASK3 );\ a3 = _mm512_aesenclast_epi128(a3, b0 );\ - a4 = _mm512_shuffle_epi8( a4, (SUBSH_MASK[4]) );\ + a4 = _mm512_shuffle_epi8( a4, SUBSH_MASK4 );\ a4 = _mm512_aesenclast_epi128(a4, b0 );\ - a5 = _mm512_shuffle_epi8( a5, (SUBSH_MASK[5]) );\ + a5 = _mm512_shuffle_epi8( a5, SUBSH_MASK5 );\ a5 = _mm512_aesenclast_epi128(a5, b0 );\ - a6 = _mm512_shuffle_epi8( a6, (SUBSH_MASK[6]) );\ + a6 = _mm512_shuffle_epi8( a6, SUBSH_MASK6 );\ a6 = _mm512_aesenclast_epi128(a6, b0 );\ - a7 = _mm512_shuffle_epi8( a7, (SUBSH_MASK[7]) );\ + a7 = _mm512_shuffle_epi8( a7, SUBSH_MASK7 );\ a7 = _mm512_aesenclast_epi128( a7, b0 );\ \ /* MixBytes */\ @@ -390,29 +390,6 @@ __m512i ALL_FF; }/**/ - -void INIT256_4way( __m512i* chaining ) -{ - static __m512i xmm0, xmm2, xmm6, xmm7; - static __m512i xmm12, xmm13, xmm14, xmm15; - - /* load IV into registers xmm12 - xmm15 */ - xmm12 = chaining[0]; - xmm13 = chaining[1]; - xmm14 = chaining[2]; - xmm15 = chaining[3]; - - /* transform chaining value from column ordering into row ordering */ - /* we put two rows (64 bit) of the IV into one 128-bit XMM register */ - Matrix_Transpose_A(xmm12, xmm13, xmm14, xmm15, xmm2, xmm6, xmm7, xmm0); - - /* store transposed IV */ - chaining[0] = xmm12; - chaining[1] = xmm2; - chaining[2] = xmm6; - chaining[3] = xmm7; -} - void TF512_4way( __m512i* chaining, __m512i* message ) { static __m512i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7; diff --git a/algo/groestl/groestl512-hash-4way.c b/algo/groestl/groestl512-hash-4way.c index d3b5ca0..8e5e139 100644 --- a/algo/groestl/groestl512-hash-4way.c +++ b/algo/groestl/groestl512-hash-4way.c @@ -19,10 +19,6 @@ int groestl512_4way_init( groestl512_4way_context* ctx, uint64_t hashlen ) { - int i; - - SET_CONSTANTS(); - if (ctx->chaining == NULL || ctx->buffer == NULL) return 1; @@ -99,7 +95,6 @@ int groestl512_4way_full( 
groestl512_4way_context* ctx, void* output, // --- init --- - SET_CONSTANTS(); memset_zero_512( ctx->chaining, SIZE512 ); memset_zero_512( ctx->buffer, SIZE512 ); ctx->chaining[ 6 ] = m512_const2_64( 0x0200000000000000, 0 ); diff --git a/algo/groestl/groestl512-intr-4way.h b/algo/groestl/groestl512-intr-4way.h index e63744c..a645379 100644 --- a/algo/groestl/groestl512-intr-4way.h +++ b/algo/groestl/groestl512-intr-4way.h @@ -15,16 +15,86 @@ #if defined(__VAES__) -/* global constants */ -__m512i ROUND_CONST_Lx; -//__m128i ROUND_CONST_L0[ROUNDS512]; -//__m128i ROUND_CONST_L7[ROUNDS512]; -__m512i ROUND_CONST_P[ROUNDS1024]; -__m512i ROUND_CONST_Q[ROUNDS1024]; -__m512i TRANSP_MASK; -__m512i SUBSH_MASK[8]; -__m512i ALL_1B; -__m512i ALL_FF; +static const __m128i round_const_p[] __attribute__ ((aligned (64))) = +{ + { 0x7060504030201000, 0xf0e0d0c0b0a09080 }, + { 0x7161514131211101, 0xf1e1d1c1b1a19181 }, + { 0x7262524232221202, 0xf2e2d2c2b2a29282 }, + { 0x7363534333231303, 0xf3e3d3c3b3a39383 }, + { 0x7464544434241404, 0xf4e4d4c4b4a49484 }, + { 0x7565554535251505, 0xf5e5d5c5b5a59585 }, + { 0x7666564636261606, 0xf6e6d6c6b6a69686 }, + { 0x7767574737271707, 0xf7e7d7c7b7a79787 }, + { 0x7868584838281808, 0xf8e8d8c8b8a89888 }, + { 0x7969594939291909, 0xf9e9d9c9b9a99989 }, + { 0x7a6a5a4a3a2a1a0a, 0xfaeadacabaaa9a8a }, + { 0x7b6b5b4b3b2b1b0b, 0xfbebdbcbbbab9b8b }, + { 0x7c6c5c4c3c2c1c0c, 0xfcecdcccbcac9c8c }, + { 0x7d6d5d4d3d2d1d0d, 0xfdedddcdbdad9d8d } +}; + +static const __m128i round_const_q[] __attribute__ ((aligned (64))) = +{ + { 0x8f9fafbfcfdfefff, 0x0f1f2f3f4f5f6f7f }, + { 0x8e9eaebecedeeefe, 0x0e1e2e3e4e5e6e7e }, + { 0x8d9dadbdcdddedfd, 0x0d1d2d3d4d5d6d7d }, + { 0x8c9cacbcccdcecfc, 0x0c1c2c3c4c5c6c7c }, + { 0x8b9babbbcbdbebfb, 0x0b1b2b3b4b5b6b7b }, + { 0x8a9aaabacadaeafa, 0x0a1a2a3a4a5a6a7a }, + { 0x8999a9b9c9d9e9f9, 0x0919293949596979 }, + { 0x8898a8b8c8d8e8f8, 0x0818283848586878 }, + { 0x8797a7b7c7d7e7f7, 0x0717273747576777 }, + { 0x8696a6b6c6d6e6f6, 0x0616263646566676 
}, + { 0x8595a5b5c5d5e5f5, 0x0515253545556575 }, + { 0x8494a4b4c4d4e4f4, 0x0414243444546474 }, + { 0x8393a3b3c3d3e3f3, 0x0313233343536373 }, + { 0x8292a2b2c2d2e2f2, 0x0212223242526272 } +}; + +static const __m512i TRANSP_MASK = { 0x0d0509010c040800, 0x0f070b030e060a02, + 0x1d1519111c141810, 0x1f171b131e161a12, + 0x2d2529212c242820, 0x2f272b232e262a22, + 0x3d3539313c343830, 0x3f373b333e363a32 }; + +static const __m512i SUBSH_MASK0 = { 0x0b0e0104070a0d00, 0x0306090c0f020508, + 0x1b1e1114171a1d10, 0x1316191c1f121518, + 0x2b2e2124272a2d20, 0x2326292c2f222528, + 0x3b3e3134373a3d30, 0x3336393c3f323538 }; + +static const __m512i SUBSH_MASK1 = { 0x0c0f0205080b0e01, 0x04070a0d00030609, + 0x1c1f1215181b1e11, 0x14171a1d10131619, + 0x2c2f2225282b2e21, 0x24272a2d20232629, + 0x3c3f3235383b3e31, 0x34373a3d30333639 }; + +static const __m512i SUBSH_MASK2 = { 0x0d000306090c0f02, 0x05080b0e0104070a, + 0x1d101316191c1f12, 0x15181b1e1114171a, + 0x2d202326292c2f22, 0x25282b2e2124272a, + 0x3d303336393c3f32, 0x35383b3e3134373a }; + +static const __m512i SUBSH_MASK3 = { 0x0e0104070a0d0003, 0x06090c0f0205080b, + 0x1e1114171a1d1013, 0x16191c1f1215181b, + 0x2e2124272a2d2023, 0x26292c2f2225282b, + 0x3e3134373a3d3033, 0x36393c3f3235383b }; + +static const __m512i SUBSH_MASK4 = { 0x0f0205080b0e0104, 0x070a0d000306090c, + 0x1f1215181b1e1114, 0x171a1d101316191c, + 0x2f2225282b2e2124, 0x272a2d202326292c, + 0x3f3235383b3e3134, 0x373a3d303336393c }; + +static const __m512i SUBSH_MASK5 = { 0x000306090c0f0205, 0x080b0e0104070a0d, + 0x101316191c1f1215, 0x181b1e1114171a1d, + 0x202326292c2f2225, 0x282b2e2124272a2d, + 0x303336393c3f3235, 0x383b3e3134373a3d }; + +static const __m512i SUBSH_MASK6 = { 0x0104070a0d000306, 0x090c0f0205080b0e, + 0x1114171a1d101316, 0x191c1f1215181b1e, + 0x2124272a2d202326, 0x292c2f2225282b2e, + 0x3134373a3d303336, 0x393c3f3235383b3e }; + +static const __m512i SUBSH_MASK7 = { 0x06090c0f0205080b, 0x0e0104070a0d0003, + 0x16191c1f1215181b, 0x1e1114171a1d1013, + 0x26292c2f2225282b, 
0x2e2124272a2d2023, + 0x36393c3f3235383b, 0x3e3134373a3d3033 }; #define tos(a) #a #define tostr(a) tos(a) @@ -155,69 +225,6 @@ __m512i ALL_FF; b1 = _mm512_xor_si512(b1, a4);\ }/*MixBytes*/ -// calculate the round constants seperately and load at startup - -#define SET_CONSTANTS(){\ - ALL_FF = _mm512_set1_epi32( 0xffffffff );\ - ALL_1B = _mm512_set1_epi32( 0x1b1b1b1b );\ - TRANSP_MASK = _mm512_set_epi32( \ - 0x3f373b33, 0x3e363a32, 0x3d353931, 0x3c343830, \ - 0x2f272b23, 0x2e262a22, 0x2d252921, 0x2c242820, \ - 0x1f171b13, 0x1e161a12, 0x1d151911, 0x1c141810, \ - 0x0f070b03, 0x0e060a02, 0x0d050901, 0x0c040800 ); \ - SUBSH_MASK[0] = _mm512_set_epi32( \ - 0x3336393c, 0x3f323538, 0x3b3e3134, 0x373a3d30, \ - 0x2326292c, 0x2f222528, 0x2b2e2124, 0x272a2d20, \ - 0x1316191c, 0x1f121518, 0x1b1e1114, 0x171a1d10, \ - 0x0306090c, 0x0f020508, 0x0b0e0104, 0x070a0d00 ); \ - SUBSH_MASK[1] = _mm512_set_epi32( \ - 0x34373a3d, 0x30333639, 0x3c3f3235, 0x383b3e31, \ - 0x24272a2d, 0x20232629, 0x2c2f2225, 0x282b2e21, \ - 0x14171a1d, 0x10131619, 0x1c1f1215, 0x181b1e11, \ - 0x04070a0d, 0x00030609, 0x0c0f0205, 0x080b0e01 ); \ - SUBSH_MASK[2] = _mm512_set_epi32( \ - 0x35383b3e, 0x3134373a, 0x3d303336, 0x393c3f32, \ - 0x25282b2e, 0x2124272a, 0x2d202326, 0x292c2f22, \ - 0x15181b1e, 0x1114171a, 0x1d101316, 0x191c1f12, \ - 0x05080b0e, 0x0104070a, 0x0d000306, 0x090c0f02 ); \ - SUBSH_MASK[3] = _mm512_set_epi32( \ - 0x36393c3f, 0x3235383b, 0x3e313437, 0x3a3d3033, \ - 0x26292c2f, 0x2225282b, 0x2e212427, 0x2a2d2023, \ - 0x16191c1f, 0x1215181b, 0x1e111417, 0x1a1d1013, \ - 0x06090c0f, 0x0205080b, 0x0e010407, 0x0a0d0003 ); \ - SUBSH_MASK[4] = _mm512_set_epi32( \ - 0x373a3d30, 0x3336393c, 0x3f323538, 0x3b3e3134, \ - 0x272a2d20, 0x2326292c, 0x2f222528, 0x2b2e2124, \ - 0x171a1d10, 0x1316191c, 0x1f121518, 0x1b1e1114, \ - 0x070a0d00, 0x0306090c, 0x0f020508, 0x0b0e0104 ); \ - SUBSH_MASK[5] = _mm512_set_epi32( \ - 0x383b3e31, 0x34373a3d, 0x30333639, 0x3c3f3235, \ - 0x282b2e21, 0x24272a2d, 0x20232629, 0x2c2f2225, 
\ - 0x181b1e11, 0x14171a1d, 0x10131619, 0x1c1f1215, \ - 0x080b0e01, 0x04070a0d, 0x00030609, 0x0c0f0205 ); \ - SUBSH_MASK[6] = _mm512_set_epi32( \ - 0x393c3f32, 0x35383b3e, 0x3134373a, 0x3d303336, \ - 0x292c2f22, 0x25282b2e, 0x2124272a, 0x2d202326, \ - 0x191c1f12, 0x15181b1e, 0x1114171a, 0x1d101316, \ - 0x090c0f02, 0x05080b0e, 0x0104070a, 0x0d000306 ); \ - SUBSH_MASK[7] = _mm512_set_epi32( \ - 0x3e313437, 0x3a3d3033, 0x36393c3f, 0x3235383b, \ - 0x2e212427, 0x2a2d2023, 0x26292c2f, 0x2225282b, \ - 0x1e111417, 0x1a1d1013, 0x16191c1f, 0x1215181b, \ - 0x0e010407, 0x0a0d0003, 0x06090c0f, 0x0205080b ); \ - for( i = 0; i < ROUNDS1024; i++ ) \ - { \ - ROUND_CONST_P[i] = _mm512_set4_epi32( 0xf0e0d0c0 ^ (i * 0x01010101), \ - 0xb0a09080 ^ (i * 0x01010101), \ - 0x70605040 ^ (i * 0x01010101), \ - 0x30201000 ^ (i * 0x01010101) ); \ - ROUND_CONST_Q[i] = _mm512_set4_epi32( 0x0f1f2f3f ^ (i * 0x01010101), \ - 0x4f5f6f7f ^ (i * 0x01010101), \ - 0x8f9fafbf ^ (i * 0x01010101), \ - 0xcfdfefff ^ (i * 0x01010101));\ - } \ -}while(0);\ - /* one round * a0-a7 = input rows * b0-b7 = output rows @@ -242,30 +249,32 @@ __m512i ALL_FF; for ( round_counter = 0; round_counter < 14; round_counter += 2 ) \ { \ /* AddRoundConstant P1024 */\ - xmm8 = _mm512_xor_si512( xmm8, ( ROUND_CONST_P[ round_counter ] ) );\ + xmm8 = _mm512_xor_si512( xmm8, m512_const1_128( \ + casti_m128i( round_const_p, round_counter ) ) ); \ /* ShiftBytes P1024 + pre-AESENCLAST */\ - xmm8 = _mm512_shuffle_epi8( xmm8, ( SUBSH_MASK[0] ) );\ - xmm9 = _mm512_shuffle_epi8( xmm9, ( SUBSH_MASK[1] ) );\ - xmm10 = _mm512_shuffle_epi8( xmm10, ( SUBSH_MASK[2] ) );\ - xmm11 = _mm512_shuffle_epi8( xmm11, ( SUBSH_MASK[3] ) );\ - xmm12 = _mm512_shuffle_epi8( xmm12, ( SUBSH_MASK[4] ) );\ - xmm13 = _mm512_shuffle_epi8( xmm13, ( SUBSH_MASK[5] ) );\ - xmm14 = _mm512_shuffle_epi8( xmm14, ( SUBSH_MASK[6] ) );\ - xmm15 = _mm512_shuffle_epi8( xmm15, ( SUBSH_MASK[7] ) );\ + xmm8 = _mm512_shuffle_epi8( xmm8, SUBSH_MASK0 ); \ + xmm9 = _mm512_shuffle_epi8( 
xmm9, SUBSH_MASK1 );\ + xmm10 = _mm512_shuffle_epi8( xmm10, SUBSH_MASK2 );\ + xmm11 = _mm512_shuffle_epi8( xmm11, SUBSH_MASK3 );\ + xmm12 = _mm512_shuffle_epi8( xmm12, SUBSH_MASK4 );\ + xmm13 = _mm512_shuffle_epi8( xmm13, SUBSH_MASK5 );\ + xmm14 = _mm512_shuffle_epi8( xmm14, SUBSH_MASK6 );\ + xmm15 = _mm512_shuffle_epi8( xmm15, SUBSH_MASK7 );\ /* SubBytes + MixBytes */\ SUBMIX(xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);\ \ /* AddRoundConstant P1024 */\ - xmm0 = _mm512_xor_si512( xmm0, ( ROUND_CONST_P[ round_counter+1 ] ) );\ + xmm0 = _mm512_xor_si512( xmm0, m512_const1_128( \ + casti_m128i( round_const_p, round_counter+1 ) ) ); \ /* ShiftBytes P1024 + pre-AESENCLAST */\ - xmm0 = _mm512_shuffle_epi8( xmm0, ( SUBSH_MASK[0] ) );\ - xmm1 = _mm512_shuffle_epi8( xmm1, ( SUBSH_MASK[1] ) );\ - xmm2 = _mm512_shuffle_epi8( xmm2, ( SUBSH_MASK[2] ) );\ - xmm3 = _mm512_shuffle_epi8( xmm3, ( SUBSH_MASK[3] ) );\ - xmm4 = _mm512_shuffle_epi8( xmm4, ( SUBSH_MASK[4] ) );\ - xmm5 = _mm512_shuffle_epi8( xmm5, ( SUBSH_MASK[5] ) );\ - xmm6 = _mm512_shuffle_epi8( xmm6, ( SUBSH_MASK[6] ) );\ - xmm7 = _mm512_shuffle_epi8( xmm7, ( SUBSH_MASK[7] ) );\ + xmm0 = _mm512_shuffle_epi8( xmm0, SUBSH_MASK0 );\ + xmm1 = _mm512_shuffle_epi8( xmm1, SUBSH_MASK1 );\ + xmm2 = _mm512_shuffle_epi8( xmm2, SUBSH_MASK2 );\ + xmm3 = _mm512_shuffle_epi8( xmm3, SUBSH_MASK3 );\ + xmm4 = _mm512_shuffle_epi8( xmm4, SUBSH_MASK4 );\ + xmm5 = _mm512_shuffle_epi8( xmm5, SUBSH_MASK5 );\ + xmm6 = _mm512_shuffle_epi8( xmm6, SUBSH_MASK6 );\ + xmm7 = _mm512_shuffle_epi8( xmm7, SUBSH_MASK7 );\ /* SubBytes + MixBytes */\ SUBMIX(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15);\ }\ @@ -284,16 +293,17 @@ __m512i ALL_FF; xmm12 = _mm512_xor_si512( xmm12, xmm1 );\ xmm13 = _mm512_xor_si512( xmm13, xmm1 );\ xmm14 = _mm512_xor_si512( xmm14, xmm1 );\ - xmm15 = _mm512_xor_si512( xmm15, ( ROUND_CONST_Q[ round_counter ] ) );\ + 
xmm15 = _mm512_xor_si512( xmm15, m512_const1_128( \ + casti_m128i( round_const_q, round_counter ) ) ); \ /* ShiftBytes Q1024 + pre-AESENCLAST */\ - xmm8 = _mm512_shuffle_epi8( xmm8, ( SUBSH_MASK[1] ) );\ - xmm9 = _mm512_shuffle_epi8( xmm9, ( SUBSH_MASK[3] ) );\ - xmm10 = _mm512_shuffle_epi8( xmm10, ( SUBSH_MASK[5] ) );\ - xmm11 = _mm512_shuffle_epi8( xmm11, ( SUBSH_MASK[7] ) );\ - xmm12 = _mm512_shuffle_epi8( xmm12, ( SUBSH_MASK[0] ) );\ - xmm13 = _mm512_shuffle_epi8( xmm13, ( SUBSH_MASK[2] ) );\ - xmm14 = _mm512_shuffle_epi8( xmm14, ( SUBSH_MASK[4] ) );\ - xmm15 = _mm512_shuffle_epi8( xmm15, ( SUBSH_MASK[6] ) );\ + xmm8 = _mm512_shuffle_epi8( xmm8, SUBSH_MASK1 );\ + xmm9 = _mm512_shuffle_epi8( xmm9, SUBSH_MASK3 );\ + xmm10 = _mm512_shuffle_epi8( xmm10, SUBSH_MASK5 );\ + xmm11 = _mm512_shuffle_epi8( xmm11, SUBSH_MASK7 );\ + xmm12 = _mm512_shuffle_epi8( xmm12, SUBSH_MASK0 );\ + xmm13 = _mm512_shuffle_epi8( xmm13, SUBSH_MASK2 );\ + xmm14 = _mm512_shuffle_epi8( xmm14, SUBSH_MASK4 );\ + xmm15 = _mm512_shuffle_epi8( xmm15, SUBSH_MASK6 );\ /* SubBytes + MixBytes */\ SUBMIX(xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);\ \ @@ -306,16 +316,17 @@ __m512i ALL_FF; xmm4 = _mm512_xor_si512( xmm4, xmm9 );\ xmm5 = _mm512_xor_si512( xmm5, xmm9 );\ xmm6 = _mm512_xor_si512( xmm6, xmm9 );\ - xmm7 = _mm512_xor_si512( xmm7, ( ROUND_CONST_Q[ round_counter+1 ] ) );\ + xmm7 = _mm512_xor_si512( xmm7, m512_const1_128( \ + casti_m128i( round_const_q, round_counter+1 ) ) ); \ /* ShiftBytes Q1024 + pre-AESENCLAST */\ - xmm0 = _mm512_shuffle_epi8( xmm0, ( SUBSH_MASK[1] ) );\ - xmm1 = _mm512_shuffle_epi8( xmm1, ( SUBSH_MASK[3] ) );\ - xmm2 = _mm512_shuffle_epi8( xmm2, ( SUBSH_MASK[5] ) );\ - xmm3 = _mm512_shuffle_epi8( xmm3, ( SUBSH_MASK[7] ) );\ - xmm4 = _mm512_shuffle_epi8( xmm4, ( SUBSH_MASK[0] ) );\ - xmm5 = _mm512_shuffle_epi8( xmm5, ( SUBSH_MASK[2] ) );\ - xmm6 = _mm512_shuffle_epi8( xmm6, ( SUBSH_MASK[4] ) );\ - xmm7 = 
_mm512_shuffle_epi8( xmm7, ( SUBSH_MASK[6] ) );\ + xmm0 = _mm512_shuffle_epi8( xmm0, SUBSH_MASK1 );\ + xmm1 = _mm512_shuffle_epi8( xmm1, SUBSH_MASK3 );\ + xmm2 = _mm512_shuffle_epi8( xmm2, SUBSH_MASK5 );\ + xmm3 = _mm512_shuffle_epi8( xmm3, SUBSH_MASK7 );\ + xmm4 = _mm512_shuffle_epi8( xmm4, SUBSH_MASK0 );\ + xmm5 = _mm512_shuffle_epi8( xmm5, SUBSH_MASK2 );\ + xmm6 = _mm512_shuffle_epi8( xmm6, SUBSH_MASK4 );\ + xmm7 = _mm512_shuffle_epi8( xmm7, SUBSH_MASK6 );\ /* SubBytes + MixBytes */\ SUBMIX(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15);\ }\ diff --git a/algo/groestl/myr-groestl.c b/algo/groestl/myr-groestl.c index f52bc96..fe5b920 100644 --- a/algo/groestl/myr-groestl.c +++ b/algo/groestl/myr-groestl.c @@ -1,4 +1,7 @@ #include "myrgr-gate.h" + +#if !defined(MYRGR_8WAY) && !defined(MYRGR_4WAY) + #include #include #include @@ -86,3 +89,4 @@ int scanhash_myriad( struct work *work, uint32_t max_nonce, *hashes_done = pdata[19] - first_nonce + 1; return 0; } +#endif diff --git a/algo/groestl/sph_groestl.c b/algo/groestl/sph_groestl.c index 91f75d3..8f12430 100644 --- a/algo/groestl/sph_groestl.c +++ b/algo/groestl/sph_groestl.c @@ -35,6 +35,8 @@ #include "sph_groestl.h" +#if !defined(__AES__) + #ifdef __cplusplus extern "C"{ #endif @@ -3116,4 +3118,6 @@ sph_groestl512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst) #ifdef __cplusplus } + +#endif // !AES #endif diff --git a/algo/groestl/sph_groestl.h b/algo/groestl/sph_groestl.h index 9ecbff2..02465e3 100644 --- a/algo/groestl/sph_groestl.h +++ b/algo/groestl/sph_groestl.h @@ -42,6 +42,7 @@ extern "C"{ #include #include "algo/sha/sph_types.h" +#if !defined(__AES__) /** * Output size (in bits) for Groestl-224. 
*/ @@ -326,4 +327,5 @@ void sph_groestl512_addbits_and_close( } #endif +#endif // !AES #endif diff --git a/algo/heavy/bastion.c b/algo/heavy/bastion.c deleted file mode 100644 index 9c17661..0000000 --- a/algo/heavy/bastion.c +++ /dev/null @@ -1,156 +0,0 @@ -#include "algo-gate-api.h" -#include -#include -#include -#include -#include -#include "sph_hefty1.h" -#include "algo/luffa/sph_luffa.h" -#include "algo/fugue/sph_fugue.h" -#include "algo/skein/sph_skein.h" -#include "algo/whirlpool/sph_whirlpool.h" -#include "algo/shabal/sph_shabal.h" -#include "algo/echo/sph_echo.h" -#include "algo/hamsi/sph_hamsi.h" -#include "algo/luffa/luffa_for_sse2.h" -#ifdef __AES__ - #include "algo/echo/aes_ni/hash_api.h" -#endif - -void bastionhash(void *output, const void *input) -{ - unsigned char hash[64] __attribute__ ((aligned (64))); - -#ifdef __AES__ - hashState_echo ctx_echo; -#else - sph_echo512_context ctx_echo; -#endif - hashState_luffa ctx_luffa; - sph_fugue512_context ctx_fugue; - sph_whirlpool_context ctx_whirlpool; - sph_shabal512_context ctx_shabal; - sph_hamsi512_context ctx_hamsi; - sph_skein512_context ctx_skein; - - HEFTY1(input, 80, hash); - - init_luffa( &ctx_luffa, 512 ); - update_and_final_luffa( &ctx_luffa, (BitSequence*)hash, - (const BitSequence*)hash, 64 ); - - if (hash[0] & 0x8) - { - sph_fugue512_init(&ctx_fugue); - sph_fugue512(&ctx_fugue, hash, 64); - sph_fugue512_close(&ctx_fugue, hash); - } else { - sph_skein512_init( &ctx_skein ); - sph_skein512( &ctx_skein, hash, 64 ); - sph_skein512_close( &ctx_skein, hash ); - } - - sph_whirlpool_init(&ctx_whirlpool); - sph_whirlpool(&ctx_whirlpool, hash, 64); - sph_whirlpool_close(&ctx_whirlpool, hash); - - sph_fugue512_init(&ctx_fugue); - sph_fugue512(&ctx_fugue, hash, 64); - sph_fugue512_close(&ctx_fugue, hash); - - if (hash[0] & 0x8) - { -#ifdef __AES__ - init_echo( &ctx_echo, 512 ); - update_final_echo ( &ctx_echo,(BitSequence*)hash, - (const BitSequence*)hash, 512 ); -#else - sph_echo512_init(&ctx_echo); - 
sph_echo512(&ctx_echo, hash, 64); - sph_echo512_close(&ctx_echo, hash); -#endif - } else { - init_luffa( &ctx_luffa, 512 ); - update_and_final_luffa( &ctx_luffa, (BitSequence*)hash, - (const BitSequence*)hash, 64 ); - } - - sph_shabal512_init(&ctx_shabal); - sph_shabal512(&ctx_shabal, hash, 64); - sph_shabal512_close(&ctx_shabal, hash); - - sph_skein512_init( &ctx_skein ); - sph_skein512( &ctx_skein, hash, 64 ); - sph_skein512_close( &ctx_skein, hash ); - - if (hash[0] & 0x8) - { - sph_shabal512_init(&ctx_shabal); - sph_shabal512(&ctx_shabal, hash, 64); - sph_shabal512_close(&ctx_shabal, hash); - } else { - sph_whirlpool_init(&ctx_whirlpool); - sph_whirlpool(&ctx_whirlpool, hash, 64); - sph_whirlpool_close(&ctx_whirlpool, hash); - } - - sph_shabal512_init(&ctx_shabal); - sph_shabal512(&ctx_shabal, hash, 64); - sph_shabal512_close(&ctx_shabal, hash); - - if (hash[0] & 0x8) - { - sph_hamsi512_init(&ctx_hamsi); - sph_hamsi512(&ctx_hamsi, hash, 64); - sph_hamsi512_close(&ctx_hamsi, hash); - } else { - init_luffa( &ctx_luffa, 512 ); - update_and_final_luffa( &ctx_luffa, (BitSequence*)hash, - (const BitSequence*)hash, 64 ); - } - - memcpy(output, hash, 32); -} - -int scanhash_bastion( struct work *work, uint32_t max_nonce, - uint64_t *hashes_done, struct thr_info *mythr) -{ - uint32_t _ALIGN(64) hash32[8]; - uint32_t _ALIGN(64) endiandata[20]; - uint32_t *pdata = work->data; - uint32_t *ptarget = work->target; - int thr_id = mythr->id; // thr_id arg is deprecated - - const uint32_t Htarg = ptarget[7]; - const uint32_t first_nonce = pdata[19]; - - uint32_t n = first_nonce; - - for (int i=0; i < 19; i++) - be32enc(&endiandata[i], pdata[i]); - - do { - be32enc(&endiandata[19], n); - bastionhash(hash32, endiandata); - if (hash32[7] < Htarg && fulltest(hash32, ptarget)) { - pdata[19] = n; - submit_solution( work, hash32, mythr ); - } - n++; - - } while (n < max_nonce && !work_restart[thr_id].restart); - - *hashes_done = n - first_nonce + 1; - pdata[19] = n; - - return 0; -} - 
-bool register_bastion_algo( algo_gate_t* gate ) -{ - gate->optimizations = SSE2_OPT | AES_OPT; - gate->scanhash = (void*)&scanhash_bastion; - gate->hash = (void*)&bastionhash; - return true; -}; - diff --git a/algo/heavy/heavy.c b/algo/heavy/heavy.c deleted file mode 100644 index 68e5bc7..0000000 --- a/algo/heavy/heavy.c +++ /dev/null @@ -1,111 +0,0 @@ -#include -#include -#include - -#include "algo-gate-api.h" -#include "sph_hefty1.h" -#include "algo/keccak/sph_keccak.h" -#include "algo/blake/sph_blake.h" -#include "algo/groestl/sph_groestl.h" - -/* Combines top 64-bits from each hash into a single hash */ -static void combine_hashes(uint32_t *out, uint32_t *hash1, uint32_t *hash2, uint32_t *hash3, uint32_t *hash4) -{ - uint32_t *hash[4] = { hash1, hash2, hash3, hash4 }; - - /* Transpose first 64 bits of each hash into out */ - memset(out, 0, 32); - int bits = 0; - for (unsigned int i = 7; i >= 6; i--) { - for (uint32_t mask = 0x80000000; mask; mask >>= 1) { - for (unsigned int k = 0; k < 4; k++) { - out[(255 - bits)/32] <<= 1; - if ((hash[k][i] & mask) != 0) - out[(255 - bits)/32] |= 1; - bits++; - } - } - } -} - -extern void heavyhash(unsigned char* output, const unsigned char* input, int len) -{ - unsigned char hash1[32]; - HEFTY1(input, len, hash1); - -// HEFTY1 is new, so take an extra security measure to eliminate -// * the possiblity of collisions: -// * -// * Hash(x) = SHA256(x + HEFTY1(x)) -// * -// * N.B. '+' is concatenation. -// - unsigned char hash2[32];; - SHA256_CTX ctx; - SHA256_Init(&ctx); - SHA256_Update(&ctx, input, len); - SHA256_Update(&ctx, hash1, sizeof(hash1)); - SHA256_Final(hash2, &ctx); - -// * Additional security: Do not rely on a single cryptographic hash -// * function. Instead, combine the outputs of 4 of the most secure -// * cryptographic hash functions-- SHA256, KECCAK512, GROESTL512 -// * and BLAKE512. 
- - - uint32_t hash3[16]; - sph_keccak512_context keccakCtx; - sph_keccak512_init(&keccakCtx); - sph_keccak512(&keccakCtx, input, len); - sph_keccak512(&keccakCtx, hash1, sizeof(hash1)); - sph_keccak512_close(&keccakCtx, (void *)&hash3); - - uint32_t hash4[16]; - sph_groestl512_context groestlCtx; - sph_groestl512_init(&groestlCtx); - sph_groestl512(&groestlCtx, input, len); - sph_groestl512(&groestlCtx, hash1, sizeof(hash1)); - sph_groestl512_close(&groestlCtx, (void *)&hash4); - - uint32_t hash5[16]; - sph_blake512_context blakeCtx; - sph_blake512_init(&blakeCtx); - sph_blake512(&blakeCtx, input, len); - sph_blake512(&blakeCtx, (unsigned char *)&hash1, sizeof(hash1)); - sph_blake512_close(&blakeCtx, (void *)&hash5); - - uint32_t *final = (uint32_t *)output; - combine_hashes(final, (uint32_t *)hash2, hash3, hash4, hash5); - -} - -int scanhash_heavy( uint32_t *pdata, const uint32_t *ptarget, - uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr) -{ - uint32_t hash[8]; - uint32_t start_nonce = pdata[19]; - int thr_id = mythr->id; // thr_id arg is deprecated - - do { - heavyhash((unsigned char *)hash, (unsigned char *)pdata, 80); - - if (hash[7] <= ptarget[7]) { - if (fulltest(hash, ptarget)) { - *hashes_done = pdata[19] - start_nonce; - return 1; - break; - } - } - pdata[19]++; - } while (pdata[19] < max_nonce && !work_restart[thr_id].restart); - *hashes_done = pdata[19] - start_nonce; - return 0; -} - -bool register_heavy_algo( algo_gate_t* gate ) -{ - gate->scanhash = (void*)&scanhash_heavy; - gate->hash = (void*)&heavyhash; - return true; -}; - diff --git a/algo/jh/jha.c b/algo/jh/jha.c index d02b9e5..75becd4 100644 --- a/algo/jh/jha.c +++ b/algo/jh/jha.c @@ -1,4 +1,7 @@ #include "jha-gate.h" + +#if !defined(JHA_8WAY) && !defined(JHA_4WAY) + #include #include #include @@ -133,3 +136,4 @@ int scanhash_jha( struct work *work, uint32_t max_nonce, return 0; } +#endif diff --git a/algo/keccak/keccak-hash-4way.c b/algo/keccak/keccak-hash-4way.c index 
10ac7f4..cc88332 100644 --- a/algo/keccak/keccak-hash-4way.c +++ b/algo/keccak/keccak-hash-4way.c @@ -3,6 +3,8 @@ #include "keccak-hash-4way.h" #include "keccak-gate.h" +#if defined(__AVX2__) + static const uint64_t RC[] = { 0x0000000000000001, 0x0000000000008082, 0x800000000000808A, 0x8000000080008000, @@ -239,7 +241,7 @@ keccak512_8way_close(void *cc, void *dst) #endif // AVX512 -#if defined(__AVX2__) +// AVX2 #define INPUT_BUF(size) do { \ size_t j; \ diff --git a/algo/keccak/keccak.c b/algo/keccak/keccak.c index d122ce3..2dde233 100644 --- a/algo/keccak/keccak.c +++ b/algo/keccak/keccak.c @@ -1,4 +1,6 @@ -#include "algo-gate-api.h" +#include "keccak-gate.h" + +#if !defined(KECCAK_8WAY) && !defined(KECCAK_4WAY) #include #include @@ -49,3 +51,4 @@ int scanhash_keccak( struct work *work, uint32_t max_nonce, return 0; } +#endif diff --git a/algo/keccak/sha3d.c b/algo/keccak/sha3d.c index e9fd369..43d8c4f 100644 --- a/algo/keccak/sha3d.c +++ b/algo/keccak/sha3d.c @@ -1,4 +1,7 @@ -#include "algo-gate-api.h" +#include "keccak-gate.h" + +#if !defined(KECCAK_8WAY) && !defined(KECCAK_4WAY) + #include #include #include @@ -48,3 +51,4 @@ int scanhash_sha3d( struct work *work, uint32_t max_nonce, return 0; } +#endif diff --git a/algo/luffa/luffa.c b/algo/luffa/luffa.c deleted file mode 100644 index 7d68af6..0000000 --- a/algo/luffa/luffa.c +++ /dev/null @@ -1,63 +0,0 @@ -#include "algo-gate-api.h" - -#include -#include -#include -#include - -#include "sph_luffa.h" - -void luffahash(void *output, const void *input) -{ - unsigned char _ALIGN(128) hash[64]; - sph_luffa512_context ctx_luffa; - - sph_luffa512_init(&ctx_luffa); - sph_luffa512 (&ctx_luffa, input, 80); - sph_luffa512_close(&ctx_luffa, (void*) hash); - - memcpy(output, hash, 32); -} - -int scanhash_luffa(int thr_id, struct work *work, - uint32_t max_nonce, uint64_t *hashes_done) -{ - uint32_t *pdata = work->data; - uint32_t *ptarget = work->target; - - uint32_t _ALIGN(64) hash64[8]; - uint32_t _ALIGN(64) 
endiandata[20]; - - const uint32_t Htarg = ptarget[7]; - const uint32_t first_nonce = pdata[19]; - - uint32_t n = first_nonce; - - for (int i=0; i < 19; i++) - be32enc(&endiandata[i], pdata[i]); - - do { - be32enc(&endiandata[19], n); - luffahash(hash64, endiandata); - if (hash64[7] < Htarg && fulltest(hash64, ptarget)) { - *hashes_done = n - first_nonce + 1; - pdata[19] = n; - return true; - } - n++; - - } while (n < max_nonce && !work_restart[thr_id].restart); - - *hashes_done = n - first_nonce + 1; - pdata[19] = n; - - return 0; -} - -bool register_luffa_algo( algo_gate_t* gate ) -{ - gate->scanhash = (void*)&scanhash_luffa; - gate->hash = (void*)&luffahash; - return true; -}; - diff --git a/algo/luffa/luffa_for_sse2.h b/algo/luffa/luffa_for_sse2.h index 5d0cb75..f20a400 100644 --- a/algo/luffa/luffa_for_sse2.h +++ b/algo/luffa/luffa_for_sse2.h @@ -1,3 +1,6 @@ +#if !defined(LUFFA_FOR_SSE2_H__) +#define LUFFA_FOR_SSE2_H__ 1 + /* * luffa_for_sse2.h * Version 2.0 (Sep 15th 2009) @@ -48,8 +51,6 @@ typedef struct { uint32 buffer[8] __attribute((aligned(32))); __m128i chainv[10] __attribute((aligned(32))); /* Chaining values */ -// uint64 bitlen[2]; /* Message length in bits */ -// uint32 rembitlen; /* Length of buffer data to be hashed */ int hashbitlen; int rembytes; } hashState_luffa; @@ -67,4 +68,4 @@ HashReturn update_and_final_luffa( hashState_luffa *state, BitSequence* output, int luffa_full( hashState_luffa *state, BitSequence* output, int hashbitlen, const BitSequence* data, size_t inlen ); - +#endif // LUFFA_FOR_SSE2_H___ diff --git a/algo/lyra2/allium-4way.c b/algo/lyra2/allium-4way.c index 203ac89..44c568e 100644 --- a/algo/lyra2/allium-4way.c +++ b/algo/lyra2/allium-4way.c @@ -115,9 +115,8 @@ void allium_16way_hash( void *state, const void *input ) intrlv_4x128( vhashA, hash0, hash1, hash2, hash3, 256 ); intrlv_4x128( vhashB, hash4, hash5, hash6, hash7, 256 ); - cube_4way_update_close( &ctx.cube, vhashA, vhashA, 32 ); - cube_4way_init( &ctx.cube, 256, 16, 
32 ); - cube_4way_update_close( &ctx.cube, vhashB, vhashB, 32 ); + cube_4way_full( &ctx.cube, vhashA, 256, vhashA, 32 ); + cube_4way_full( &ctx.cube, vhashB, 256, vhashB, 32 ); dintrlv_4x128( hash0, hash1, hash2, hash3, vhashA, 256 ); dintrlv_4x128( hash4, hash5, hash6, hash7, vhashB, 256 ); @@ -125,10 +124,8 @@ void allium_16way_hash( void *state, const void *input ) intrlv_4x128( vhashA, hash8, hash9, hash10, hash11, 256 ); intrlv_4x128( vhashB, hash12, hash13, hash14, hash15, 256 ); - cube_4way_init( &ctx.cube, 256, 16, 32 ); - cube_4way_update_close( &ctx.cube, vhashA, vhashA, 32 ); - cube_4way_init( &ctx.cube, 256, 16, 32 ); - cube_4way_update_close( &ctx.cube, vhashB, vhashB, 32 ); + cube_4way_full( &ctx.cube, vhashA, 256, vhashA, 32 ); + cube_4way_full( &ctx.cube, vhashB, 256, vhashB, 32 ); dintrlv_4x128( hash8, hash9, hash10, hash11, vhashA, 256 ); dintrlv_4x128( hash12, hash13, hash14, hash15, vhashB, 256 ); @@ -169,7 +166,6 @@ void allium_16way_hash( void *state, const void *input ) skein256_8way_update( &ctx.skein, vhashB, 32 ); skein256_8way_close( &ctx.skein, vhashB ); - dintrlv_8x64( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7, vhashA, 256 ); dintrlv_8x64( hash8, hash9, hash10, hash11, hash12, hash13, hash14, hash15, @@ -179,77 +175,43 @@ void allium_16way_hash( void *state, const void *input ) intrlv_4x128( vhash, hash0, hash1, hash2, hash3, 256 ); - groestl256_4way_update_close( &ctx.groestl, vhash, vhash, 256 ); + groestl256_4way_full( &ctx.groestl, vhash, vhash, 256 ); dintrlv_4x128( state, state+32, state+64, state+96, vhash, 256 ); intrlv_4x128( vhash, hash4, hash5, hash6, hash7, 256 ); - groestl256_4way_init( &ctx.groestl, 32 ); - groestl256_4way_update_close( &ctx.groestl, vhash, vhash, 256 ); + groestl256_4way_full( &ctx.groestl, vhash, vhash, 256 ); dintrlv_4x128( state+128, state+160, state+192, state+224, vhash, 256 ); intrlv_4x128( vhash, hash8, hash9, hash10, hash11, 256 ); - groestl256_4way_init( &ctx.groestl, 32 ); - 
groestl256_4way_update_close( &ctx.groestl, vhash, vhash, 256 ); + groestl256_4way_full( &ctx.groestl, vhash, vhash, 256 ); dintrlv_4x128( state+256, state+288, state+320, state+352, vhash, 256 ); intrlv_4x128( vhash, hash12, hash13, hash14, hash15, 256 ); - groestl256_4way_init( &ctx.groestl, 32 ); - groestl256_4way_update_close( &ctx.groestl, vhash, vhash, 256 ); + groestl256_4way_full( &ctx.groestl, vhash, vhash, 256 ); dintrlv_4x128( state+384, state+416, state+448, state+480, vhash, 256 ); #else - update_and_final_groestl256( &ctx.groestl, state, hash0, 256 ); - memcpy( &ctx.groestl, &allium_16way_ctx.groestl, - sizeof(hashState_groestl256) ); - update_and_final_groestl256( &ctx.groestl, state+32, hash1, 256 ); - memcpy( &ctx.groestl, &allium_16way_ctx.groestl, - sizeof(hashState_groestl256) ); - update_and_final_groestl256( &ctx.groestl, state+64, hash2, 256 ); - memcpy( &ctx.groestl, &allium_16way_ctx.groestl, - sizeof(hashState_groestl256) ); - update_and_final_groestl256( &ctx.groestl, state+96, hash3, 256 ); - memcpy( &ctx.groestl, &allium_16way_ctx.groestl, - sizeof(hashState_groestl256) ); - update_and_final_groestl256( &ctx.groestl, state+128, hash4, 256 ); - memcpy( &ctx.groestl, &allium_16way_ctx.groestl, - sizeof(hashState_groestl256) ); - update_and_final_groestl256( &ctx.groestl, state+160, hash5, 256 ); - memcpy( &ctx.groestl, &allium_16way_ctx.groestl, - sizeof(hashState_groestl256) ); - update_and_final_groestl256( &ctx.groestl, state+192, hash6, 256 ); - memcpy( &ctx.groestl, &allium_16way_ctx.groestl, - sizeof(hashState_groestl256) ); - update_and_final_groestl256( &ctx.groestl, state+224, hash7, 256 ); - memcpy( &ctx.groestl, &allium_16way_ctx.groestl, - sizeof(hashState_groestl256) ); - update_and_final_groestl256( &ctx.groestl, state+256, hash8, 256 ); - memcpy( &ctx.groestl, &allium_16way_ctx.groestl, - sizeof(hashState_groestl256) ); - update_and_final_groestl256( &ctx.groestl, state+288, hash9, 256 ); - memcpy( &ctx.groestl, 
&allium_16way_ctx.groestl, - sizeof(hashState_groestl256) ); - update_and_final_groestl256( &ctx.groestl, state+320, hash10, 256 ); - memcpy( &ctx.groestl, &allium_16way_ctx.groestl, - sizeof(hashState_groestl256) ); - update_and_final_groestl256( &ctx.groestl, state+352, hash11, 256 ); - memcpy( &ctx.groestl, &allium_16way_ctx.groestl, - sizeof(hashState_groestl256) ); - update_and_final_groestl256( &ctx.groestl, state+384, hash12, 256 ); - memcpy( &ctx.groestl, &allium_16way_ctx.groestl, - sizeof(hashState_groestl256) ); - update_and_final_groestl256( &ctx.groestl, state+416, hash13, 256 ); - memcpy( &ctx.groestl, &allium_16way_ctx.groestl, - sizeof(hashState_groestl256) ); - update_and_final_groestl256( &ctx.groestl, state+448, hash14, 256 ); - memcpy( &ctx.groestl, &allium_16way_ctx.groestl, - sizeof(hashState_groestl256) ); - update_and_final_groestl256( &ctx.groestl, state+480, hash15, 256 ); - + groestl256_full( &ctx.groestl, state, hash0, 256 ); + groestl256_full( &ctx.groestl, state+32, hash1, 256 ); + groestl256_full( &ctx.groestl, state+64, hash2, 256 ); + groestl256_full( &ctx.groestl, state+96, hash3, 256 ); + groestl256_full( &ctx.groestl, state+128, hash4, 256 ); + groestl256_full( &ctx.groestl, state+160, hash5, 256 ); + groestl256_full( &ctx.groestl, state+192, hash6, 256 ); + groestl256_full( &ctx.groestl, state+224, hash7, 256 ); + groestl256_full( &ctx.groestl, state+256, hash8, 256 ); + groestl256_full( &ctx.groestl, state+288, hash9, 256 ); + groestl256_full( &ctx.groestl, state+320, hash10, 256 ); + groestl256_full( &ctx.groestl, state+352, hash11, 256 ); + groestl256_full( &ctx.groestl, state+384, hash12, 256 ); + groestl256_full( &ctx.groestl, state+416, hash13, 256 ); + groestl256_full( &ctx.groestl, state+448, hash14, 256 ); + groestl256_full( &ctx.groestl, state+480, hash15, 256 ); #endif } @@ -393,28 +355,14 @@ void allium_8way_hash( void *hash, const void *input ) dintrlv_4x64( hash0, hash1, hash2, hash3, vhashA, 256 ); dintrlv_4x64( 
hash4, hash5, hash6, hash7, vhashB, 256 ); - update_and_final_groestl256( &ctx.groestl, hash0, hash0, 256 ); - memcpy( &ctx.groestl, &allium_8way_ctx.groestl, - sizeof(hashState_groestl256) ); - update_and_final_groestl256( &ctx.groestl, hash1, hash1, 256 ); - memcpy( &ctx.groestl, &allium_8way_ctx.groestl, - sizeof(hashState_groestl256) ); - update_and_final_groestl256( &ctx.groestl, hash2, hash2, 256 ); - memcpy( &ctx.groestl, &allium_8way_ctx.groestl, - sizeof(hashState_groestl256) ); - update_and_final_groestl256( &ctx.groestl, hash3, hash3, 256 ); - memcpy( &ctx.groestl, &allium_8way_ctx.groestl, - sizeof(hashState_groestl256) ); - update_and_final_groestl256( &ctx.groestl, hash4, hash4, 256 ); - memcpy( &ctx.groestl, &allium_8way_ctx.groestl, - sizeof(hashState_groestl256) ); - update_and_final_groestl256( &ctx.groestl, hash5, hash5, 256 ); - memcpy( &ctx.groestl, &allium_8way_ctx.groestl, - sizeof(hashState_groestl256) ); - update_and_final_groestl256( &ctx.groestl, hash6, hash6, 256 ); - memcpy( &ctx.groestl, &allium_8way_ctx.groestl, - sizeof(hashState_groestl256) ); - update_and_final_groestl256( &ctx.groestl, hash7, hash7, 256 ); + groestl256_full( &ctx.groestl, hash0, hash0, 256 ); + groestl256_full( &ctx.groestl, hash1, hash1, 256 ); + groestl256_full( &ctx.groestl, hash2, hash2, 256 ); + groestl256_full( &ctx.groestl, hash3, hash3, 256 ); + groestl256_full( &ctx.groestl, hash4, hash4, 256 ); + groestl256_full( &ctx.groestl, hash5, hash5, 256 ); + groestl256_full( &ctx.groestl, hash6, hash6, 256 ); + groestl256_full( &ctx.groestl, hash7, hash7, 256 ); } int scanhash_allium_8way( struct work *work, uint32_t max_nonce, diff --git a/algo/lyra2/allium.c b/algo/lyra2/allium.c index 593a997..40bc15f 100644 --- a/algo/lyra2/allium.c +++ b/algo/lyra2/allium.c @@ -1,4 +1,7 @@ #include "lyra2-gate.h" + +#if !( defined(ALLIUM_16WAY) || defined(ALLIUM_8WAY) || defined(ALLIUM_4WAY) ) + #include #include "algo/blake/sph_blake.h" #include "algo/keccak/sph_keccak.h" 
@@ -107,3 +110,4 @@ int scanhash_allium( struct work *work, uint32_t max_nonce, return 0; } +#endif diff --git a/algo/lyra2/lyra2-gate.h b/algo/lyra2/lyra2-gate.h index 89ae6da..5497e14 100644 --- a/algo/lyra2/lyra2-gate.h +++ b/algo/lyra2/lyra2-gate.h @@ -75,7 +75,6 @@ int scanhash_lyra2rev2_4way( struct work *work, uint32_t max_nonce, bool init_lyra2rev2_4way_ctx(); #else - void lyra2rev2_hash( void *state, const void *input ); int scanhash_lyra2rev2( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ); diff --git a/algo/lyra2/lyra2h.c b/algo/lyra2/lyra2h.c index 27b5a53..7328f7d 100644 --- a/algo/lyra2/lyra2h.c +++ b/algo/lyra2/lyra2h.c @@ -1,4 +1,7 @@ #include "lyra2-gate.h" + +#if !( defined(LYRA2H_8WAY) || defined(LYRA2H_4WAY) ) + #include #include #include "lyra2.h" @@ -71,3 +74,4 @@ int scanhash_lyra2h( struct work *work, uint32_t max_nonce, *hashes_done = pdata[19] - first_nonce + 1; return 0; } +#endif diff --git a/algo/lyra2/lyra2rev2.c b/algo/lyra2/lyra2rev2.c index 618c045..a9435a8 100644 --- a/algo/lyra2/lyra2rev2.c +++ b/algo/lyra2/lyra2rev2.c @@ -1,4 +1,7 @@ #include "lyra2-gate.h" + +#if !( defined(LYRA2REV2_16WAY) || defined(LYRA2REV2_8WAY) || defined(LYRA2REV2_4WAY) ) + #include #include "algo/blake/sph_blake.h" #include "algo/cubehash/sph_cubehash.h" @@ -107,4 +110,4 @@ int scanhash_lyra2rev2( struct work *work, *hashes_done = pdata[19] - first_nonce + 1; return 0; } - +#endif diff --git a/algo/lyra2/lyra2rev3-4way.c b/algo/lyra2/lyra2rev3-4way.c index a7a9a3c..13ccdd7 100644 --- a/algo/lyra2/lyra2rev3-4way.c +++ b/algo/lyra2/lyra2rev3-4way.c @@ -79,19 +79,16 @@ void lyra2rev3_16way_hash( void *state, const void *input ) dintrlv_2x256( hash14, hash15, vhash, 256 ); intrlv_4x128( vhash, hash0, hash1, hash2, hash3, 256 ); - cube_4way_update_close( &ctx.cube, vhash, vhash, 32 ); + cube_4way_full( &ctx.cube, vhash, 256, vhash, 32 ); dintrlv_4x128( hash0, hash1, hash2, hash3, vhash, 256 ); intrlv_4x128( vhash, 
hash4, hash5, hash6, hash7, 256 ); - cube_4way_init( &ctx.cube, 256, 16, 32 ); - cube_4way_update_close( &ctx.cube, vhash, vhash, 32 ); + cube_4way_full( &ctx.cube, vhash, 256, vhash, 32 ); dintrlv_4x128( hash4, hash5, hash6, hash7, vhash, 256 ); intrlv_4x128( vhash, hash8, hash9, hash10, hash11, 256 ); - cube_4way_init( &ctx.cube, 256, 16, 32 ); - cube_4way_update_close( &ctx.cube, vhash, vhash, 32 ); + cube_4way_full( &ctx.cube, vhash, 256, vhash, 32 ); dintrlv_4x128( hash8, hash9, hash10, hash11, vhash, 256 ); intrlv_4x128( vhash, hash12, hash13, hash14, hash15, 256 ); - cube_4way_init( &ctx.cube, 256, 16, 32 ); - cube_4way_update_close( &ctx.cube, vhash, vhash, 32 ); + cube_4way_full( &ctx.cube, vhash, 256, vhash, 32 ); dintrlv_4x128( hash12, hash13, hash14, hash15, vhash, 256 ); intrlv_2x256( vhash, hash0, hash1, 256 ); @@ -224,21 +221,14 @@ void lyra2rev3_8way_hash( void *state, const void *input ) LYRA2REV3( l2v3_wholeMatrix, hash6, 32, hash6, 32, hash6, 32, 1, 4, 4 ); LYRA2REV3( l2v3_wholeMatrix, hash7, 32, hash7, 32, hash7, 32, 1, 4, 4 ); - cubehashUpdateDigest( &ctx.cube, (byte*) hash0, (const byte*) hash0, 32 ); - cubehashInit( &ctx.cube, 256, 16, 32 ); - cubehashUpdateDigest( &ctx.cube, (byte*) hash1, (const byte*) hash1, 32 ); - cubehashInit( &ctx.cube, 256, 16, 32 ); - cubehashUpdateDigest( &ctx.cube, (byte*) hash2, (const byte*) hash2, 32 ); - cubehashInit( &ctx.cube, 256, 16, 32 ); - cubehashUpdateDigest( &ctx.cube, (byte*) hash3, (const byte*) hash3, 32 ); - cubehashInit( &ctx.cube, 256, 16, 32 ); - cubehashUpdateDigest( &ctx.cube, (byte*) hash4, (const byte*) hash4, 32 ); - cubehashInit( &ctx.cube, 256, 16, 32 ); - cubehashUpdateDigest( &ctx.cube, (byte*) hash5, (const byte*) hash5, 32 ); - cubehashInit( &ctx.cube, 256, 16, 32 ); - cubehashUpdateDigest( &ctx.cube, (byte*) hash6, (const byte*) hash6, 32 ); - cubehashInit( &ctx.cube, 256, 16, 32 ); - cubehashUpdateDigest( &ctx.cube, (byte*) hash7, (const byte*) hash7, 32 ); + cubehash_full( 
&ctx.cube, (byte*) hash0, 256, (const byte*) hash0, 32 ); + cubehash_full( &ctx.cube, (byte*) hash1, 256, (const byte*) hash1, 32 ); + cubehash_full( &ctx.cube, (byte*) hash2, 256, (const byte*) hash2, 32 ); + cubehash_full( &ctx.cube, (byte*) hash3, 256, (const byte*) hash3, 32 ); + cubehash_full( &ctx.cube, (byte*) hash4, 256, (const byte*) hash4, 32 ); + cubehash_full( &ctx.cube, (byte*) hash5, 256, (const byte*) hash5, 32 ); + cubehash_full( &ctx.cube, (byte*) hash6, 256, (const byte*) hash6, 32 ); + cubehash_full( &ctx.cube, (byte*) hash7, 256, (const byte*) hash7, 32 ); LYRA2REV3( l2v3_wholeMatrix, hash0, 32, hash0, 32, hash0, 32, 1, 4, 4 ); LYRA2REV3( l2v3_wholeMatrix, hash1, 32, hash1, 32, hash1, 32, 1, 4, 4 ); @@ -265,25 +255,24 @@ int scanhash_lyra2rev3_8way( struct work *work, const uint32_t max_nonce, uint32_t *hash7 = &hash[7<<3]; uint32_t lane_hash[8] __attribute__ ((aligned (32))); uint32_t *pdata = work->data; - const uint32_t *ptarget = work->target; + uint32_t *ptarget = work->target; const uint32_t first_nonce = pdata[19]; + const uint32_t last_nonce = max_nonce - 8; uint32_t n = first_nonce; const uint32_t Htarg = ptarget[7]; __m256i *noncev = (__m256i*)vdata + 19; // aligned const int thr_id = mythr->id; + const bool bench = opt_benchmark; - if ( opt_benchmark ) ( (uint32_t*)ptarget )[7] = 0x0000ff; + if ( bench ) ptarget[7] = 0x0000ff; mm256_bswap32_intrlv80_8x32( vdata, pdata ); - + *noncev = _mm256_set_epi32( n+7, n+6, n+5, n+4, n+3, n+2, n+1, n ); blake256_8way_init( &l2v3_8way_ctx.blake ); blake256_8way_update( &l2v3_8way_ctx.blake, vdata, 64 ); do { - *noncev = mm256_bswap_32( _mm256_set_epi32( n+7, n+6, n+5, n+4, - n+3, n+2, n+1, n ) ); - lyra2rev3_8way_hash( hash, vdata ); pdata[19] = n; @@ -291,15 +280,17 @@ int scanhash_lyra2rev3_8way( struct work *work, const uint32_t max_nonce, if ( unlikely( hash7[lane] <= Htarg ) ) { extr_lane_8x32( lane_hash, hash, lane, 256 ); - if ( likely( fulltest( lane_hash, ptarget ) && !opt_benchmark ) ) + 
if ( likely( valid_hash( lane_hash, ptarget ) && !bench ) ) { - pdata[19] = n + lane; + pdata[19] = bswap_32( n + lane ); submit_lane_solution( work, lane_hash, mythr, lane ); } } + *noncev = _mm256_add_epi32( *noncev, m256_const1_32( 8 ) ); n += 8; - } while ( likely( (n < max_nonce-8) && !work_restart[thr_id].restart ) ); - *hashes_done = n - first_nonce + 1; + } while ( likely( (n < last_nonce) && !work_restart[thr_id].restart ) ); + pdata[19] = n; + *hashes_done = n - first_nonce; return 0; } diff --git a/algo/lyra2/lyra2rev3.c b/algo/lyra2/lyra2rev3.c index 83380d9..f815058 100644 --- a/algo/lyra2/lyra2rev3.c +++ b/algo/lyra2/lyra2rev3.c @@ -1,4 +1,7 @@ #include "lyra2-gate.h" + +#if !( defined(LYRA2REV3_16WAY) || defined(LYRA2REV3_8WAY) || defined(LYRA2REV3_4WAY) ) + #include #include "algo/blake/sph_blake.h" #include "algo/cubehash/sph_cubehash.h" @@ -96,4 +99,4 @@ int scanhash_lyra2rev3( struct work *work, *hashes_done = pdata[19] - first_nonce + 1; return 0; } - +#endif diff --git a/algo/lyra2/lyra2z-4way.c b/algo/lyra2/lyra2z-4way.c index 7273ebe..e31e43b 100644 --- a/algo/lyra2/lyra2z-4way.c +++ b/algo/lyra2/lyra2z-4way.c @@ -97,41 +97,42 @@ void lyra2z_16way_hash( void *state, const void *input ) int scanhash_lyra2z_16way( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) { - uint32_t hash[8*16] __attribute__ ((aligned (128))); + uint64_t hash[4*16] __attribute__ ((aligned (128))); uint32_t vdata[20*16] __attribute__ ((aligned (64))); uint32_t *pdata = work->data; uint32_t *ptarget = work->target; - const uint32_t Htarg = ptarget[7]; const uint32_t first_nonce = pdata[19]; uint32_t n = first_nonce; + const uint32_t last_nonce = max_nonce - 16; __m512i *noncev = (__m512i*)vdata + 19; // aligned - int thr_id = mythr->id; // thr_id arg is deprecated + const int thr_id = mythr->id; + const bool bench = opt_benchmark; - if ( opt_benchmark ) - ptarget[7] = 0x0000ff; + if ( bench ) ptarget[7] = 0x0000ff; 
mm512_bswap32_intrlv80_16x32( vdata, pdata ); + *noncev = _mm512_set_epi32( n+15, n+14, n+13, n+12, n+11, n+10, n+ 9, n+ 8, + n+ 7, n+ 6, n+ 5, n+ 4, n+ 3, n+ 2, n +1, n ); lyra2z_16way_midstate( vdata ); do { - *noncev = mm512_bswap_32( _mm512_set_epi32( n+15, n+14, n+13, n+12, - n+11, n+10, n+ 9, n+ 8, - n+ 7, n+ 6, n+ 5, n+ 4, - n+ 3, n+ 2, n+ 1, n ) ); lyra2z_16way_hash( hash, vdata ); - pdata[19] = n; - for ( int i = 0; i < 16; i++ ) - if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) - && !opt_benchmark ) + for ( int lane = 0; lane < 16; lane++ ) { - pdata[19] = n+i; - submit_lane_solution( work, hash+(i<<3), mythr, i ); + const uint64_t *lane_hash = hash + (lane<<2); + if ( unlikely( valid_hash( lane_hash, ptarget ) && !bench ) ) + { + pdata[19] = bswap_32( n + lane ); + submit_lane_solution( work, lane_hash, mythr, lane ); + } } + *noncev = _mm512_add_epi32( *noncev, m512_const1_32( 16 ) ); n += 16; - } while ( (n < max_nonce-16) && !work_restart[thr_id].restart); + } while ( likely( (n < last_nonce) && !work_restart[thr_id].restart ) ); - *hashes_done = n - first_nonce + 1; + pdata[19] = n; + *hashes_done = n - first_nonce; return 0; } @@ -195,39 +196,40 @@ void lyra2z_8way_hash( void *state, const void *input ) int scanhash_lyra2z_8way( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) { - uint32_t hash[8*8] __attribute__ ((aligned (64))); + uint64_t hash[4*8] __attribute__ ((aligned (64))); uint32_t vdata[20*8] __attribute__ ((aligned (64))); uint32_t *pdata = work->data; uint32_t *ptarget = work->target; - const uint32_t Htarg = ptarget[7]; const uint32_t first_nonce = pdata[19]; + const uint32_t last_nonce = max_nonce - 8; uint32_t n = first_nonce; __m256i *noncev = (__m256i*)vdata + 19; // aligned - int thr_id = mythr->id; // thr_id arg is deprecated + const int thr_id = mythr->id; + const bool bench = opt_benchmark; - if ( opt_benchmark ) - ptarget[7] = 0x0000ff; + if ( bench ) ptarget[7] = 
0x0000ff; mm256_bswap32_intrlv80_8x32( vdata, pdata ); + *noncev = _mm256_set_epi32( n+7, n+6, n+5, n+4, n+3, n+2, n+1, n ); lyra2z_8way_midstate( vdata ); do { - *noncev = mm256_bswap_32( - _mm256_set_epi32( n+7, n+6, n+5, n+4, n+3, n+2, n+1, n ) ); lyra2z_8way_hash( hash, vdata ); - pdata[19] = n; - for ( int i = 0; i < 8; i++ ) - if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) - && !opt_benchmark ) + for ( int lane = 0; lane < 8; lane++ ) { - pdata[19] = n+i; - submit_lane_solution( work, hash+(i<<3), mythr, i ); + const uint64_t *lane_hash = hash + (lane<<2); + if ( unlikely( valid_hash( lane_hash, ptarget ) && !bench ) ) + { + pdata[19] = bswap_32( n + lane ); + submit_lane_solution( work, lane_hash, mythr, lane ); + } } + *noncev = _mm256_add_epi32( *noncev, m256_const1_32( 8 ) ); n += 8; - } while ( (n < max_nonce-8) && !work_restart[thr_id].restart); - - *hashes_done = n - first_nonce + 1; + } while ( likely( (n < last_nonce) && !work_restart[thr_id].restart) ); + pdata[19] = n; + *hashes_done = n - first_nonce; return 0; } @@ -274,39 +276,40 @@ void lyra2z_4way_hash( void *state, const void *input ) int scanhash_lyra2z_4way( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) { - uint32_t hash[8*4] __attribute__ ((aligned (64))); + uint64_t hash[4*4] __attribute__ ((aligned (64))); uint32_t vdata[20*4] __attribute__ ((aligned (64))); uint32_t *pdata = work->data; uint32_t *ptarget = work->target; - const uint32_t Htarg = ptarget[7]; const uint32_t first_nonce = pdata[19]; + const uint32_t last_nonce = max_nonce - 4; uint32_t n = first_nonce; __m128i *noncev = (__m128i*)vdata + 19; // aligned - int thr_id = mythr->id; // thr_id arg is deprecated + const int thr_id = mythr->id; + const bool bench = opt_benchmark; - if ( opt_benchmark ) - ptarget[7] = 0x0000ff; + if ( bench ) ptarget[7] = 0x0000ff; mm128_bswap32_intrlv80_4x32( vdata, pdata ); + *noncev = _mm_set_epi32( n+3, n+2, n+1, n ); 
lyra2z_4way_midstate( vdata ); do { - *noncev = mm128_bswap_32( _mm_set_epi32( n+3, n+2, n+1, n ) ); - lyra2z_4way_hash( hash, vdata ); - pdata[19] = n; - - for ( int i = 0; i < 4; i++ ) - if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) - && !opt_benchmark ) + for ( int lane = 0; lane < 4; lane++ ) { - pdata[19] = n+i; - submit_lane_solution( work, hash+(i<<3), mythr, i ); + const uint64_t *lane_hash = hash + (lane<<2); + if ( unlikely( valid_hash( lane_hash, ptarget ) && !bench ) ) + { + pdata[19] = bswap_32( n + lane ); + submit_lane_solution( work, lane_hash, mythr, lane ); + } } + *noncev = _mm_add_epi32( *noncev, m128_const1_32( 4 ) ); n += 4; - } while ( (n < max_nonce-4) && !work_restart[thr_id].restart); + } while ( likely( (n < last_nonce) && !work_restart[thr_id].restart ) ); - *hashes_done = n - first_nonce + 1; + pdata[19] = n; + *hashes_done = n - first_nonce; return 0; } diff --git a/algo/lyra2/lyra2z.c b/algo/lyra2/lyra2z.c index b1ab094..4d602f2 100644 --- a/algo/lyra2/lyra2z.c +++ b/algo/lyra2/lyra2z.c @@ -1,6 +1,9 @@ #include #include #include "lyra2-gate.h" + +#if !( defined(LYRA2Z_16WAY) || defined(LYRA2Z_8WAY) || defined(LYRA2Z_4WAY) ) + #include "lyra2.h" #include "algo/blake/sph_blake.h" #include "simd-utils.h" @@ -80,4 +83,4 @@ int scanhash_lyra2z( struct work *work, uint32_t max_nonce, *hashes_done = pdata[19] - first_nonce + 1; return 0; } - +#endif diff --git a/algo/lyra2/phi2.c b/algo/lyra2/phi2.c index cad10b3..21fb5a4 100644 --- a/algo/lyra2/phi2.c +++ b/algo/lyra2/phi2.c @@ -96,32 +96,30 @@ int scanhash_phi2( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t _ALIGN(128) hash[8]; - uint32_t _ALIGN(128) endiandata[36]; + uint32_t _ALIGN(128) edata[36]; uint32_t *pdata = work->data; uint32_t *ptarget = work->target; const uint32_t Htarg = ptarget[7]; const uint32_t first_nonce = pdata[19]; uint32_t n = first_nonce; - int thr_id = mythr->id; // thr_id arg is deprecated - - 
if(opt_benchmark){ - ptarget[7] = 0x00ff; - } + const int thr_id = mythr->id; + const bool bench = opt_benchmark; + if( bench ) ptarget[7] = 0x00ff; phi2_has_roots = false; - for ( int i=0; i < 36; i++ ) + + for ( int i = 0; i < 36; i++ ) { - be32enc(&endiandata[i], pdata[i]); + be32enc( &edata[i], pdata[i] ); if ( i >= 20 && pdata[i] ) phi2_has_roots = true; } do { - be32enc( &endiandata[19], n ); - phi2_hash( hash, endiandata ); - if ( hash[7] < Htarg ) - if ( fulltest( hash, ptarget ) && !opt_benchmark ) + edata[19] = n; + phi2_hash( hash, edata ); + if ( valid_hash( hash, ptarget ) && !opt_benchmark ) { - pdata[19] = n; + be32enc( pdata+19, n ); submit_solution( work, hash, mythr ); } n++; diff --git a/algo/lyra2/sponge.c b/algo/lyra2/sponge.c index 9f400b5..34ef08f 100644 --- a/algo/lyra2/sponge.c +++ b/algo/lyra2/sponge.c @@ -89,6 +89,9 @@ inline void initState( uint64_t State[/*16*/] ) * * @param v A 1024-bit (16 uint64_t) array to be processed by Blake2b's G function */ + +#if !defined(__AVX512F__) && !defined(__AVX2__) && !defined(__SSE2__) + inline static void blake2bLyra( uint64_t *v ) { ROUND_LYRA(0); @@ -114,6 +117,8 @@ inline static void reducedBlake2bLyra( uint64_t *v ) ROUND_LYRA(0); } +#endif + /** * Performs a squeeze operation, using Blake2b's G function as the * internal permutation diff --git a/algo/lyra2/sponge.h b/algo/lyra2/sponge.h index cb8d8d0..b24b173 100644 --- a/algo/lyra2/sponge.h +++ b/algo/lyra2/sponge.h @@ -171,7 +171,6 @@ static inline uint64_t rotr64( const uint64_t w, const unsigned c ){ LYRA_ROUND_AVX(s0,s1,s2,s3,s4,s5,s6,s7) \ LYRA_ROUND_AVX(s0,s1,s2,s3,s4,s5,s6,s7) - #endif // AVX2 else SSE2 // Scalar @@ -200,7 +199,6 @@ static inline uint64_t rotr64( const uint64_t w, const unsigned c ){ G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \ G(r,7,v[ 3],v[ 4],v[ 9],v[14]); - #if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__) union _ovly_512 diff --git a/algo/nist5/nist5.c b/algo/nist5/nist5.c 
index 307ffd8..ce965ca 100644 --- a/algo/nist5/nist5.c +++ b/algo/nist5/nist5.c @@ -1,4 +1,7 @@ #include "nist5-gate.h" + +#if !defined(NIST5_8WAY) && !defined(NIST5_4WAY) + #include #include #include @@ -105,13 +108,4 @@ int scanhash_nist5( struct work *work, uint32_t max_nonce, pdata[19] = n; return 0; } -/* -bool register_nist5_algo( algo_gate_t* gate ) -{ - gate->optimizations = SSE2_OPT | AES_OPT; - init_nist5_ctx(); - gate->scanhash = (void*)&scanhash_nist5; - gate->hash = (void*)&nist5hash; - return true; -}; -*/ +#endif diff --git a/algo/quark/anime.c b/algo/quark/anime.c index f1a1199..fe07334 100644 --- a/algo/quark/anime.c +++ b/algo/quark/anime.c @@ -1,5 +1,8 @@ #include "cpuminer-config.h" #include "anime-gate.h" + +#if !defined(ANIME_8WAY) && !defined(ANIME_4WAY) + #include #include #include @@ -169,4 +172,4 @@ int scanhash_anime( struct work *work, uint32_t max_nonce, pdata[19] = n; return 0; } - +#endif diff --git a/algo/quark/hmq1725.c b/algo/quark/hmq1725.c index be1d96e..ea1ca62 100644 --- a/algo/quark/hmq1725.c +++ b/algo/quark/hmq1725.c @@ -1,4 +1,7 @@ #include "hmq1725-gate.h" + +#if !defined(HMQ1725_8WAY) && !defined(HMQ1725_4WAY) + #include #include #include "algo/blake/sph_blake.h" @@ -7,10 +10,7 @@ #include "algo/jh/sph_jh.h" #include "algo/keccak/sph_keccak.h" #include "algo/skein/sph_skein.h" -#include "algo/luffa/sph_luffa.h" -#include "algo/cubehash/sph_cubehash.h" #include "algo/shavite/sph_shavite.h" -#include "algo/simd/sph_simd.h" #include "algo/echo/sph_echo.h" #include "algo/hamsi/sph_hamsi.h" #include "algo/fugue/sph_fugue.h" @@ -21,6 +21,9 @@ #if defined(__AES__) #include "algo/groestl/aes_ni/hash-groestl.h" #include "algo/echo/aes_ni/hash_api.h" +#else + #include "algo/groestl/sph_groestl.h" + #include "algo/echo/sph_echo.h" #endif #include "algo/luffa/luffa_for_sse2.h" #include "algo/cubehash/cubehash_sse2.h" @@ -392,3 +395,4 @@ int scanhash_hmq1725( struct work *work, uint32_t max_nonce, pdata[19] = n; return 0; } +#endif 
diff --git a/algo/quark/quark.c b/algo/quark/quark.c index 00a5d58..3f26e06 100644 --- a/algo/quark/quark.c +++ b/algo/quark/quark.c @@ -1,5 +1,8 @@ #include "cpuminer-config.h" #include "quark-gate.h" + +#if !defined(QUARK_8WAY) && !defined(QUARK_4WAY) + #include #include #include @@ -137,4 +140,4 @@ int scanhash_quark( struct work *work, uint32_t max_nonce, pdata[19] = n; return 0; } - +#endif diff --git a/algo/qubit/deep.c b/algo/qubit/deep.c index 85a66dd..230a291 100644 --- a/algo/qubit/deep.c +++ b/algo/qubit/deep.c @@ -1,4 +1,7 @@ #include "deep-gate.h" + +#if !defined(DEEP_8WAY) && !defined(DEEP_4WAY) + #include #include #include @@ -114,4 +117,4 @@ int scanhash_deep( struct work *work, uint32_t max_nonce, pdata[19] = n; return 0; } - +#endif diff --git a/algo/qubit/qubit.c b/algo/qubit/qubit.c index 3794eee..38b72ee 100644 --- a/algo/qubit/qubit.c +++ b/algo/qubit/qubit.c @@ -1,4 +1,7 @@ #include "qubit-gate.h" + +#if !defined(QUBIT_8WAY) && !defined(QUBIT_4WAY) + #include #include #include @@ -126,4 +129,4 @@ int scanhash_qubit( struct work *work, uint32_t max_nonce, pdata[19] = n; return 0; } - +#endif diff --git a/algo/ripemd/lbry.c b/algo/ripemd/lbry.c index c7fbb12..314d019 100644 --- a/algo/ripemd/lbry.c +++ b/algo/ripemd/lbry.c @@ -1,4 +1,7 @@ #include "lbry-gate.h" + +#if !defined(LBRY_16WAY) && !defined(LBRY_8WAY) && !defined(LBRY_4WAY) + #include #include #include @@ -100,3 +103,4 @@ int scanhash_lbry( struct work *work, uint32_t max_nonce, pdata[27] = n; return 0; } +#endif diff --git a/algo/scrypt/pluck.c b/algo/scrypt/pluck.c deleted file mode 100644 index a0e7275..0000000 --- a/algo/scrypt/pluck.c +++ /dev/null @@ -1,505 +0,0 @@ -/* - * Copyright 2009 Colin Percival, 2011 ArtForz, 2011-2014 pooler, 2015 Jordan Earls - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - -#include "cpuminer-config.h" -#include "algo-gate-api.h" - -#include -#include - -#define BLOCK_HEADER_SIZE 80 - -// windows -#ifndef htobe32 -#define htobe32(x) ((uint32_t)htonl((uint32_t)(x))) -#endif - -#ifdef _MSC_VER -#define ROTL(a, b) _rotl(a,b) -#define ROTR(a, b) _rotr(a,b) -#else -#define ROTL(a, b) (((a) << b) | ((a) >> (32 - b))) -#define ROTR(a, b) ((a >> b) | (a << (32 - b))) -#endif - -#if defined(_MSC_VER) && defined(_M_X64) -#define _VECTOR __vectorcall -#include -//#include //SSE2 -//#include //SSE3 -//#include //SSSE3 -//#include //SSE4.1 -//#include //SSE4.2 -//#include //SSE4A -//#include //AES -//#include //AVX -#define OPT_COMPATIBLE -#elif defined(__GNUC__) && defined(__x86_64__) -#include -#define _VECTOR -#endif - -static __thread char *scratchbuf; - -#ifdef OPT_COMPATIBLE -static void _VECTOR xor_salsa8(__m128i B[4], const __m128i Bx[4], int i) -{ - __m128i X0, X1, X2, X3; - - if (i <= 128) { - // a xor 0 = a - X0 = B[0] = Bx[0]; - X1 = B[1] = Bx[1]; - X2 = B[2] = Bx[2]; - X3 = B[3] = Bx[3]; - } else { - X0 = B[0] = _mm_xor_si128(B[0], Bx[0]); - X1 = B[1] = _mm_xor_si128(B[1], Bx[1]); - X2 = B[2] = _mm_xor_si128(B[2], Bx[2]); - X3 = B[3] = _mm_xor_si128(B[3], Bx[3]); - } - - for (i = 0; i < 4; i++) { - /* Operate on columns. 
*/ - X1.m128i_u32[0] ^= ROTL(X0.m128i_u32[0] + X3.m128i_u32[0], 7); - X2.m128i_u32[1] ^= ROTL(X1.m128i_u32[1] + X0.m128i_u32[1], 7); - X3.m128i_u32[2] ^= ROTL(X2.m128i_u32[2] + X1.m128i_u32[2], 7); - X0.m128i_u32[3] ^= ROTL(X3.m128i_u32[3] + X2.m128i_u32[3], 7); - - X2.m128i_u32[0] ^= ROTL(X1.m128i_u32[0] + X0.m128i_u32[0], 9); - X3.m128i_u32[1] ^= ROTL(X2.m128i_u32[1] + X1.m128i_u32[1], 9); - X0.m128i_u32[2] ^= ROTL(X3.m128i_u32[2] + X2.m128i_u32[2], 9); - X1.m128i_u32[3] ^= ROTL(X0.m128i_u32[3] + X3.m128i_u32[3], 9); - - X3.m128i_u32[0] ^= ROTL(X2.m128i_u32[0] + X1.m128i_u32[0], 13); - X0.m128i_u32[1] ^= ROTL(X3.m128i_u32[1] + X2.m128i_u32[1], 13); - X1.m128i_u32[2] ^= ROTL(X0.m128i_u32[2] + X3.m128i_u32[2], 13); - X2.m128i_u32[3] ^= ROTL(X1.m128i_u32[3] + X0.m128i_u32[3], 13); - - X0.m128i_u32[0] ^= ROTL(X3.m128i_u32[0] + X2.m128i_u32[0], 18); - X1.m128i_u32[1] ^= ROTL(X0.m128i_u32[1] + X3.m128i_u32[1], 18); - X2.m128i_u32[2] ^= ROTL(X1.m128i_u32[2] + X0.m128i_u32[2], 18); - X3.m128i_u32[3] ^= ROTL(X2.m128i_u32[3] + X1.m128i_u32[3], 18); - - /* Operate on rows. 
*/ - X0.m128i_u32[1] ^= ROTL(X0.m128i_u32[0] + X0.m128i_u32[3], 7); X1.m128i_u32[2] ^= ROTL(X1.m128i_u32[1] + X1.m128i_u32[0], 7); - X2.m128i_u32[3] ^= ROTL(X2.m128i_u32[2] + X2.m128i_u32[1], 7); X3.m128i_u32[0] ^= ROTL(X3.m128i_u32[3] + X3.m128i_u32[2], 7); - X0.m128i_u32[2] ^= ROTL(X0.m128i_u32[1] + X0.m128i_u32[0], 9); X1.m128i_u32[3] ^= ROTL(X1.m128i_u32[2] + X1.m128i_u32[1], 9); - X2.m128i_u32[0] ^= ROTL(X2.m128i_u32[3] + X2.m128i_u32[2], 9); X3.m128i_u32[1] ^= ROTL(X3.m128i_u32[0] + X3.m128i_u32[3], 9); - - X0.m128i_u32[3] ^= ROTL(X0.m128i_u32[2] + X0.m128i_u32[1], 13); X1.m128i_u32[0] ^= ROTL(X1.m128i_u32[3] + X1.m128i_u32[2], 13); - X2.m128i_u32[1] ^= ROTL(X2.m128i_u32[0] + X2.m128i_u32[3], 13); X3.m128i_u32[2] ^= ROTL(X3.m128i_u32[1] + X3.m128i_u32[0], 13); - X0.m128i_u32[0] ^= ROTL(X0.m128i_u32[3] + X0.m128i_u32[2], 18); X1.m128i_u32[1] ^= ROTL(X1.m128i_u32[0] + X1.m128i_u32[3], 18); - X2.m128i_u32[2] ^= ROTL(X2.m128i_u32[1] + X2.m128i_u32[0], 18); X3.m128i_u32[3] ^= ROTL(X3.m128i_u32[2] + X3.m128i_u32[1], 18); - } - - B[0] = _mm_add_epi32(B[0], X0); - B[1] = _mm_add_epi32(B[1], X1); - B[2] = _mm_add_epi32(B[2], X2); - B[3] = _mm_add_epi32(B[3], X3); -} - -#else - -static inline void xor_salsa8(uint32_t B[16], const uint32_t Bx[16], int i) -{ - uint32_t x00,x01,x02,x03,x04,x05,x06,x07,x08,x09,x10,x11,x12,x13,x14,x15; - - if (i <= 128) { - // a xor 0 = a - x00 = B[ 0] = Bx[ 0]; x01 = B[ 1] = Bx[ 1]; x02 = B[ 2] = Bx[ 2]; x03 = B[ 3] = Bx[ 3]; - x04 = B[ 4] = Bx[ 4]; x05 = B[ 5] = Bx[ 5]; x06 = B[ 6] = Bx[ 6]; x07 = B[ 7] = Bx[ 7]; - x08 = B[ 8] = Bx[ 8]; x09 = B[ 9] = Bx[ 9]; x10 = B[10] = Bx[10]; x11 = B[11] = Bx[11]; - x12 = B[12] = Bx[12]; x13 = B[13] = Bx[13]; x14 = B[14] = Bx[14]; x15 = B[15] = Bx[15]; - } else { - x00 = (B[ 0] ^= Bx[ 0]); - x01 = (B[ 1] ^= Bx[ 1]); - x02 = (B[ 2] ^= Bx[ 2]); - x03 = (B[ 3] ^= Bx[ 3]); - x04 = (B[ 4] ^= Bx[ 4]); - x05 = (B[ 5] ^= Bx[ 5]); - x06 = (B[ 6] ^= Bx[ 6]); - x07 = (B[ 7] ^= Bx[ 7]); - x08 = (B[ 8] ^= Bx[ 8]); 
- x09 = (B[ 9] ^= Bx[ 9]); - x10 = (B[10] ^= Bx[10]); - x11 = (B[11] ^= Bx[11]); - x12 = (B[12] ^= Bx[12]); - x13 = (B[13] ^= Bx[13]); - x14 = (B[14] ^= Bx[14]); - x15 = (B[15] ^= Bx[15]); - } - - for (i = 0; i < 8; i += 2) { - /* Operate on columns. */ - x04 ^= ROTL(x00 + x12, 7); x09 ^= ROTL(x05 + x01, 7); - x14 ^= ROTL(x10 + x06, 7); x03 ^= ROTL(x15 + x11, 7); - - x08 ^= ROTL(x04 + x00, 9); x13 ^= ROTL(x09 + x05, 9); - x02 ^= ROTL(x14 + x10, 9); x07 ^= ROTL(x03 + x15, 9); - - x12 ^= ROTL(x08 + x04, 13); x01 ^= ROTL(x13 + x09, 13); - x06 ^= ROTL(x02 + x14, 13); x11 ^= ROTL(x07 + x03, 13); - - x00 ^= ROTL(x12 + x08, 18); x05 ^= ROTL(x01 + x13, 18); - x10 ^= ROTL(x06 + x02, 18); x15 ^= ROTL(x11 + x07, 18); - - /* Operate on rows. */ - x01 ^= ROTL(x00 + x03, 7); x06 ^= ROTL(x05 + x04, 7); - x11 ^= ROTL(x10 + x09, 7); x12 ^= ROTL(x15 + x14, 7); - - x02 ^= ROTL(x01 + x00, 9); x07 ^= ROTL(x06 + x05, 9); - x08 ^= ROTL(x11 + x10, 9); x13 ^= ROTL(x12 + x15, 9); - - x03 ^= ROTL(x02 + x01, 13); x04 ^= ROTL(x07 + x06, 13); - x09 ^= ROTL(x08 + x11, 13); x14 ^= ROTL(x13 + x12, 13); - - x00 ^= ROTL(x03 + x02, 18); x05 ^= ROTL(x04 + x07, 18); - x10 ^= ROTL(x09 + x08, 18); x15 ^= ROTL(x14 + x13, 18); - } - B[ 0] += x00; - B[ 1] += x01; - B[ 2] += x02; - B[ 3] += x03; - B[ 4] += x04; - B[ 5] += x05; - B[ 6] += x06; - B[ 7] += x07; - B[ 8] += x08; - B[ 9] += x09; - B[10] += x10; - B[11] += x11; - B[12] += x12; - B[13] += x13; - B[14] += x14; - B[15] += x15; -} - -#endif - -static const uint32_t sha256_k[64] = { - 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, - 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, - 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, - 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, - 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, - 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, - 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, - 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, - 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, - 0x650a7354, 0x766a0abb, 
0x81c2c92e, 0x92722c85, - 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, - 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, - 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, - 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, - 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, - 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 -}; - -/* Elementary functions used by SHA256 */ -#define Ch(x, y, z) ((x & (y ^ z)) ^ z) -#define Maj(x, y, z) ((x & (y | z)) | (y & z)) -#define S0(x) (ROTR(x, 2) ^ ROTR(x, 13) ^ ROTR(x, 22)) -#define S1(x) (ROTR(x, 6) ^ ROTR(x, 11) ^ ROTR(x, 25)) -#define s0(x) (ROTR(x, 7) ^ ROTR(x, 18) ^ (x >> 3)) -#define s1(x) (ROTR(x, 17) ^ ROTR(x, 19) ^ (x >> 10)) - -/* SHA256 round function */ -#define RND(a, b, c, d, e, f, g, h, k) \ - do { \ - t0 = h + S1(e) + Ch(e, f, g) + k; \ - t1 = S0(a) + Maj(a, b, c); \ - d += t0; \ - h = t0 + t1; \ - } while (0) - -/* Adjusted round function for rotating state */ -#define RNDr(S, W, i) \ - RND(S[(64 - i) % 8], S[(65 - i) % 8], \ - S[(66 - i) % 8], S[(67 - i) % 8], \ - S[(68 - i) % 8], S[(69 - i) % 8], \ - S[(70 - i) % 8], S[(71 - i) % 8], \ - W[i] + sha256_k[i]) - - -static void sha256_transform_volatile(uint32_t *state, uint32_t *block) -{ - uint32_t* W=block; //note: block needs to be a mutable 64 int32_t - uint32_t S[8]; - uint32_t t0, t1; - int i; - - for (i = 16; i < 64; i += 2) { - W[i] = s1(W[i - 2]) + W[i - 7] + s0(W[i - 15]) + W[i - 16]; - W[i+1] = s1(W[i - 1]) + W[i - 6] + s0(W[i - 14]) + W[i - 15]; - } - - /* 2. Initialize working variables. */ - memcpy(S, state, 32); - - /* 3. Mix. 
*/ - RNDr(S, W, 0); - RNDr(S, W, 1); - RNDr(S, W, 2); - RNDr(S, W, 3); - RNDr(S, W, 4); - RNDr(S, W, 5); - RNDr(S, W, 6); - RNDr(S, W, 7); - RNDr(S, W, 8); - RNDr(S, W, 9); - RNDr(S, W, 10); - RNDr(S, W, 11); - RNDr(S, W, 12); - RNDr(S, W, 13); - RNDr(S, W, 14); - RNDr(S, W, 15); - RNDr(S, W, 16); - RNDr(S, W, 17); - RNDr(S, W, 18); - RNDr(S, W, 19); - RNDr(S, W, 20); - RNDr(S, W, 21); - RNDr(S, W, 22); - RNDr(S, W, 23); - RNDr(S, W, 24); - RNDr(S, W, 25); - RNDr(S, W, 26); - RNDr(S, W, 27); - RNDr(S, W, 28); - RNDr(S, W, 29); - RNDr(S, W, 30); - RNDr(S, W, 31); - RNDr(S, W, 32); - RNDr(S, W, 33); - RNDr(S, W, 34); - RNDr(S, W, 35); - RNDr(S, W, 36); - RNDr(S, W, 37); - RNDr(S, W, 38); - RNDr(S, W, 39); - RNDr(S, W, 40); - RNDr(S, W, 41); - RNDr(S, W, 42); - RNDr(S, W, 43); - RNDr(S, W, 44); - RNDr(S, W, 45); - RNDr(S, W, 46); - RNDr(S, W, 47); - RNDr(S, W, 48); - RNDr(S, W, 49); - RNDr(S, W, 50); - RNDr(S, W, 51); - RNDr(S, W, 52); - RNDr(S, W, 53); - RNDr(S, W, 54); - RNDr(S, W, 55); - RNDr(S, W, 56); - RNDr(S, W, 57); - RNDr(S, W, 58); - RNDr(S, W, 59); - RNDr(S, W, 60); - RNDr(S, W, 61); - RNDr(S, W, 62); - RNDr(S, W, 63); - - /* 4. Mix local working variables into global state */ - for (i = 0; i < 8; i++) - state[i] += S[i]; -} - -// standard sha256 hash -#if 1 -static void sha256_hash(unsigned char *hash, const unsigned char *data, int len) -{ - uint32_t _ALIGN(64) S[16]; - uint32_t _ALIGN(64) T[64]; - int i, r; - - sha256_init(S); - for (r = len; r > -9; r -= 64) { - if (r < 64) - memset(T, 0, 64); - memcpy(T, data + len - r, r > 64 ? 64 : (r < 0 ? 
0 : r)); - if (r >= 0 && r < 64) - ((unsigned char *)T)[r] = 0x80; - for (i = 0; i < 16; i++) - T[i] = be32dec(T + i); - if (r < 56) - T[15] = 8 * len; - //sha256_transform(S, T, 0); - sha256_transform_volatile(S, T); - } - for (i = 0; i < 8; i++) - be32enc((uint32_t *)hash + i, S[i]); -} -#else -#include -static void sha256_hash(unsigned char *hash, const unsigned char *data, int len) -{ - SHA256_CTX ctx; - SHA256_Init(&ctx); - SHA256_Update(&ctx, data, len); - SHA256_Final(hash, &ctx); -} -#endif - -// hash exactly 64 bytes (ie, sha256 block size) -static void sha256_hash512(uint32_t *hash, const uint32_t *data) -{ - uint32_t _ALIGN(64) S[16]; - uint32_t _ALIGN(64) T[64]; - uchar _ALIGN(64) E[64*4] = { 0 }; - int i; - - sha256_init(S); - - for (i = 0; i < 16; i++) - T[i] = be32dec(&data[i]); - sha256_transform_volatile(S, T); - - E[3] = 0x80; - E[61] = 0x02; // T[15] = 8 * 64 => 0x200; - sha256_transform_volatile(S, (uint32_t*)E); - - for (i = 0; i < 8; i++) - be32enc(&hash[i], S[i]); -} - -void pluck_hash(uint32_t *hash, const uint32_t *data, uchar *hashbuffer, const int N) -{ - int size = N * 1024; - sha256_hash(hashbuffer, (void*)data, BLOCK_HEADER_SIZE); - memset(&hashbuffer[32], 0, 32); - - for(int i = 64; i < size - 32; i += 32) - { - uint32_t _ALIGN(64) randseed[16]; - uint32_t _ALIGN(64) randbuffer[16]; - uint32_t _ALIGN(64) joint[16]; - //i-4 because we use integers for all references against this, and we don't want to go 3 bytes over the defined area - //we could use size here, but then it's probable to use 0 as the value in most cases - int randmax = i - 4; - - //setup randbuffer to be an array of random indexes - memcpy(randseed, &hashbuffer[i - 64], 64); - - if(i > 128) memcpy(randbuffer, &hashbuffer[i - 128], 64); - //else memset(randbuffer, 0, 64); - - xor_salsa8((void*)randbuffer, (void*)randseed, i); - memcpy(joint, &hashbuffer[i - 32], 32); - - //use the last hash value as the seed - for (int j = 32; j < 64; j += 4) - { - //every other time, 
change to next random index - //randmax - 32 as otherwise we go beyond memory that's already been written to - uint32_t rand = randbuffer[(j - 32) >> 2] % (randmax - 32); - joint[j >> 2] = *((uint32_t *)&hashbuffer[rand]); - } - - sha256_hash512((uint32_t*) &hashbuffer[i], joint); - - //setup randbuffer to be an array of random indexes - //use last hash value and previous hash value(post-mixing) - memcpy(randseed, &hashbuffer[i - 32], 64); - - if(i > 128) memcpy(randbuffer, &hashbuffer[i - 128], 64); - //else memset(randbuffer, 0, 64); - - xor_salsa8((void*)randbuffer, (void*)randseed, i); - - //use the last hash value as the seed - for (int j = 0; j < 32; j += 2) - { - uint32_t rand = randbuffer[j >> 1] % randmax; - *((uint32_t *)(hashbuffer + rand)) = *((uint32_t *)(hashbuffer + j + randmax)); - } - } - - memcpy(hash, hashbuffer, 32); -} - -int scanhash_pluck( struct work *work, uint32_t max_nonce, - uint64_t *hashes_done, struct thr_info *mythr ) -{ - uint32_t *pdata = work->data; - uint32_t *ptarget = work->target; - uint32_t _ALIGN(64) endiandata[20]; - uint32_t _ALIGN(64) hash[8]; - const uint32_t first_nonce = pdata[19]; - int thr_id = mythr->id; // thr_id arg is deprecated - volatile uint8_t *restart = &(work_restart[thr_id].restart); - uint32_t n = first_nonce; - - - if (opt_benchmark) - ((uint32_t*)ptarget)[7] = 0x0ffff; - - for (int i=0; i < 19; i++) - be32enc(&endiandata[i], pdata[i]); - - const uint32_t Htarg = ptarget[7]; - do { - //be32enc(&endiandata[19], n); - endiandata[19] = n; - pluck_hash(hash, endiandata, scratchbuf, opt_pluck_n); - - if (hash[7] <= Htarg && fulltest(hash, ptarget)) - { - *hashes_done = n - first_nonce + 1; - pdata[19] = htobe32(endiandata[19]); - return 1; - } - n++; - } while (n < max_nonce && !(*restart)); - - *hashes_done = n - first_nonce + 1; - pdata[19] = n; - return 0; -} - -bool pluck_miner_thread_init( int thr_id ) -{ - scratchbuf = malloc( 128 * 1024 ); - if ( scratchbuf ) - return true; - applog( LOG_ERR, "Thread 
%u: Pluck buffer allocation failed", thr_id ); - return false; -} - -bool register_pluck_algo( algo_gate_t* gate ) -{ - algo_not_tested(); - gate->miner_thread_init = (void*)&pluck_miner_thread_init; - gate->scanhash = (void*)&scanhash_pluck; - gate->hash = (void*)&pluck_hash; - opt_target_factor = 65536.0; - return true; -}; - - diff --git a/algo/sha/sha256q.c b/algo/sha/sha256q.c index 25f7d2d..772ba41 100644 --- a/algo/sha/sha256q.c +++ b/algo/sha/sha256q.c @@ -1,4 +1,7 @@ #include "sha256t-gate.h" + +#if !defined(SHA256T_16WAY) && !defined(SHA256T_8WAY) && !defined(SHA256T_4WAY) + #include #include #include @@ -102,3 +105,4 @@ int scanhash_sha256q( struct work *work, uint32_t max_nonce, pdata[19] = n; return 0; } +#endif diff --git a/algo/sha/sha256t.c b/algo/sha/sha256t.c index bb401d0..4dbfd33 100644 --- a/algo/sha/sha256t.c +++ b/algo/sha/sha256t.c @@ -1,4 +1,7 @@ #include "sha256t-gate.h" + +#if !defined(SHA256T_16WAY) && !defined(SHA256T_8WAY) && !defined(SHA256T_4WAY) + #include #include #include @@ -98,3 +101,5 @@ int scanhash_sha256t( struct work *work, uint32_t max_nonce, pdata[19] = n; return 0; } +#endif + diff --git a/algo/skein/skein.c b/algo/skein/skein.c index c493406..dba2ca0 100644 --- a/algo/skein/skein.c +++ b/algo/skein/skein.c @@ -1,4 +1,7 @@ #include "algo-gate-api.h" + +#if !defined(SKEIN_8WAY) && !defined(SKEIN_4WAY) + #include #include #include "sph_skein.h" @@ -52,4 +55,4 @@ int scanhash_skein( struct work *work, uint32_t max_nonce, return 0; } - +#endif diff --git a/algo/skein/skein2.c b/algo/skein/skein2.c index a42915b..cc58290 100644 --- a/algo/skein/skein2.c +++ b/algo/skein/skein2.c @@ -1,4 +1,7 @@ #include "skein-gate.h" + +#if !defined(SKEIN_8WAY) && !defined(SKEIN_4WAY) + #include #include @@ -66,4 +69,4 @@ int scanhash_skein2( struct work *work, uint32_t max_nonce, return 0; } - +#endif diff --git a/algo/whirlpool/md_helper.c b/algo/whirlpool/md_helper.c index a9f11db..5048806 100644 --- a/algo/whirlpool/md_helper.c +++ 
b/algo/whirlpool/md_helper.c @@ -252,12 +252,6 @@ SPH_XCAT(HASH, _addbits_and_close)(void *cc, current = (unsigned)sc->count_low & (SPH_BLEN - 1U); #endif -//uint64_t *b= (uint64_t*)sc->buf; -//uint64_t *s= (uint64_t*)sc->state; -// printf("Sptr 1= %u\n",current); -// printf("SBuf %016llx %016llx %016llx %016llx\n", b[0], b[1], b[2], b[3] ); -// printf("SBuf %016llx %016llx %016llx %016llx\n", b[4], b[5], b[6], b[7] ); - #ifdef PW01 sc->buf[current ++] = (0x100 | (ub & 0xFF)) >> (8 - n); #else @@ -269,10 +263,6 @@ SPH_XCAT(HASH, _addbits_and_close)(void *cc, } #endif -// printf("Sptr 2= %u\n",current); -// printf("SBuf %016llx %016llx %016llx %016llx\n", b[0], b[1], b[2], b[3] ); -// printf("SBuf %016llx %016llx %016llx %016llx\n", b[4], b[5], b[6], b[7] ); - if (current > SPH_MAXPAD) { memset(sc->buf + current, 0, SPH_BLEN - current); RFUN(sc->buf, SPH_VAL); @@ -333,16 +323,8 @@ SPH_XCAT(HASH, _addbits_and_close)(void *cc, #endif #endif -// printf("Sptr 3= %u\n",current); -// printf("SBuf %016llx %016llx %016llx %016llx\n", b[0], b[1], b[2], b[3] ); -// printf("SBuf %016llx %016llx %016llx %016llx\n", b[4], b[5], b[6], b[7] ); - RFUN(sc->buf, SPH_VAL); -// printf("Sptr after= %u\n",current); -// printf("SState %016llx %016llx %016llx %016llx\n", s[0], s[1], s[2], s[3] ); -// printf("SState %016llx %016llx %016llx %016llx\n", s[4], s[5], s[6], s[7] ); - #ifdef SPH_NO_OUTPUT (void)dst; (void)rnum; diff --git a/algo/x11/c11.c b/algo/x11/c11.c index 5ebf40d..d843b82 100644 --- a/algo/x11/c11.c +++ b/algo/x11/c11.c @@ -1,4 +1,7 @@ #include "c11-gate.h" + +#if !defined(C11_8WAY) && !defined(C11_4WAY) + #include #include #include @@ -9,9 +12,6 @@ #include "algo/keccak/sph_keccak.h" #include "algo/skein/sph_skein.h" #include "algo/shavite/sph_shavite.h" -#include "algo/luffa/sph_luffa.h" -#include "algo/cubehash/sph_cubehash.h" -#include "algo/simd/sph_simd.h" #include "algo/luffa/luffa_for_sse2.h" #include "algo/cubehash/cubehash_sse2.h" #include "algo/simd/nist.h" @@ 
-149,3 +149,4 @@ int scanhash_c11( struct work *work, uint32_t max_nonce, return 0; } +#endif diff --git a/algo/x11/fresh.c b/algo/x11/fresh.c deleted file mode 100644 index d81cc2f..0000000 --- a/algo/x11/fresh.c +++ /dev/null @@ -1,131 +0,0 @@ -#include "algo-gate-api.h" - -#include -#include -#include -#include - -#include "algo/shavite/sph_shavite.h" -#include "algo/simd/sph_simd.h" -#include "algo/echo/sph_echo.h" - -//#define DEBUG_ALGO - -extern void freshhash(void* output, const void* input, uint32_t len) -{ - unsigned char hash[128]; // uint32_t hashA[16], hashB[16]; - #define hashA hash - #define hashB hash+64 - - sph_shavite512_context ctx_shavite; - sph_simd512_context ctx_simd; - sph_echo512_context ctx_echo; - - sph_shavite512_init(&ctx_shavite); - sph_shavite512(&ctx_shavite, input, len); - sph_shavite512_close(&ctx_shavite, hashA); - - sph_simd512_init(&ctx_simd); - sph_simd512(&ctx_simd, hashA, 64); - sph_simd512_close(&ctx_simd, hashB); - - sph_shavite512_init(&ctx_shavite); - sph_shavite512(&ctx_shavite, hashB, 64); - sph_shavite512_close(&ctx_shavite, hashA); - - sph_simd512_init(&ctx_simd); - sph_simd512(&ctx_simd, hashA, 64); - sph_simd512_close(&ctx_simd, hashB); - - sph_echo512_init(&ctx_echo); - sph_echo512(&ctx_echo, hashB, 64); - sph_echo512_close(&ctx_echo, hashA); - - memcpy(output, hash, 32); -} - -int scanhash_fresh( struct work *work, - uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr) -{ - uint32_t *pdata = work->data; - uint32_t *ptarget = work->target; - uint32_t len = 80; - int thr_id = mythr->id; // thr_id arg is deprecated - - uint32_t n = pdata[19] - 1; - const uint32_t first_nonce = pdata[19]; - const uint32_t Htarg = ptarget[7]; -#ifdef _MSC_VER - uint32_t __declspec(align(32)) hash64[8]; -#else - uint32_t hash64[8] __attribute__((aligned(32))); -#endif - uint32_t endiandata[32]; - - uint64_t htmax[] = { - 0, - 0xF, - 0xFF, - 0xFFF, - 0xFFFF, - 0x10000000 - }; - uint32_t masks[] = { - 0xFFFFFFFF, - 
0xFFFFFFF0, - 0xFFFFFF00, - 0xFFFFF000, - 0xFFFF0000, - 0 - }; - - // we need bigendian data... - for (int k = 0; k < 19; k++) - be32enc(&endiandata[k], pdata[k]); - -#ifdef DEBUG_ALGO - if (Htarg != 0) - printf("[%d] Htarg=%X\n", thr_id, Htarg); -#endif - for (int m=0; m < 6; m++) { - if (Htarg <= htmax[m]) { - uint32_t mask = masks[m]; - do { - pdata[19] = ++n; - be32enc(&endiandata[19], n); - freshhash(hash64, endiandata, len); -#ifndef DEBUG_ALGO - if ((!(hash64[7] & mask)) && fulltest(hash64, ptarget)) { - *hashes_done = n - first_nonce + 1; - return true; - } -#else - if (!(n % 0x1000) && !thr_id) printf("."); - if (!(hash64[7] & mask)) { - printf("[%d]",thr_id); - if (fulltest(hash64, ptarget)) { - *hashes_done = n - first_nonce + 1; - return true; - } - } -#endif - } while (n < max_nonce && !work_restart[thr_id].restart); - // see blake.c if else to understand the loop on htmax => mask - break; - } - } - - *hashes_done = n - first_nonce + 1; - pdata[19] = n; - return 0; -} - -bool register_fresh_algo( algo_gate_t* gate ) -{ - algo_not_tested(); - gate->scanhash = (void*)&scanhash_fresh; - gate->hash = (void*)&freshhash; - opt_target_factor = 256.0; - return true; -}; - diff --git a/algo/x11/timetravel.c b/algo/x11/timetravel.c index b148767..c6a593c 100644 --- a/algo/x11/timetravel.c +++ b/algo/x11/timetravel.c @@ -1,5 +1,7 @@ #include "timetravel-gate.h" +#if !defined(TIMETRAVEL_8WAY) && !defined(TIMETRAVEL_4WAY) + #include #include #include @@ -290,4 +292,4 @@ int scanhash_timetravel( struct work *work, uint32_t max_nonce, return 0; } - +#endif diff --git a/algo/x11/timetravel10.c b/algo/x11/timetravel10.c index 5360be9..073ba55 100644 --- a/algo/x11/timetravel10.c +++ b/algo/x11/timetravel10.c @@ -1,4 +1,7 @@ #include "timetravel10-gate.h" + +#if !defined(TIMETRAVEL10_8WAY) && !defined(TIMETRAVEL10_4WAY) + #include #include #include @@ -329,3 +332,4 @@ int scanhash_timetravel10( struct work *work, uint32_t max_nonce, *hashes_done = pdata[19] - 
first_nonce + 1; return 0; } +#endif diff --git a/algo/x11/tribus.c b/algo/x11/tribus.c index 89d1469..f7764e1 100644 --- a/algo/x11/tribus.c +++ b/algo/x11/tribus.c @@ -1,12 +1,13 @@ #include "tribus-gate.h" + +#if !defined(TRIBUS_8WAY) && !defined(TRIBUS_4WAY) + #include #include #include #include - #include "algo/jh//sph_jh.h" #include "algo/keccak/sph_keccak.h" - #ifdef __AES__ #include "algo/echo/aes_ni/hash_api.h" #else @@ -117,4 +118,4 @@ int scanhash_tribus( struct work *work, uint32_t max_nonce, return 0; } - +#endif diff --git a/algo/x11/x11.c b/algo/x11/x11.c index bdbfcaa..48135d5 100644 --- a/algo/x11/x11.c +++ b/algo/x11/x11.c @@ -1,5 +1,8 @@ #include "cpuminer-config.h" #include "x11-gate.h" + +#if !defined(X11_8WAY) && !defined(X11_4WAY) + #include #include #include @@ -10,9 +13,6 @@ #include "algo/keccak/sph_keccak.h" #include "algo/skein/sph_skein.h" #include "algo/shavite/sph_shavite.h" -#include "algo/luffa/sph_luffa.h" -#include "algo/cubehash/sph_cubehash.h" -#include "algo/simd/sph_simd.h" #include "algo/luffa/luffa_for_sse2.h" #include "algo/cubehash/cubehash_sse2.h" #include "algo/simd/nist.h" @@ -172,3 +172,4 @@ int scanhash_x11( struct work *work, uint32_t max_nonce, pdata[19] = n; return 0; } +#endif diff --git a/algo/x11/x11evo.c b/algo/x11/x11evo.c index 4a88d25..153390a 100644 --- a/algo/x11/x11evo.c +++ b/algo/x11/x11evo.c @@ -1,5 +1,8 @@ #include "cpuminer-config.h" #include "x11evo-gate.h" + +#if !defined(X11EVO_8WAY) && !defined(X11EVO_4WAY) + #include #include #include @@ -8,10 +11,7 @@ #include "algo/jh/sph_jh.h" #include "algo/keccak/sph_keccak.h" #include "algo/skein/sph_skein.h" -#include "algo/luffa/sph_luffa.h" -#include "algo/cubehash/sph_cubehash.h" #include "algo/shavite/sph_shavite.h" -#include "algo/simd/sph_simd.h" #ifdef __AES__ #include "algo/groestl/aes_ni/hash-groestl.h" #include "algo/echo/aes_ni/hash_api.h" @@ -204,3 +204,4 @@ int scanhash_x11evo( struct work* work, uint32_t max_nonce, pdata[19] = n; return 0; } 
+#endif diff --git a/algo/x11/x11gost.c b/algo/x11/x11gost.c index 2eeb5c8..30523fa 100644 --- a/algo/x11/x11gost.c +++ b/algo/x11/x11gost.c @@ -1,4 +1,7 @@ #include "x11gost-gate.h" + +#if !defined(X11GOST_8WAY) && !defined(X11GOST_4WAY) + #include #include #include @@ -10,9 +13,6 @@ #include "algo/keccak/sph_keccak.h" #include "algo/skein/sph_skein.h" #include "algo/shavite/sph_shavite.h" -#include "algo/luffa/sph_luffa.h" -#include "algo/cubehash/sph_cubehash.h" -#include "algo/simd/sph_simd.h" #include "algo/luffa/luffa_for_sse2.h" #include "algo/cubehash/cubehash_sse2.h" #include "algo/simd/nist.h" @@ -160,3 +160,4 @@ int scanhash_x11gost( struct work *work, uint32_t max_nonce, return 0; } +#endif diff --git a/algo/x12/x12.c b/algo/x12/x12.c index 7027c70..ca1a3ca 100644 --- a/algo/x12/x12.c +++ b/algo/x12/x12.c @@ -1,5 +1,7 @@ #include "x12-gate.h" +#if !defined(X12_8WAY) && !defined(X12_4WAY) + #include #include #include @@ -12,9 +14,6 @@ #include "algo/keccak/sph_keccak.h" #include "algo/skein/sph_skein.h" #include "algo/shavite/sph_shavite.h" -#include "algo/luffa/sph_luffa.h" -#include "algo/cubehash/sph_cubehash.h" -#include "algo/simd/sph_simd.h" #include "algo/echo/sph_echo.h" #include "algo/hamsi/sph_hamsi.h" #include "algo/luffa/luffa_for_sse2.h" @@ -177,3 +176,4 @@ int scanhash_x12( struct work *work, uint32_t max_nonce, pdata[19] = n; return 0; } +#endif diff --git a/algo/x13/drop.c b/algo/x13/drop.c deleted file mode 100644 index 53aec2a..0000000 --- a/algo/x13/drop.c +++ /dev/null @@ -1,262 +0,0 @@ -/** - * ==========================(LICENSE BEGIN)============================ - * - * Copyright (c) 2015 kernels10, tpruvot - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell 
copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * ===========================(LICENSE END)============================= - * - * @file drop.c - * @author kernels10 - * @author tpruvot - */ - -#define POK_BOOL_MASK 0x00008000 -#define POK_DATA_MASK 0xFFFF0000 - -#include "algo-gate-api.h" - -#include - -#include "algo/blake/sph_blake.h" -#include "algo/groestl/sph_groestl.h" -#include "algo/jh/sph_jh.h" -#include "algo/keccak/sph_keccak.h" -#include "algo/skein/sph_skein.h" -#include "algo/cubehash/sph_cubehash.h" -#include "algo/echo/sph_echo.h" -#include "algo/fugue//sph_fugue.h" -#include "algo/luffa/sph_luffa.h" -#include "algo/simd/sph_simd.h" -#include "algo/shavite/sph_shavite.h" - -static void shiftr_lp(const uint32_t *input, uint32_t *output, unsigned int shift) -{ - if(!shift) { - memcpy(output, input, 64); - return; - } - - memset(output, 0, 64); - for(int i = 0; i < 15; ++i) { - output[i + 1] |= (input[i] >> (32 - shift)); - output[i] |= (input[i] << shift); - } - - output[15] |= (input[15] << shift); - return; -} - -static void switchHash(const void *input, void *output, int id) -{ -/* - sph_keccak512_context ctx_keccak; - sph_blake512_context ctx_blake; - sph_groestl512_context ctx_groestl; - sph_skein512_context ctx_skein; - 
sph_luffa512_context ctx_luffa; - sph_echo512_context ctx_echo; - sph_simd512_context ctx_simd; - sph_cubehash512_context ctx_cubehash; - sph_fugue512_context ctx_fugue; - sph_shavite512_context ctx_shavite; - - switch(id) { - case 0: - sph_keccak512_init(&ctx_keccak); sph_keccak512(&ctx_keccak, input, 64); sph_keccak512_close(&ctx_keccak, output); - break; - case 1: - sph_blake512_init(&ctx_blake); sph_blake512(&ctx_blake, input, 64); sph_blake512_close(&ctx_blake, output); - break; - case 2: - sph_groestl512_init(&ctx_groestl); sph_groestl512(&ctx_groestl, input, 64); sph_groestl512_close(&ctx_groestl, output); - break; - case 3: - sph_skein512_init(&ctx_skein); sph_skein512(&ctx_skein, input, 64); sph_skein512_close(&ctx_skein, output); - break; - case 4: - sph_luffa512_init(&ctx_luffa); sph_luffa512(&ctx_luffa, input, 64); sph_luffa512_close(&ctx_luffa, output); - break; - case 5: - sph_echo512_init(&ctx_echo); sph_echo512(&ctx_echo, input, 64); sph_echo512_close(&ctx_echo, output); - break; - case 6: - sph_shavite512_init(&ctx_shavite); sph_shavite512(&ctx_shavite, input, 64); sph_shavite512_close(&ctx_shavite, output); - break; - case 7: - sph_fugue512_init(&ctx_fugue); sph_fugue512(&ctx_fugue, input, 64); sph_fugue512_close(&ctx_fugue, output); - break; - case 8: - sph_simd512_init(&ctx_simd); sph_simd512(&ctx_simd, input, 64); sph_simd512_close(&ctx_simd, output); - break; - case 9: - sph_cubehash512_init(&ctx_cubehash); sph_cubehash512(&ctx_cubehash, input, 64); sph_cubehash512_close(&ctx_cubehash, output); - break; - default: - break; - } -*/ -} - -void droplp_hash(void *state, const void *input) -{ - uint32_t _ALIGN(64) hash[2][16]; - sph_jh512_context ctx_jh; - uint32_t *hashA = hash[0]; - uint32_t *hashB = hash[1]; - - sph_jh512_init(&ctx_jh); - sph_jh512(&ctx_jh, input, 80); - sph_jh512_close(&ctx_jh, (void*)(hashA)); - - unsigned int startPosition = hashA[0] % 31; - unsigned int i = 0; - int j = 0; - int start = 0; - - for (i = startPosition; i < 31; 
i+=9) { - start = i % 10; - for (j = start; j < 10; j++) { - shiftr_lp(hashA, hashB, (i & 3)); - switchHash((const void*)hashB, (void*)hashA, j); - } - for (j = 0; j < start; j++) { - shiftr_lp(hashA, hashB, (i & 3)); - switchHash((const void*)hashB, (void*)hashA, j); - } - } - for (i = 0; i < startPosition; i += 9) { - start = i % 10; - for (j = start; j < 10; j++) { - shiftr_lp(hashA, hashB, (i & 3)); - switchHash((const void*)hashB, (void*)hashA, j); - } - for (j = 0; j < start; j++) { - shiftr_lp(hashA, hashB, (i & 3)); - switchHash((const void*)hashB, (void*)hashA, j); - } - } - - memcpy(state, hashA, 32); -} - -static void droplp_hash_pok(void *output, uint32_t *pdata, const uint32_t version) -{ - uint32_t _ALIGN(64) hash[8]; - uint32_t pok; - - pdata[0] = version; - droplp_hash(hash, pdata); - - // fill PoK - pok = version | (hash[0] & POK_DATA_MASK); - if (pdata[0] != pok) { - pdata[0] = pok; - droplp_hash(hash, pdata); - } - memcpy(output, hash, 32); -} - -int scanhash_drop( struct work *work, uint32_t max_nonce, - uint64_t *hashes_done, struct thr_info *mythr ) -{ - uint32_t _ALIGN(64) hash[16]; - uint32_t *pdata = work->data; - uint32_t *ptarget = work->target; - const uint32_t version = pdata[0] & (~POK_DATA_MASK); - const uint32_t first_nonce = pdata[19]; - uint32_t nonce = first_nonce; - int thr_id = mythr->id; // thr_id arg is deprecated - #define tmpdata pdata - - if (opt_benchmark) - ptarget[7] = 0x07ff; - - const uint32_t htarg = ptarget[7]; - - do { - tmpdata[19] = nonce; - droplp_hash_pok(hash, tmpdata, version); - - if (hash[7] <= htarg && fulltest(hash, ptarget)) { - pdata[0] = tmpdata[0]; - pdata[19] = nonce; - *hashes_done = pdata[19] - first_nonce + 1; - if (opt_debug) - applog(LOG_INFO, "found nonce %x", nonce); - return 1; - } - nonce++; - - } while (nonce < max_nonce && !work_restart[thr_id].restart); - - pdata[19] = nonce; - *hashes_done = pdata[19] - first_nonce + 1; - return 0; -} - -void drop_get_new_work( struct work* work, struct 
work* g_work, int thr_id, - uint32_t* end_nonce_ptr ) -{ - // ignore POK in first word -// const int nonce_i = 19; - const int wkcmp_sz = 72; // (19-1) * sizeof(uint32_t) - uint32_t *nonceptr = algo_gate.get_nonceptr( work->data ); - if ( memcmp( &work->data[1], &g_work->data[1], wkcmp_sz ) - || ( *nonceptr >= *end_nonce_ptr ) ) - { - work_free( work ); - work_copy( work, g_work ); - *nonceptr = ( 0xffffffffU / opt_n_threads ) * thr_id; - if ( opt_randomize ) - *nonceptr += ( (rand() *4 ) & UINT32_MAX ) / opt_n_threads; - *end_nonce_ptr = ( 0xffffffffU / opt_n_threads ) * (thr_id+1) - 0x20; - } - else - ++(*nonceptr); -} - -void drop_display_pok( struct work* work ) -{ - if ( work->data[0] & 0x00008000 ) - applog(LOG_BLUE, "POK received: %08xx", work->data[0] ); -} - -int drop_get_work_data_size() { return 80; } - -// Need to fix POK offset problems like zr5 -bool register_drop_algo( algo_gate_t* gate ) -{ - algo_not_tested(); - gate->scanhash = (void*)&scanhash_drop; - gate->hash = (void*)&droplp_hash_pok; - gate->get_new_work = (void*)&drop_get_new_work; - gate->build_stratum_request = (void*)&std_be_build_stratum_request; - gate->work_decode = (void*)&std_be_work_decode; - gate->submit_getwork_result = (void*)&std_be_submit_getwork_result; - gate->set_work_data_endian = (void*)&set_work_data_big_endian; - gate->decode_extra_data = (void*)&drop_display_pok; - gate->get_work_data_size = (void*)&drop_get_work_data_size; - gate->work_cmp_size = 72; - opt_target_factor = 65536.0; - return true; -}; - diff --git a/algo/x13/phi1612.c b/algo/x13/phi1612.c index ea59095..bbb86bd 100644 --- a/algo/x13/phi1612.c +++ b/algo/x13/phi1612.c @@ -1,4 +1,7 @@ #include "phi1612-gate.h" + +#if !defined(PHI1612_8WAY) && !defined(PHI1612_4WAY) + #include #include #include @@ -123,3 +126,4 @@ int scanhash_phi1612( struct work *work, uint32_t max_nonce, return 0; } +#endif diff --git a/algo/x13/skunk.c b/algo/x13/skunk.c index 00734d5..4172865 100644 --- a/algo/x13/skunk.c +++ 
b/algo/x13/skunk.c @@ -1,4 +1,7 @@ #include "skunk-gate.h" + +#if !defined(SKUNK_8WAY) && !defined(SKUNK_4WAY) + #include #include #include @@ -88,3 +91,4 @@ bool skunk_thread_init() sph_gost512_init( &skunk_ctx.gost ); return true; } +#endif diff --git a/algo/x13/x13.c b/algo/x13/x13.c index f0ce14b..109729e 100644 --- a/algo/x13/x13.c +++ b/algo/x13/x13.c @@ -1,4 +1,7 @@ #include "x13-gate.h" + +#if !defined(X13_8WAY) && !defined(X13_4WAY) + #include #include #include @@ -9,9 +12,6 @@ #include "algo/keccak/sph_keccak.h" #include "algo/skein/sph_skein.h" #include "algo/shavite/sph_shavite.h" -#include "algo/luffa/sph_luffa.h" -#include "algo/cubehash/sph_cubehash.h" -#include "algo/simd/sph_simd.h" #include "algo/hamsi/sph_hamsi.h" #include "algo/fugue/sph_fugue.h" #include "algo/luffa/luffa_for_sse2.h" @@ -185,3 +185,4 @@ int scanhash_x13( struct work *work, uint32_t max_nonce, pdata[19] = n; return 0; } +#endif diff --git a/algo/x13/x13bcd.c b/algo/x13/x13bcd.c index 290ad56..d53e37b 100644 --- a/algo/x13/x13bcd.c +++ b/algo/x13/x13bcd.c @@ -1,4 +1,7 @@ #include "x13sm3-gate.h" + +#if !defined(X13BCD_8WAY) && !defined(X13VCD_4WAY) + #include #include #include @@ -10,7 +13,6 @@ #include "algo/sm3/sph_sm3.h" #include "algo/skein/sph_skein.h" #include "algo/shavite/sph_shavite.h" -#include "algo/simd/sph_simd.h" #include "algo/hamsi/sph_hamsi.h" #include "algo/fugue/sph_fugue.h" #include "algo/cubehash/cubehash_sse2.h" @@ -184,3 +186,4 @@ int scanhash_x13bcd( struct work *work, uint32_t max_nonce, return 0; } +#endif diff --git a/algo/x13/x13sm3.c b/algo/x13/x13sm3.c index 4bde68e..6025739 100644 --- a/algo/x13/x13sm3.c +++ b/algo/x13/x13sm3.c @@ -1,4 +1,7 @@ #include "x13sm3-gate.h" + +#if !defined(X13SM3_8WAY) && !defined(X13SM3_4WAY) + #include #include #include @@ -10,7 +13,6 @@ #include "algo/sm3/sph_sm3.h" #include "algo/skein/sph_skein.h" #include "algo/shavite/sph_shavite.h" -#include "algo/simd/sph_simd.h" #include "algo/hamsi/sph_hamsi.h" #include 
"algo/fugue/sph_fugue.h" #include "algo/luffa/luffa_for_sse2.h" @@ -197,3 +199,4 @@ int scanhash_x13sm3( struct work *work, uint32_t max_nonce, return 0; } +#endif diff --git a/algo/x14/polytimos.c b/algo/x14/polytimos.c index b282ac8..e81c479 100644 --- a/algo/x14/polytimos.c +++ b/algo/x14/polytimos.c @@ -1,4 +1,7 @@ #include "polytimos-gate.h" + +#if !defined(POLYTIMOS_8WAY) && !defined(POLYTIMOS_4WAY) + #include #include #include @@ -111,3 +114,4 @@ int scanhash_polytimos( struct work *work, uint32_t max_nonce, *hashes_done = pdata[19] - first_nonce + 1; return 0; } +#endif diff --git a/algo/x14/veltor.c b/algo/x14/veltor.c index 427f49f..b7d7582 100644 --- a/algo/x14/veltor.c +++ b/algo/x14/veltor.c @@ -1,4 +1,7 @@ #include "veltor-gate.h" + +#if !defined(VELTOR_8WAY) && !defined(VELTOR_4WAY) + #include #include #include @@ -102,3 +105,4 @@ int scanhash_veltor( struct work *work, uint32_t max_nonce, *hashes_done = pdata[19] - first_nonce + 1; return 0; } +#endif diff --git a/algo/x14/x14.c b/algo/x14/x14.c index 401b084..83eab96 100644 --- a/algo/x14/x14.c +++ b/algo/x14/x14.c @@ -1,19 +1,17 @@ #include "x14-gate.h" + +#if !defined(X14_8WAY) && !defined(X14_4WAY) + #include #include #include #include #include "algo/blake/sph_blake.h" #include "algo/bmw/sph_bmw.h" -#include "algo/groestl/sph_groestl.h" #include "algo/jh/sph_jh.h" #include "algo/keccak/sph_keccak.h" #include "algo/skein/sph_skein.h" #include "algo/shavite/sph_shavite.h" -#include "algo/luffa/sph_luffa.h" -#include "algo/cubehash/sph_cubehash.h" -#include "algo/simd/sph_simd.h" -#include "algo/echo/sph_echo.h" #include "algo/hamsi/sph_hamsi.h" #include "algo/fugue/sph_fugue.h" #include "algo/shabal/sph_shabal.h" @@ -186,3 +184,4 @@ int scanhash_x14( struct work *work, uint32_t max_nonce, pdata[19] = n; return 0; } +#endif diff --git a/algo/x15/x15.c b/algo/x15/x15.c index eee7a24..c739e91 100644 --- a/algo/x15/x15.c +++ b/algo/x15/x15.c @@ -1,4 +1,7 @@ #include "x15-gate.h" + +#if 
!defined(X15_8WAY) && !defined(X15_4WAY) + #include #include #include @@ -9,9 +12,6 @@ #include "algo/keccak/sph_keccak.h" #include "algo/skein/sph_skein.h" #include "algo/shavite/sph_shavite.h" -#include "algo/luffa/sph_luffa.h" -#include "algo/cubehash/sph_cubehash.h" -#include "algo/simd/sph_simd.h" #include "algo/hamsi/sph_hamsi.h" #include "algo/fugue/sph_fugue.h" #include "algo/shabal/sph_shabal.h" @@ -217,3 +217,4 @@ int scanhash_x15( struct work *work, uint32_t max_nonce, pdata[19] = n; return 0; } +#endif diff --git a/algo/x16/hex.c b/algo/x16/hex.c index bd2df93..15f3f61 100644 --- a/algo/x16/hex.c +++ b/algo/x16/hex.c @@ -85,13 +85,6 @@ void hex_hash( void* output, const void* input ) memcpy( &ctx, &hex_ctx, sizeof(ctx) ); void *in = (void*) input; int size = 80; -/* - if ( s_ntime == UINT32_MAX ) - { - const uint8_t* in8 = (uint8_t*) input; - x16_r_s_getAlgoString( &in8[4], hashOrder ); - } -*/ char elem = hashOrder[0]; uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0'; @@ -249,12 +242,8 @@ int scanhash_hex( struct work *work, uint32_t max_nonce, const bool bench = opt_benchmark; if ( bench ) ptarget[7] = 0x0cff; - casti_m128i( edata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) ); - casti_m128i( edata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) ); - casti_m128i( edata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) ); - casti_m128i( edata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) ); - casti_m128i( edata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) ); - + mm128_bswap32_80( edata, pdata ); + uint32_t ntime = swab32(pdata[17]); if ( s_ntime != ntime ) { @@ -277,6 +266,10 @@ int scanhash_hex( struct work *work, uint32_t max_nonce, sph_skein512_init( &hex_ctx.skein ); sph_skein512( &hex_ctx.skein, edata, 64 ); break; + case LUFFA: + init_luffa( &hex_ctx.luffa, 512 ); + update_luffa( &hex_ctx.luffa, (const BitSequence*)edata, 64 ); + break; case CUBEHASH: cubehashInit( &hex_ctx.cube, 512, 16, 32 ); cubehashUpdate( &hex_ctx.cube, (const 
byte*)edata, 64 ); diff --git a/algo/x16/x16r-4way.c b/algo/x16/x16r-4way.c index f31820b..5d9f243 100644 --- a/algo/x16/x16r-4way.c +++ b/algo/x16/x16r-4way.c @@ -2,74 +2,85 @@ * x16r algo implementation * * Implementation by tpruvot@github Jan 2018 - * Optimized by JayDDee@github Jan 2018 + * Optimized by https://github.com/JayDDee/ Jan 2018 */ #include "x16r-gate.h" #include #include #include -#include "algo/blake/blake-hash-4way.h" -#include "algo/bmw/bmw-hash-4way.h" -#include "algo/groestl/aes_ni/hash-groestl.h" -#include "algo/groestl/aes_ni/hash-groestl.h" -#include "algo/skein/skein-hash-4way.h" -#include "algo/jh/jh-hash-4way.h" -#include "algo/keccak/keccak-hash-4way.h" -#include "algo/shavite/sph_shavite.h" -#include "algo/luffa/luffa-hash-2way.h" -#include "algo/luffa/luffa_for_sse2.h" -#include "algo/cubehash/cube-hash-2way.h" -#include "algo/cubehash/cubehash_sse2.h" -#include "algo/simd/simd-hash-2way.h" -#include "algo/echo/aes_ni/hash_api.h" -#include "algo/hamsi/hamsi-hash-4way.h" -#include "algo/fugue/sph_fugue.h" -#include "algo/shabal/shabal-hash-4way.h" -#include "algo/whirlpool/sph_whirlpool.h" -#include "algo/sha/sha-hash-4way.h" -#if defined(__VAES__) - #include "algo/groestl/groestl512-hash-4way.h" - #include "algo/shavite/shavite-hash-4way.h" - #include "algo/echo/echo-hash-4way.h" -#endif + +// The hash and prehash code is shared among x16r, x16s, x16rt, and x21s. +// The generic function performs the x16 hash as per the hash order +// and produces a 512 bit intermediate hash which needs to be converted +// to 256 bit final hash by a wrapper function. #if defined (X16R_8WAY) -static __thread uint32_t s_ntime = UINT32_MAX; -static __thread char hashOrder[X16R_HASH_FUNC_COUNT + 1] = { 0 }; +// Perform midstate prehash of hash functions with block size <= 64 bytes +// and interleave 4x64 before nonce insertion for final hash. 
-union _x16r_8way_context_overlay +void x16r_8way_prehash( void *vdata, void *pdata ) { - blake512_8way_context blake; - bmw512_8way_context bmw; - skein512_8way_context skein; - jh512_8way_context jh; - keccak512_8way_context keccak; - luffa_4way_context luffa; - cubehashParam cube; -// cube_4way_context cube; - simd_4way_context simd; - hamsi512_8way_context hamsi; - sph_fugue512_context fugue; - shabal512_8way_context shabal; - sph_whirlpool_context whirlpool; - sha512_8way_context sha512; -#if defined(__VAES__) - groestl512_4way_context groestl; - shavite512_4way_context shavite; - echo_4way_context echo; -#else - hashState_groestl groestl; - sph_shavite512_context shavite; - hashState_echo echo; -#endif -} __attribute__ ((aligned (64))); + uint32_t vdata2[20*8] __attribute__ ((aligned (64))); + uint32_t edata[20] __attribute__ ((aligned (64))); -typedef union _x16r_8way_context_overlay x16r_8way_context_overlay; + const char elem = x16r_hash_order[0]; + const uint8_t algo = elem >= 'A' ? 
elem - 'A' + 10 : elem - '0'; -static __thread x16r_8way_context_overlay x16r_ctx; + switch ( algo ) + { + case JH: + mm512_bswap32_intrlv80_8x64( vdata, pdata ); + jh512_8way_init( &x16r_ctx.jh ); + jh512_8way_update( &x16r_ctx.jh, vdata, 64 ); + break; + case SKEIN: + mm512_bswap32_intrlv80_8x64( vdata, pdata ); + skein512_8way_init( &x16r_ctx.skein ); + skein512_8way_update( &x16r_ctx.skein, vdata, 64 ); + break; + case LUFFA: + mm128_bswap32_80( edata, pdata ); + intrlv_4x128( vdata2, edata, edata, edata, edata, 640 ); + luffa_4way_init( &x16r_ctx.luffa, 512 ); + luffa_4way_update( &x16r_ctx.luffa, vdata2, 64 ); + rintrlv_4x128_8x64( vdata, vdata2, vdata2, 640 ); + break; + case CUBEHASH: + mm128_bswap32_80( edata, pdata ); + cubehashInit( &x16r_ctx.cube, 512, 16, 32 ); + cubehashUpdate( &x16r_ctx.cube, (const byte*)edata, 64 ); + intrlv_8x64( vdata, edata, edata, edata, edata, + edata, edata, edata, edata, 640 ); + break; + case HAMSI: + mm512_bswap32_intrlv80_8x64( vdata, pdata ); + hamsi512_8way_init( &x16r_ctx.hamsi ); + hamsi512_8way_update( &x16r_ctx.hamsi, vdata, 64 ); + break; + case SHABAL: + mm256_bswap32_intrlv80_8x32( vdata2, pdata ); + shabal512_8way_init( &x16r_ctx.shabal ); + shabal512_8way_update( &x16r_ctx.shabal, vdata2, 64 ); + rintrlv_8x32_8x64( vdata, vdata2, 640 ); + break; + case WHIRLPOOL: + mm128_bswap32_80( edata, pdata ); + sph_whirlpool_init( &x16r_ctx.whirlpool ); + sph_whirlpool( &x16r_ctx.whirlpool, edata, 64 ); + intrlv_8x64( vdata, edata, edata, edata, edata, + edata, edata, edata, edata, 640 ); + break; + default: + mm512_bswap32_intrlv80_8x64( vdata, pdata ); + } +} -void x16r_8way_hash( void* output, const void* input ) +// Perform the full x16r hash and returns 512 bit intermediate hash. +// Called by wrapper hash function to optionally continue hashing and +// convert to final hash. 
+ +void x16r_8way_hash_generic( void* output, const void* input ) { uint32_t vhash[20*8] __attribute__ ((aligned (128))); uint32_t hash0[20] __attribute__ ((aligned (64))); @@ -97,7 +108,7 @@ void x16r_8way_hash( void* output, const void* input ) for ( int i = 0; i < 16; i++ ) { - const char elem = hashOrder[i]; + const char elem = x16r_hash_order[i]; const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0'; switch ( algo ) @@ -464,23 +475,39 @@ void x16r_8way_hash( void* output, const void* input ) size = 64; } - memcpy( output, hash0, 32 ); - memcpy( output+32, hash1, 32 ); - memcpy( output+64, hash2, 32 ); - memcpy( output+96, hash3, 32 ); - memcpy( output+128, hash4, 32 ); - memcpy( output+160, hash5, 32 ); - memcpy( output+192, hash6, 32 ); - memcpy( output+224, hash7, 32 ); + memcpy( output, hash0, 64 ); + memcpy( output+64, hash1, 64 ); + memcpy( output+128, hash2, 64 ); + memcpy( output+192, hash3, 64 ); + memcpy( output+256, hash4, 64 ); + memcpy( output+320, hash5, 64 ); + memcpy( output+384, hash6, 64 ); + memcpy( output+448, hash7, 64 ); } +// x16-r,-s,-rt wrapper called directly by scanhash to repackage 512 bit +// hash to 256 bit final hash. 
+void x16r_8way_hash( void* output, const void* input ) +{ + uint8_t hash[64*8] __attribute__ ((aligned (128))); + x16r_8way_hash_generic( hash, input ); + + memcpy( output, hash, 32 ); + memcpy( output+32, hash+64, 32 ); + memcpy( output+64, hash+128, 32 ); + memcpy( output+96, hash+192, 32 ); + memcpy( output+128, hash+256, 32 ); + memcpy( output+160, hash+320, 32 ); + memcpy( output+192, hash+384, 32 ); + memcpy( output+224, hash+448, 32 ); +} + +// x16r only int scanhash_x16r_8way( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr) { uint32_t hash[16*8] __attribute__ ((aligned (128))); uint32_t vdata[20*8] __attribute__ ((aligned (64))); - uint32_t vdata2[20*8] __attribute__ ((aligned (64))); - uint32_t edata[20] __attribute__ ((aligned (64))); uint32_t bedata1[2] __attribute__((aligned(64))); uint32_t *pdata = work->data; uint32_t *ptarget = work->target; @@ -496,66 +523,18 @@ int scanhash_x16r_8way( struct work *work, uint32_t max_nonce, bedata1[0] = bswap_32( pdata[1] ); bedata1[1] = bswap_32( pdata[2] ); + + static __thread uint32_t s_ntime = UINT32_MAX; const uint32_t ntime = bswap_32( pdata[17] ); if ( s_ntime != ntime ) { - x16_r_s_getAlgoString( (const uint8_t*)bedata1, hashOrder ); + x16_r_s_getAlgoString( (const uint8_t*)bedata1, x16r_hash_order ); s_ntime = ntime; if ( opt_debug && !thr_id ) - applog( LOG_INFO, "hash order %s (%08x)", hashOrder, ntime ); + applog( LOG_INFO, "hash order %s (%08x)", x16r_hash_order, ntime ); } - // Do midstate prehash on hash functions with block size <= 64 bytes. - const char elem = hashOrder[0]; - const uint8_t algo = elem >= 'A' ? 
elem - 'A' + 10 : elem - '0'; - switch ( algo ) - { - case JH: - mm512_bswap32_intrlv80_8x64( vdata, pdata ); - jh512_8way_init( &x16r_ctx.jh ); - jh512_8way_update( &x16r_ctx.jh, vdata, 64 ); - break; - case SKEIN: - mm512_bswap32_intrlv80_8x64( vdata, pdata ); - skein512_8way_init( &x16r_ctx.skein ); - skein512_8way_update( &x16r_ctx.skein, vdata, 64 ); - break; - case LUFFA: - mm128_bswap32_80( edata, pdata ); - intrlv_4x128( vdata2, edata, edata, edata, edata, 640 ); - luffa_4way_init( &x16r_ctx.luffa, 512 ); - luffa_4way_update( &x16r_ctx.luffa, vdata2, 64 ); - rintrlv_4x128_8x64( vdata, vdata2, vdata2, 640 ); - break; - case CUBEHASH: - mm128_bswap32_80( edata, pdata ); - cubehashInit( &x16r_ctx.cube, 512, 16, 32 ); - cubehashUpdate( &x16r_ctx.cube, (const byte*)edata, 64 ); - intrlv_8x64( vdata, edata, edata, edata, edata, - edata, edata, edata, edata, 640 ); - break; - case HAMSI: - mm512_bswap32_intrlv80_8x64( vdata, pdata ); - hamsi512_8way_init( &x16r_ctx.hamsi ); - hamsi512_8way_update( &x16r_ctx.hamsi, vdata, 64 ); - break; - case SHABAL: - mm256_bswap32_intrlv80_8x32( vdata2, pdata ); - shabal512_8way_init( &x16r_ctx.shabal ); - shabal512_8way_update( &x16r_ctx.shabal, vdata2, 64 ); - rintrlv_8x32_8x64( vdata, vdata2, 640 ); - break; - case WHIRLPOOL: - mm128_bswap32_80( edata, pdata ); - sph_whirlpool_init( &x16r_ctx.whirlpool ); - sph_whirlpool( &x16r_ctx.whirlpool, edata, 64 ); - intrlv_8x64( vdata, edata, edata, edata, edata, - edata, edata, edata, edata, 640 ); - break; - default: - mm512_bswap32_intrlv80_8x64( vdata, pdata ); - } - + x16r_8way_prehash( vdata, pdata ); *noncev = mm512_intrlv_blend_32( _mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0, n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev ); @@ -580,34 +559,62 @@ int scanhash_x16r_8way( struct work *work, uint32_t max_nonce, #elif defined (X16R_4WAY) -static __thread uint32_t s_ntime = UINT32_MAX; -static __thread char hashOrder[X16R_HASH_FUNC_COUNT + 1] = { 0 }; - -union 
_x16r_4way_context_overlay +void x16r_4way_prehash( void *vdata, void *pdata ) { - blake512_4way_context blake; - bmw512_4way_context bmw; - hashState_echo echo; - hashState_groestl groestl; - skein512_4way_context skein; - jh512_4way_context jh; - keccak512_4way_context keccak; - luffa_2way_context luffa; - hashState_luffa luffa1; - cubehashParam cube; - sph_shavite512_context shavite; - simd_2way_context simd; - hamsi512_4way_context hamsi; - sph_fugue512_context fugue; - shabal512_4way_context shabal; - sph_whirlpool_context whirlpool; - sha512_4way_context sha512; -} __attribute__ ((aligned (64))); -typedef union _x16r_4way_context_overlay x16r_4way_context_overlay; + uint32_t vdata2[20*4] __attribute__ ((aligned (64))); + uint32_t edata[20] __attribute__ ((aligned (64))); -static __thread x16r_4way_context_overlay x16r_ctx; + const char elem = x16r_hash_order[0]; + const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0'; -void x16r_4way_hash( void* output, const void* input ) + switch ( algo ) + { + case JH: + mm256_bswap32_intrlv80_4x64( vdata, pdata ); + jh512_4way_init( &x16r_ctx.jh ); + jh512_4way_update( &x16r_ctx.jh, vdata, 64 ); + break; + case SKEIN: + mm256_bswap32_intrlv80_4x64( vdata, pdata ); + skein512_4way_init( &x16r_ctx.skein ); + skein512_4way_update( &x16r_ctx.skein, vdata, 64 ); + break; + case LUFFA: + mm128_bswap32_80( edata, pdata ); + intrlv_2x128( vdata2, edata, edata, 640 ); + luffa_2way_init( &x16r_ctx.luffa, 512 ); + luffa_2way_update( &x16r_ctx.luffa, vdata2, 64 ); + rintrlv_2x128_4x64( vdata, vdata2, vdata2, 640 ); + break; + case CUBEHASH: + mm128_bswap32_80( edata, pdata ); + cubehashInit( &x16r_ctx.cube, 512, 16, 32 ); + cubehashUpdate( &x16r_ctx.cube, (const byte*)edata, 64 ); + intrlv_4x64( vdata, edata, edata, edata, edata, 640 ); + break; + case HAMSI: + mm256_bswap32_intrlv80_4x64( vdata, pdata ); + hamsi512_4way_init( &x16r_ctx.hamsi ); + hamsi512_4way_update( &x16r_ctx.hamsi, vdata, 64 ); + break; + case SHABAL: + 
mm128_bswap32_intrlv80_4x32( vdata2, pdata ); + shabal512_4way_init( &x16r_ctx.shabal ); + shabal512_4way_update( &x16r_ctx.shabal, vdata2, 64 ); + rintrlv_4x32_4x64( vdata, vdata2, 640 ); + break; + case WHIRLPOOL: + mm128_bswap32_80( edata, pdata ); + sph_whirlpool_init( &x16r_ctx.whirlpool ); + sph_whirlpool( &x16r_ctx.whirlpool, edata, 64 ); + intrlv_4x64( vdata, edata, edata, edata, edata, 640 ); + break; + default: + mm256_bswap32_intrlv80_4x64( vdata, pdata ); + } +} + +void x16r_4way_hash_generic( void* output, const void* input ) { uint32_t vhash[20*4] __attribute__ ((aligned (128))); uint32_t hash0[20] __attribute__ ((aligned (64))); @@ -626,7 +633,7 @@ void x16r_4way_hash( void* output, const void* input ) for ( int i = 0; i < 16; i++ ) { - const char elem = hashOrder[i]; + const char elem = x16r_hash_order[i]; const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0'; switch ( algo ) @@ -698,11 +705,12 @@ void x16r_4way_hash( void* output, const void* input ) case LUFFA: if ( i == 0 ) { - intrlv_2x128( vhash, in0, in1, size<<3 ); - luffa512_2way_full( &ctx.luffa, vhash, vhash + (16<<1), 16 ); + intrlv_2x128( vhash, hash0, hash1, 640 ); + luffa_2way_update_close( &ctx.luffa, vhash, vhash + (16<<1), 16 ); dintrlv_2x128_512( hash0, hash1, vhash ); - intrlv_2x128( vhash, in2, in3, size<<3 ); - luffa512_2way_full( &ctx.luffa, vhash, vhash + (16<<1), 16 ); + intrlv_2x128( vhash, hash2, hash3, 640 ); + memcpy( &ctx, &x16r_ctx, sizeof(ctx) ); + luffa_2way_update_close( &ctx.luffa, vhash, vhash + (16<<1), 16 ); dintrlv_2x128_512( hash2, hash3, vhash ); } else @@ -863,10 +871,21 @@ void x16r_4way_hash( void* output, const void* input ) } size = 64; } - memcpy( output, hash0, 32 ); - memcpy( output+32, hash1, 32 ); - memcpy( output+64, hash2, 32 ); - memcpy( output+96, hash3, 32 ); + memcpy( output, hash0, 64 ); + memcpy( output+64, hash1, 64 ); + memcpy( output+128, hash2, 64 ); + memcpy( output+192, hash3, 64 ); +} + +void x16r_4way_hash( void* output, 
const void* input ) +{ + uint8_t hash[64*4] __attribute__ ((aligned (64))); + x16r_4way_hash_generic( hash, input ); + + memcpy( output, hash, 32 ); + memcpy( output+32, hash+64, 32 ); + memcpy( output+64, hash+128, 32 ); + memcpy( output+96, hash+192, 32 ); } int scanhash_x16r_4way( struct work *work, uint32_t max_nonce, @@ -874,8 +893,6 @@ int scanhash_x16r_4way( struct work *work, uint32_t max_nonce, { uint32_t hash[16*4] __attribute__ ((aligned (64))); uint32_t vdata[20*4] __attribute__ ((aligned (64))); - uint32_t vdata2[20*4] __attribute__ ((aligned (64))); - uint32_t edata[20] __attribute__ ((aligned (64))); uint32_t bedata1[2] __attribute__((aligned(64))); uint32_t *pdata = work->data; uint32_t *ptarget = work->target; @@ -891,67 +908,20 @@ int scanhash_x16r_4way( struct work *work, uint32_t max_nonce, bedata1[0] = bswap_32( pdata[1] ); bedata1[1] = bswap_32( pdata[2] ); + + static __thread uint32_t s_ntime = UINT32_MAX; const uint32_t ntime = bswap_32( pdata[17] ); if ( s_ntime != ntime ) { - x16_r_s_getAlgoString( (const uint8_t*)bedata1, hashOrder ); + x16_r_s_getAlgoString( (const uint8_t*)bedata1, x16r_hash_order ); s_ntime = ntime; if ( opt_debug && !thr_id ) - applog( LOG_INFO, "hash order %s (%08x)", hashOrder, ntime ); - } - - // Do midstate prehash on hash functions with block size <= 64 bytes. - const char elem = hashOrder[0]; - const uint8_t algo = elem >= 'A' ? 
elem - 'A' + 10 : elem - '0'; - switch ( algo ) - { - case JH: - mm256_bswap32_intrlv80_4x64( vdata, pdata ); - jh512_4way_init( &x16r_ctx.jh ); - jh512_4way_update( &x16r_ctx.jh, vdata, 64 ); - break; - case SKEIN: - mm256_bswap32_intrlv80_4x64( vdata, pdata ); - skein512_4way_init( &x16r_ctx.skein ); - skein512_4way_update( &x16r_ctx.skein, vdata, 64 ); - break; - case LUFFA: - mm128_bswap32_80( edata, pdata ); - intrlv_2x128( vdata2, edata, edata, 640 ); - luffa_2way_init( &x16r_ctx.luffa, 512 ); - luffa_2way_update( &x16r_ctx.luffa, vdata2, 64 ); - rintrlv_2x128_4x64( vdata, vdata2, vdata2, 512 ); - break; - case CUBEHASH: - mm128_bswap32_80( edata, pdata ); - cubehashInit( &x16r_ctx.cube, 512, 16, 32 ); - cubehashUpdate( &x16r_ctx.cube, (const byte*)edata, 64 ); - intrlv_4x64( vdata, edata, edata, edata, edata, 640 ); - break; - case HAMSI: - mm256_bswap32_intrlv80_4x64( vdata, pdata ); - hamsi512_4way_init( &x16r_ctx.hamsi ); - hamsi512_4way_update( &x16r_ctx.hamsi, vdata, 64 ); - break; - case SHABAL: - mm128_bswap32_intrlv80_4x32( vdata2, pdata ); - shabal512_4way_init( &x16r_ctx.shabal ); - shabal512_4way_update( &x16r_ctx.shabal, vdata2, 64 ); - rintrlv_4x32_4x64( vdata, vdata2, 640 ); - break; - case WHIRLPOOL: - mm128_bswap32_80( edata, pdata ); - sph_whirlpool_init( &x16r_ctx.whirlpool ); - sph_whirlpool( &x16r_ctx.whirlpool, edata, 64 ); - intrlv_4x64( vdata, edata, edata, edata, edata, 640 ); - break; - default: - mm256_bswap32_intrlv80_4x64( vdata, pdata ); + applog( LOG_INFO, "hash order %s (%08x)", x16r_hash_order, ntime ); } + x16r_4way_prehash( vdata, pdata ); *noncev = mm256_intrlv_blend_32( _mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev ); - do { x16r_4way_hash( hash, vdata ); diff --git a/algo/x16/x16r-gate.c b/algo/x16/x16r-gate.c index c438c1e..28ce579 100644 --- a/algo/x16/x16r-gate.c +++ b/algo/x16/x16r-gate.c @@ -1,7 +1,22 @@ #include "x16r-gate.h" +__thread char x16r_hash_order[ X16R_HASH_FUNC_COUNT + 1 ] = { 0 }; + void 
(*x16_r_s_getAlgoString) ( const uint8_t*, char* ) = NULL; +#if defined (X16R_8WAY) + +__thread x16r_8way_context_overlay x16r_ctx; + +#elif defined (X16R_4WAY) + +__thread x16r_4way_context_overlay x16r_ctx; + +#endif + +__thread x16r_context_overlay x16_ctx; + + void x16r_getAlgoString( const uint8_t* prevblock, char *output ) { char *sptr = output; @@ -207,15 +222,15 @@ void veil_build_extraheader( struct work* g_work, struct stratum_ctx* sctx ) bool register_x16rt_algo( algo_gate_t* gate ) { -#if defined (X16RT_8WAY) +#if defined (X16R_8WAY) gate->scanhash = (void*)&scanhash_x16rt_8way; - gate->hash = (void*)&x16rt_8way_hash; -#elif defined (X16RT_4WAY) + gate->hash = (void*)&x16r_8way_hash; +#elif defined (X16R_4WAY) gate->scanhash = (void*)&scanhash_x16rt_4way; - gate->hash = (void*)&x16rt_4way_hash; + gate->hash = (void*)&x16r_4way_hash; #else gate->scanhash = (void*)&scanhash_x16rt; - gate->hash = (void*)&x16rt_hash; + gate->hash = (void*)&x16r_hash; #endif gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT; opt_target_factor = 256.0; @@ -224,15 +239,15 @@ bool register_x16rt_algo( algo_gate_t* gate ) bool register_x16rt_veil_algo( algo_gate_t* gate ) { -#if defined (X16RT_8WAY) +#if defined (X16R_8WAY) gate->scanhash = (void*)&scanhash_x16rt_8way; - gate->hash = (void*)&x16rt_8way_hash; -#elif defined (X16RT_4WAY) + gate->hash = (void*)&x16r_8way_hash; +#elif defined (X16R_4WAY) gate->scanhash = (void*)&scanhash_x16rt_4way; - gate->hash = (void*)&x16rt_4way_hash; + gate->hash = (void*)&x16r_4way_hash; #else gate->scanhash = (void*)&scanhash_x16rt; - gate->hash = (void*)&x16rt_hash; + gate->hash = (void*)&x16r_hash; #endif gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT; gate->build_extraheader = (void*)&veil_build_extraheader; @@ -247,7 +262,7 @@ bool register_x16rt_veil_algo( algo_gate_t* gate ) bool register_hex_algo( algo_gate_t* gate ) { gate->scanhash = (void*)&scanhash_hex; - gate->hash = 
(void*)&hex_hash; + gate->hash = (void*)&x16r_hash; gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT; gate->gen_merkle_root = (void*)&SHA256_gen_merkle_root; opt_target_factor = 128.0; @@ -260,13 +275,13 @@ bool register_hex_algo( algo_gate_t* gate ) bool register_x21s_algo( algo_gate_t* gate ) { -#if defined (X21S_8WAY) +#if defined (X16R_8WAY) gate->scanhash = (void*)&scanhash_x21s_8way; gate->hash = (void*)&x21s_8way_hash; gate->miner_thread_init = (void*)&x21s_8way_thread_init; gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT; -#elif defined (X21S_4WAY) +#elif defined (X16R_4WAY) gate->scanhash = (void*)&scanhash_x21s_4way; gate->hash = (void*)&x21s_4way_hash; gate->miner_thread_init = (void*)&x21s_4way_thread_init; diff --git a/algo/x16/x16r-gate.h b/algo/x16/x16r-gate.h index f86d069..d58aab8 100644 --- a/algo/x16/x16r-gate.h +++ b/algo/x16/x16r-gate.h @@ -5,29 +5,60 @@ #include "simd-utils.h" #include #include - -#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__) - #define X16R_8WAY 1 -#elif defined(__AVX2__) && defined(__AES__) - #define X16R_4WAY 1 +#include "algo/blake/sph_blake.h" +#include "algo/bmw/sph_bmw.h" +#include "algo/groestl/sph_groestl.h" +#include "algo/jh/sph_jh.h" +#include "algo/keccak/sph_keccak.h" +#include "algo/skein/sph_skein.h" +#include "algo/shavite/sph_shavite.h" +#include "algo/luffa/luffa_for_sse2.h" +#include "algo/cubehash/cubehash_sse2.h" +#include "algo/simd/nist.h" +#include "algo/echo/sph_echo.h" +#include "algo/hamsi/sph_hamsi.h" +#include "algo/fugue/sph_fugue.h" +#include "algo/shabal/sph_shabal.h" +#include "algo/whirlpool/sph_whirlpool.h" +#include +#if defined(__AES__) + #include "algo/echo/aes_ni/hash_api.h" + #include "algo/groestl/aes_ni/hash-groestl.h" #endif +#if defined (__AVX2__) +#include "algo/blake/blake-hash-4way.h" +#include "algo/bmw/bmw-hash-4way.h" +#include "algo/groestl/aes_ni/hash-groestl.h" +#include 
"algo/skein/skein-hash-4way.h" +#include "algo/jh/jh-hash-4way.h" +#include "algo/keccak/keccak-hash-4way.h" +#include "algo/luffa/luffa-hash-2way.h" +#include "algo/simd/simd-hash-2way.h" +#include "algo/echo/aes_ni/hash_api.h" +#include "algo/hamsi/hamsi-hash-4way.h" +#include "algo/shabal/shabal-hash-4way.h" +#include "algo/sha/sha-hash-4way.h" +#if defined(__VAES__) + #include "algo/groestl/groestl512-hash-4way.h" + #include "algo/shavite/shavite-hash-4way.h" + #include "algo/echo/echo-hash-4way.h" +#endif +#endif // AVX2 #if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__) + + #define X16R_8WAY 1 #define X16RV2_8WAY 1 + #define X16RT_8WAY 1 + #define X21S_8WAY 1 + #elif defined(__AVX2__) && defined(__AES__) + #define X16RV2_4WAY 1 -#endif + #define X16RT_4WAY 1 + #define X21S_4WAY 1 + #define X16R_4WAY 1 -#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__) - #define X16RT_8WAY 1 -#elif defined(__AVX2__) && defined(__AES__) - #define X16RT_4WAY 1 -#endif - -#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__) - #define X21S_8WAY 1 -#elif defined(__AVX2__) && defined(__AES__) - #define X21S_4WAY 1 #endif enum x16r_Algo { @@ -50,6 +81,8 @@ enum x16r_Algo { X16R_HASH_FUNC_COUNT }; +extern __thread char x16r_hash_order[ X16R_HASH_FUNC_COUNT + 1 ]; + extern void (*x16_r_s_getAlgoString) ( const uint8_t*, char* ); void x16r_getAlgoString( const uint8_t *prevblock, char *output ); void x16s_getAlgoString( const uint8_t *prevblock, char *output ); @@ -67,25 +100,115 @@ bool register_x21s__algo( algo_gate_t* gate ); // x16r, x16s #if defined(X16R_8WAY) -void x16r_8way_hash( void *state, const void *input ); -int scanhash_x16r_8way( struct work *work, uint32_t max_nonce, - uint64_t *hashes_done, struct thr_info *mythr ); +union _x16r_8way_context_overlay +{ + blake512_8way_context blake; + bmw512_8way_context bmw; + 
skein512_8way_context skein; + jh512_8way_context jh; + keccak512_8way_context keccak; + luffa_4way_context luffa; + cubehashParam cube; + simd_4way_context simd; + hamsi512_8way_context hamsi; + sph_fugue512_context fugue; + shabal512_8way_context shabal; + sph_whirlpool_context whirlpool; + sha512_8way_context sha512; +#if defined(__VAES__) + groestl512_4way_context groestl; + shavite512_4way_context shavite; + echo_4way_context echo; +#else + hashState_groestl groestl; + sph_shavite512_context shavite; + hashState_echo echo; +#endif +} __attribute__ ((aligned (64))); + +typedef union _x16r_8way_context_overlay x16r_8way_context_overlay; + +extern __thread x16r_8way_context_overlay x16r_ctx; + +void x16r_8way_prehash( void *, void * ); +void x16r_8way_hash_generic( void *, const void * ); +void x16r_8way_hash( void *, const void * ); +int scanhash_x16r_8way( struct work *, uint32_t , + uint64_t *, struct thr_info * ); +extern __thread x16r_8way_context_overlay x16r_ctx; #elif defined(X16R_4WAY) -void x16r_4way_hash( void *state, const void *input ); -int scanhash_x16r_4way( struct work *work, uint32_t max_nonce, - uint64_t *hashes_done, struct thr_info *mythr ); +union _x16r_4way_context_overlay +{ + blake512_4way_context blake; + bmw512_4way_context bmw; + hashState_echo echo; + hashState_groestl groestl; + skein512_4way_context skein; + jh512_4way_context jh; + keccak512_4way_context keccak; + luffa_2way_context luffa; + hashState_luffa luffa1; + cubehashParam cube; + sph_shavite512_context shavite; + simd_2way_context simd; + hamsi512_4way_context hamsi; + sph_fugue512_context fugue; + shabal512_4way_context shabal; + sph_whirlpool_context whirlpool; + sha512_4way_context sha512; +} __attribute__ ((aligned (64))); -#else +typedef union _x16r_4way_context_overlay x16r_4way_context_overlay; -void x16r_hash( void *state, const void *input ); -int scanhash_x16r( struct work *work, uint32_t max_nonce, - uint64_t *hashes_done, struct thr_info *mythr ); +extern 
__thread x16r_4way_context_overlay x16r_ctx; + +void x16r_4way_prehash( void *, void * ); +void x16r_4way_hash_generic( void *, const void * ); +void x16r_4way_hash( void *, const void * ); +int scanhash_x16r_4way( struct work *, uint32_t, + uint64_t *, struct thr_info * ); +extern __thread x16r_4way_context_overlay x16r_ctx; #endif +// needed for hex +union _x16r_context_overlay +{ +#if defined(__AES__) + hashState_echo echo; + hashState_groestl groestl; +#else + sph_groestl512_context groestl; + sph_echo512_context echo; +#endif + sph_blake512_context blake; + sph_bmw512_context bmw; + sph_skein512_context skein; + sph_jh512_context jh; + sph_keccak512_context keccak; + hashState_luffa luffa; + cubehashParam cube; + sph_shavite512_context shavite; + hashState_sd simd; + sph_hamsi512_context hamsi; + sph_fugue512_context fugue; + sph_shabal512_context shabal; + sph_whirlpool_context whirlpool; + SHA512_CTX sha512; +} __attribute__ ((aligned (64))); + +typedef union _x16r_context_overlay x16r_context_overlay; + +extern __thread x16r_context_overlay x16_ctx; + +void x16r_prehash( void *, void * ); +void x16r_hash_generic( void *, const void * ); +void x16r_hash( void *, const void * ); +int scanhash_x16r( struct work *, uint32_t, uint64_t *, struct thr_info * ); + // x16Rv2 #if defined(X16RV2_8WAY) @@ -108,35 +231,35 @@ int scanhash_x16rv2( struct work *work, uint32_t max_nonce, #endif // x16rt, veil -#if defined(X16RT_8WAY) +#if defined(X16R_8WAY) -void x16rt_8way_hash( void *state, const void *input ); +//void x16rt_8way_hash( void *state, const void *input ); int scanhash_x16rt_8way( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ); -#elif defined(X16RT_4WAY) +#elif defined(X16R_4WAY) -void x16rt_4way_hash( void *state, const void *input ); +//void x16rt_4way_hash( void *state, const void *input ); int scanhash_x16rt_4way( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ); #else -void 
x16rt_hash( void *state, const void *input ); +//void x16rt_hash( void *state, const void *input ); int scanhash_x16rt( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ); #endif // x21s -#if defined(X21S_8WAY) +#if defined(X16R_8WAY) void x21s_8way_hash( void *state, const void *input ); int scanhash_x21s_8way( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ); bool x21s_8way_thread_init(); -#elif defined(X21S_4WAY) +#elif defined(X16R_4WAY) void x21s_4way_hash( void *state, const void *input ); int scanhash_x21s_4way( struct work *work, uint32_t max_nonce, @@ -152,7 +275,7 @@ bool x21s_thread_init(); #endif -void hex_hash( void *state, const void *input ); +//void hex_hash( void *state, const void *input ); int scanhash_hex( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ); diff --git a/algo/x16/x16r.c b/algo/x16/x16r.c index 6de195a..503a9f0 100644 --- a/algo/x16/x16r.c +++ b/algo/x16/x16r.c @@ -9,72 +9,56 @@ #include #include #include -#include "algo/blake/sph_blake.h" -#include "algo/bmw/sph_bmw.h" -#include "algo/groestl/sph_groestl.h" -#include "algo/jh/sph_jh.h" -#include "algo/keccak/sph_keccak.h" -#include "algo/skein/sph_skein.h" -#include "algo/shavite/sph_shavite.h" -#include "algo/luffa/luffa_for_sse2.h" -#include "algo/cubehash/cubehash_sse2.h" -#include "algo/simd/nist.h" -#include "algo/echo/sph_echo.h" -#include "algo/hamsi/sph_hamsi.h" -#include "algo/fugue/sph_fugue.h" -#include "algo/shabal/sph_shabal.h" -#include "algo/whirlpool/sph_whirlpool.h" -#include -#if defined(__AES__) - #include "algo/echo/aes_ni/hash_api.h" - #include "algo/groestl/aes_ni/hash-groestl.h" -#endif -static __thread uint32_t s_ntime = UINT32_MAX; -static __thread char hashOrder[X16R_HASH_FUNC_COUNT + 1] = { 0 }; - -union _x16r_context_overlay +void x16r_prehash( void *edata, void *pdata ) { -#if defined(__AES__) - hashState_echo echo; - hashState_groestl 
groestl; -#else - sph_groestl512_context groestl; - sph_echo512_context echo; -#endif - sph_blake512_context blake; - sph_bmw512_context bmw; - sph_skein512_context skein; - sph_jh512_context jh; - sph_keccak512_context keccak; - hashState_luffa luffa; - cubehashParam cube; - sph_shavite512_context shavite; - hashState_sd simd; - sph_hamsi512_context hamsi; - sph_fugue512_context fugue; - sph_shabal512_context shabal; - sph_whirlpool_context whirlpool; - SHA512_CTX sha512; -}; -typedef union _x16r_context_overlay x16r_context_overlay; + const char elem = x16r_hash_order[0]; + const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0'; -void x16r_hash( void* output, const void* input ) + switch ( algo ) + { + case JH: + sph_jh512_init( &x16_ctx.jh ); + sph_jh512( &x16_ctx.jh, edata, 64 ); + break; + case SKEIN: + sph_skein512_init( &x16_ctx.skein ); + sph_skein512( &x16_ctx.skein, edata, 64 ); + break; + case LUFFA: + init_luffa( &x16_ctx.luffa, 512 ); + update_luffa( &x16_ctx.luffa, (const BitSequence*)edata, 64 ); + break; + case CUBEHASH: + cubehashInit( &x16_ctx.cube, 512, 16, 32 ); + cubehashUpdate( &x16_ctx.cube, (const byte*)edata, 64 ); + break; + case HAMSI: + sph_hamsi512_init( &x16_ctx.hamsi ); + sph_hamsi512( &x16_ctx.hamsi, edata, 64 ); + break; + case SHABAL: + sph_shabal512_init( &x16_ctx.shabal ); + sph_shabal512( &x16_ctx.shabal, edata, 64 ); + break; + case WHIRLPOOL: + sph_whirlpool_init( &x16_ctx.whirlpool ); + sph_whirlpool( &x16_ctx.whirlpool, edata, 64 ); + break; + } +} + +void x16r_hash_generic( void* output, const void* input ) { uint32_t _ALIGN(128) hash[16]; x16r_context_overlay ctx; + memcpy( &ctx, &x16_ctx, sizeof(ctx) ); void *in = (void*) input; int size = 80; -/* - if ( s_ntime == UINT32_MAX ) - { - const uint8_t* in8 = (uint8_t*) input; - x16_r_s_getAlgoString( &in8[4], hashOrder ); - } -*/ + for ( int i = 0; i < 16; i++ ) { - const char elem = hashOrder[i]; + const char elem = x16r_hash_order[i]; const uint8_t algo = elem >= 
'A' ? elem - 'A' + 10 : elem - '0'; switch ( algo ) @@ -91,23 +75,21 @@ void x16r_hash( void* output, const void* input ) break; case GROESTL: #if defined(__AES__) - init_groestl( &ctx.groestl, 64 ); - update_and_final_groestl( &ctx.groestl, (char*)hash, - (const char*)in, size<<3 ); + groestl512_full( &ctx.groestl, (char*)hash, (char*)in, size<<3 ); #else sph_groestl512_init( &ctx.groestl ); sph_groestl512( &ctx.groestl, in, size ); sph_groestl512_close(&ctx.groestl, hash); #endif break; - case SKEIN: - sph_skein512_init( &ctx.skein ); - sph_skein512( &ctx.skein, in, size ); - sph_skein512_close( &ctx.skein, hash ); - break; case JH: - sph_jh512_init( &ctx.jh ); - sph_jh512(&ctx.jh, in, size ); + if ( i == 0 ) + sph_jh512(&ctx.jh, in+64, 16 ); + else + { + sph_jh512_init( &ctx.jh ); + sph_jh512(&ctx.jh, in, size ); + } sph_jh512_close(&ctx.jh, hash ); break; case KECCAK: @@ -115,15 +97,31 @@ void x16r_hash( void* output, const void* input ) sph_keccak512( &ctx.keccak, in, size ); sph_keccak512_close( &ctx.keccak, hash ); break; + case SKEIN: + if ( i == 0 ) + sph_skein512(&ctx.skein, in+64, 16 ); + else + { + sph_skein512_init( &ctx.skein ); + sph_skein512( &ctx.skein, in, size ); + } + sph_skein512_close( &ctx.skein, hash ); + break; case LUFFA: - init_luffa( &ctx.luffa, 512 ); - update_and_final_luffa( &ctx.luffa, (BitSequence*)hash, - (const BitSequence*)in, size ); + if ( i == 0 ) + update_and_final_luffa( &ctx.luffa, (BitSequence*)hash, + (const BitSequence*)in+64, 16 ); + else + luffa_full( &ctx.luffa, (BitSequence*)hash, 512, + (const BitSequence*)in, size ); break; case CUBEHASH: - cubehashInit( &ctx.cube, 512, 16, 32 ); - cubehashUpdateDigest( &ctx.cube, (byte*) hash, - (const byte*)in, size ); + if ( i == 0 ) + cubehashUpdateDigest( &ctx.cube, (byte*)hash, + (const byte*)in+64, 16 ); + else + cubehash_full( &ctx.cube, (byte*)hash, 512, + (byte*)in, size ); break; case SHAVITE: sph_shavite512_init( &ctx.shavite ); @@ -131,93 +129,109 @@ void x16r_hash( 
void* output, const void* input ) sph_shavite512_close( &ctx.shavite, hash ); break; case SIMD: - init_sd( &ctx.simd, 512 ); - update_final_sd( &ctx.simd, (BitSequence *)hash, - (const BitSequence*)in, size<<3 ); + simd_full( &ctx.simd, (BitSequence *)hash, + (const BitSequence*)in, size<<3 ); break; case ECHO: #if defined(__AES__) - init_echo( &ctx.echo, 512 ); - update_final_echo ( &ctx.echo, (BitSequence *)hash, - (const BitSequence*)in, size<<3 ); + echo_full( &ctx.echo, hash, 512, in, size ); #else - sph_echo512_init( &ctx.echo ); - sph_echo512( &ctx.echo, in, size ); - sph_echo512_close( &ctx.echo, hash ); + sph_echo512_init( &ctx.echo ); + sph_echo512( &ctx.echo, in, size ); + sph_echo512_close( &ctx.echo, hash ); #endif break; case HAMSI: - sph_hamsi512_init( &ctx.hamsi ); - sph_hamsi512( &ctx.hamsi, in, size ); - sph_hamsi512_close( &ctx.hamsi, hash ); + if ( i == 0 ) + sph_hamsi512( &ctx.hamsi, in+64, 16 ); + else + { + sph_hamsi512_init( &ctx.hamsi ); + sph_hamsi512( &ctx.hamsi, in, size ); + } + sph_hamsi512_close( &ctx.hamsi, hash ); break; case FUGUE: - sph_fugue512_init( &ctx.fugue ); - sph_fugue512( &ctx.fugue, in, size ); - sph_fugue512_close( &ctx.fugue, hash ); + sph_fugue512_init( &ctx.fugue ); + sph_fugue512( &ctx.fugue, in, size ); + sph_fugue512_close( &ctx.fugue, hash ); break; case SHABAL: - sph_shabal512_init( &ctx.shabal ); - sph_shabal512( &ctx.shabal, in, size ); - sph_shabal512_close( &ctx.shabal, hash ); + if ( i == 0 ) + sph_shabal512( &ctx.shabal, in+64, 16 ); + else + { + sph_shabal512_init( &ctx.shabal ); + sph_shabal512( &ctx.shabal, in, size ); + } + sph_shabal512_close( &ctx.shabal, hash ); break; case WHIRLPOOL: - sph_whirlpool_init( &ctx.whirlpool ); - sph_whirlpool( &ctx.whirlpool, in, size ); - sph_whirlpool_close( &ctx.whirlpool, hash ); + if ( i == 0 ) + sph_whirlpool( &ctx.whirlpool, in+64, 16 ); + else + { + sph_whirlpool_init( &ctx.whirlpool ); + sph_whirlpool( &ctx.whirlpool, in, size ); + } + sph_whirlpool_close( 
&ctx.whirlpool, hash ); break; case SHA_512: - SHA512_Init( &ctx.sha512 ); - SHA512_Update( &ctx.sha512, in, size ); - SHA512_Final( (unsigned char*) hash, &ctx.sha512 ); + SHA512_Init( &ctx.sha512 ); + SHA512_Update( &ctx.sha512, in, size ); + SHA512_Final( (unsigned char*) hash, &ctx.sha512 ); break; } in = (void*) hash; size = 64; } - memcpy(output, hash, 32); + memcpy( output, hash, 64 ); +} + +void x16r_hash( void* output, const void* input ) +{ + uint8_t hash[64] __attribute__ ((aligned (64))); + x16r_hash_generic( hash, input ); + + memcpy( output, hash, 32 ); } int scanhash_x16r( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t _ALIGN(128) hash32[8]; - uint32_t _ALIGN(128) endiandata[20]; + uint32_t _ALIGN(128) edata[20]; uint32_t *pdata = work->data; uint32_t *ptarget = work->target; - const uint32_t Htarg = ptarget[7]; const uint32_t first_nonce = pdata[19]; - int thr_id = mythr->id; // thr_id arg is deprecated + const int thr_id = mythr->id; uint32_t nonce = first_nonce; - volatile uint8_t *restart = &(work_restart[thr_id].restart); + volatile uint8_t *restart = &( work_restart[thr_id].restart ); + const bool bench = opt_benchmark; + if ( bench ) ptarget[7] = 0x0cff; - casti_m128i( endiandata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) ); - casti_m128i( endiandata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) ); - casti_m128i( endiandata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) ); - casti_m128i( endiandata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) ); - casti_m128i( endiandata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) ); + mm128_bswap32_80( edata, pdata ); + static __thread uint32_t s_ntime = UINT32_MAX; if ( s_ntime != pdata[17] ) { uint32_t ntime = swab32(pdata[17]); - x16_r_s_getAlgoString( (const uint8_t*) (&endiandata[1]), hashOrder ); + x16_r_s_getAlgoString( (const uint8_t*)(&edata[1]), x16r_hash_order ); s_ntime = ntime; if ( opt_debug && !thr_id ) - applog( LOG_DEBUG, "hash 
order %s (%08x)", hashOrder, ntime ); + applog( LOG_DEBUG, "hash order %s (%08x)", x16r_hash_order, ntime ); } - if ( opt_benchmark ) - ptarget[7] = 0x0cff; + x16r_prehash( edata, pdata ); do { - be32enc( &endiandata[19], nonce ); - x16r_hash( hash32, endiandata ); + edata[19] = nonce; + x16r_hash( hash32, edata ); - if ( hash32[7] <= Htarg ) - if (fulltest( hash32, ptarget ) && !opt_benchmark ) + if ( unlikely( valid_hash( hash32, ptarget ) && !bench ) ) { - pdata[19] = nonce; + pdata[19] = bswap_32( nonce ); submit_solution( work, hash32, mythr ); } nonce++; @@ -226,3 +240,4 @@ int scanhash_x16r( struct work *work, uint32_t max_nonce, *hashes_done = pdata[19] - first_nonce + 1; return 0; } + diff --git a/algo/x16/x16rt-4way.c b/algo/x16/x16rt-4way.c index d6da77c..da88d27 100644 --- a/algo/x16/x16rt-4way.c +++ b/algo/x16/x16rt-4way.c @@ -2,481 +2,14 @@ #include #include #include -#include "algo/blake/blake-hash-4way.h" -#include "algo/bmw/bmw-hash-4way.h" -#include "algo/groestl/aes_ni/hash-groestl.h" -#include "algo/groestl/aes_ni/hash-groestl.h" -#include "algo/skein/skein-hash-4way.h" -#include "algo/jh/jh-hash-4way.h" -#include "algo/keccak/keccak-hash-4way.h" -#include "algo/shavite/sph_shavite.h" -#include "algo/luffa/luffa-hash-2way.h" -#include "algo/luffa/luffa_for_sse2.h" -#include "algo/cubehash/cubehash_sse2.h" -#include "algo/cubehash/cube-hash-2way.h" -#include "algo/simd/simd-hash-2way.h" -#include "algo/echo/aes_ni/hash_api.h" -#include "algo/hamsi/hamsi-hash-4way.h" -#include "algo/fugue/sph_fugue.h" -#include "algo/shabal/shabal-hash-4way.h" -#include "algo/whirlpool/sph_whirlpool.h" -#include "algo/sha/sha-hash-4way.h" -#if defined(__VAES__) - #include "algo/groestl/groestl512-hash-4way.h" - #include "algo/shavite/shavite-hash-4way.h" - #include "algo/echo/echo-hash-4way.h" -#endif -#if defined (X16RT_8WAY) - -static __thread uint32_t s_ntime = UINT32_MAX; -static __thread char hashOrder[X16R_HASH_FUNC_COUNT + 1] = { 0 }; - -union 
_x16rt_8way_context_overlay -{ - blake512_8way_context blake; - bmw512_8way_context bmw; - skein512_8way_context skein; - jh512_8way_context jh; - keccak512_8way_context keccak; - luffa_4way_context luffa; - cubehashParam cube; -// cube_4way_context cube; - simd_4way_context simd; - hamsi512_8way_context hamsi; - sph_fugue512_context fugue; - shabal512_8way_context shabal; - sph_whirlpool_context whirlpool; - sha512_8way_context sha512; -#if defined(__VAES__) - groestl512_4way_context groestl; - shavite512_4way_context shavite; - echo_4way_context echo; -#else - hashState_groestl groestl; - sph_shavite512_context shavite; - hashState_echo echo; -#endif -} __attribute__ ((aligned (64))); - -typedef union _x16rt_8way_context_overlay x16rt_8way_context_overlay; - -static __thread x16rt_8way_context_overlay x16rt_ctx; - -void x16rt_8way_hash( void* output, const void* input ) -{ - uint32_t vhash[20*8] __attribute__ ((aligned (128))); - uint32_t hash0[20] __attribute__ ((aligned (64))); - uint32_t hash1[20] __attribute__ ((aligned (64))); - uint32_t hash2[20] __attribute__ ((aligned (64))); - uint32_t hash3[20] __attribute__ ((aligned (64))); - uint32_t hash4[20] __attribute__ ((aligned (64))); - uint32_t hash5[20] __attribute__ ((aligned (64))); - uint32_t hash6[20] __attribute__ ((aligned (64))); - uint32_t hash7[20] __attribute__ ((aligned (64))); - x16rt_8way_context_overlay ctx; - memcpy( &ctx, &x16rt_ctx, sizeof(ctx) ); - void *in0 = (void*) hash0; - void *in1 = (void*) hash1; - void *in2 = (void*) hash2; - void *in3 = (void*) hash3; - void *in4 = (void*) hash4; - void *in5 = (void*) hash5; - void *in6 = (void*) hash6; - void *in7 = (void*) hash7; - int size = 80; - - dintrlv_8x64( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7, - input, 640 ); - - for ( int i = 0; i < 16; i++ ) - { - const char elem = hashOrder[i]; - const uint8_t algo = elem >= 'A' ? 
elem - 'A' + 10 : elem - '0'; - - switch ( algo ) - { - case BLAKE: - if ( i == 0 ) - blake512_8way_full( &ctx.blake, vhash, input, size ); - else - { - intrlv_8x64( vhash, in0, in1, in2, in3, in4, in5, in6, in7, - size<<3 ); - blake512_8way_full( &ctx.blake, vhash, vhash, size ); - } - dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, - hash6, hash7, vhash ); - break; - case BMW: - bmw512_8way_init( &ctx.bmw ); - if ( i == 0 ) - bmw512_8way_update( &ctx.bmw, input, size ); - else - { - intrlv_8x64( vhash, in0, in1, in2, in3, in4, in5, in6, in7, - size<<3 ); - bmw512_8way_update( &ctx.bmw, vhash, size ); - } - bmw512_8way_close( &ctx.bmw, vhash ); - dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6, - hash7, vhash ); - break; - case GROESTL: -#if defined(__VAES__) - intrlv_4x128( vhash, in0, in1, in2, in3, size<<3 ); - groestl512_4way_init( &ctx.groestl, 64 ); - groestl512_4way_update_close( &ctx.groestl, vhash, vhash, size<<3 ); - dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhash ); - intrlv_4x128( vhash, in4, in5, in6, in7, size<<3 ); - groestl512_4way_init( &ctx.groestl, 64 ); - groestl512_4way_update_close( &ctx.groestl, vhash, vhash, size<<3 ); - dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash ); -#else - groestl512_full( &ctx.groestl, (char*)hash0, (char*)in0, size<<3 ); - groestl512_full( &ctx.groestl, (char*)hash1, (char*)in1, size<<3 ); - groestl512_full( &ctx.groestl, (char*)hash2, (char*)in2, size<<3 ); - groestl512_full( &ctx.groestl, (char*)hash3, (char*)in3, size<<3 ); - groestl512_full( &ctx.groestl, (char*)hash4, (char*)in4, size<<3 ); - groestl512_full( &ctx.groestl, (char*)hash5, (char*)in5, size<<3 ); - groestl512_full( &ctx.groestl, (char*)hash6, (char*)in6, size<<3 ); - groestl512_full( &ctx.groestl, (char*)hash7, (char*)in7, size<<3 ); -#endif - break; - case JH: - if ( i == 0 ) - jh512_8way_update( &ctx.jh, input + (64<<3), 16 ); - else - { - intrlv_8x64( vhash, in0, in1, in2, in3, in4, in5, in6, in7, - 
size<<3 ); - jh512_8way_init( &ctx.jh ); - jh512_8way_update( &ctx.jh, vhash, size ); - } - jh512_8way_close( &ctx.jh, vhash ); - dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6, - hash7, vhash ); - break; - case KECCAK: - keccak512_8way_init( &ctx.keccak ); - if ( i == 0 ) - keccak512_8way_update( &ctx.keccak, input, size ); - else - { - intrlv_8x64( vhash, in0, in1, in2, in3, in4, in5, in6, in7, - size<<3 ); - keccak512_8way_update( &ctx.keccak, vhash, size ); - } - keccak512_8way_close( &ctx.keccak, vhash ); - dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6, - hash7, vhash ); - break; - case SKEIN: - if ( i == 0 ) - skein512_8way_update( &ctx.skein, input + (64<<3), 16 ); - else - { - intrlv_8x64( vhash, in0, in1, in2, in3, in4, in5, in6, in7, - size<<3 ); - skein512_8way_init( &ctx.skein ); - skein512_8way_update( &ctx.skein, vhash, size ); - } - skein512_8way_close( &ctx.skein, vhash ); - dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6, - hash7, vhash ); - break; - case LUFFA: - if ( i == 0 ) - { - intrlv_4x128( vhash, in0, in1, in2, in3, size<<3 ); - luffa_4way_update_close( &ctx.luffa, vhash, - vhash + (16<<2), 16 ); - dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhash ); - memcpy( &ctx, &x16rt_ctx, sizeof(ctx) ); - intrlv_4x128( vhash, in4, in5, in6, in7, size<<3 ); - luffa_4way_update_close( &ctx.luffa, vhash, - vhash + (16<<2), 16 ); - dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash ); - } - else - { - intrlv_4x128( vhash, in0, in1, in2, in3, size<<3 ); - luffa512_4way_full( &ctx.luffa, vhash, vhash, size ); - dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhash ); - intrlv_4x128( vhash, in4, in5, in6, in7, size<<3 ); - luffa512_4way_full( &ctx.luffa, vhash, vhash, size ); - dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash ); - } - break; - case CUBEHASH: - if ( i == 0 ) - { - cubehashUpdateDigest( &ctx.cube, (byte*)hash0, - (const byte*)in0 + 64, 16 ); - memcpy( &ctx, &x16rt_ctx, 
sizeof(ctx) ); - cubehashUpdateDigest( &ctx.cube, (byte*)hash1, - (const byte*)in1 + 64, 16 ); - memcpy( &ctx, &x16rt_ctx, sizeof(ctx) ); - cubehashUpdateDigest( &ctx.cube, (byte*)hash2, - (const byte*)in2 + 64, 16 ); - memcpy( &ctx, &x16rt_ctx, sizeof(ctx) ); - cubehashUpdateDigest( &ctx.cube, (byte*)hash3, - (const byte*)in3 + 64, 16 ); - memcpy( &ctx, &x16rt_ctx, sizeof(ctx) ); - cubehashUpdateDigest( &ctx.cube, (byte*)hash4, - (const byte*)in4 + 64, 16 ); - memcpy( &ctx, &x16rt_ctx, sizeof(ctx) ); - cubehashUpdateDigest( &ctx.cube, (byte*)hash5, - (const byte*)in5 + 64, 16 ); - memcpy( &ctx, &x16rt_ctx, sizeof(ctx) ); - cubehashUpdateDigest( &ctx.cube, (byte*)hash6, - (const byte*)in6 + 64, 16 ); - memcpy( &ctx, &x16rt_ctx, sizeof(ctx) ); - cubehashUpdateDigest( &ctx.cube, (byte*)hash7, - (const byte*)in7 + 64, 16 ); - } - else - { - cubehashInit( &ctx.cube, 512, 16, 32 ); - cubehashUpdateDigest( &ctx.cube, (byte*) hash0, - (const byte*)in0, size ); - cubehashInit( &ctx.cube, 512, 16, 32 ); - cubehashUpdateDigest( &ctx.cube, (byte*) hash1, - (const byte*)in1, size ); - cubehashInit( &ctx.cube, 512, 16, 32 ); - cubehashUpdateDigest( &ctx.cube, (byte*) hash2, - (const byte*)in2, size ); - cubehashInit( &ctx.cube, 512, 16, 32 ); - cubehashUpdateDigest( &ctx.cube, (byte*) hash3, - (const byte*)in3, size ); - cubehashInit( &ctx.cube, 512, 16, 32 ); - cubehashUpdateDigest( &ctx.cube, (byte*) hash4, - (const byte*)in4, size ); - cubehashInit( &ctx.cube, 512, 16, 32 ); - cubehashUpdateDigest( &ctx.cube, (byte*) hash5, - (const byte*)in5, size ); - cubehashInit( &ctx.cube, 512, 16, 32 ); - cubehashUpdateDigest( &ctx.cube, (byte*) hash6, - (const byte*)in6, size ); - cubehashInit( &ctx.cube, 512, 16, 32 ); - cubehashUpdateDigest( &ctx.cube, (byte*) hash7, - (const byte*)in7, size ); - } - break; - case SHAVITE: -#if defined(__VAES__) - intrlv_4x128( vhash, in0, in1, in2, in3, size<<3 ); - shavite512_4way_full( &ctx.shavite, vhash, vhash, size ); - dintrlv_4x128_512( 
hash0, hash1, hash2, hash3, vhash ); - intrlv_4x128( vhash, in4, in5, in6, in7, size<<3 ); - shavite512_4way_full( &ctx.shavite, vhash, vhash, size ); - dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash ); -#else - sph_shavite512_init( &ctx.shavite ); - sph_shavite512( &ctx.shavite, in0, size ); - sph_shavite512_close( &ctx.shavite, hash0 ); - sph_shavite512_init( &ctx.shavite ); - sph_shavite512( &ctx.shavite, in1, size ); - sph_shavite512_close( &ctx.shavite, hash1 ); - sph_shavite512_init( &ctx.shavite ); - sph_shavite512( &ctx.shavite, in2, size ); - sph_shavite512_close( &ctx.shavite, hash2 ); - sph_shavite512_init( &ctx.shavite ); - sph_shavite512( &ctx.shavite, in3, size ); - sph_shavite512_close( &ctx.shavite, hash3 ); - sph_shavite512_init( &ctx.shavite ); - sph_shavite512( &ctx.shavite, in4, size ); - sph_shavite512_close( &ctx.shavite, hash4 ); - sph_shavite512_init( &ctx.shavite ); - sph_shavite512( &ctx.shavite, in5, size ); - sph_shavite512_close( &ctx.shavite, hash5 ); - sph_shavite512_init( &ctx.shavite ); - sph_shavite512( &ctx.shavite, in6, size ); - sph_shavite512_close( &ctx.shavite, hash6 ); - sph_shavite512_init( &ctx.shavite ); - sph_shavite512( &ctx.shavite, in7, size ); - sph_shavite512_close( &ctx.shavite, hash7 ); -#endif - break; - case SIMD: - intrlv_4x128( vhash, in0, in1, in2, in3, size<<3 ); - simd512_4way_full( &ctx.simd, vhash, vhash, size ); - dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhash ); - intrlv_4x128( vhash, in4, in5, in6, in7, size<<3 ); - simd512_4way_full( &ctx.simd, vhash, vhash, size ); - dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash ); - break; - case ECHO: -#if defined(__VAES__) - intrlv_4x128( vhash, in0, in1, in2, in3, size<<3 ); - echo_4way_full( &ctx.echo, vhash, 512, vhash, size ); - dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhash ); - intrlv_4x128( vhash, in4, in5, in6, in7, size<<3 ); - echo_4way_full( &ctx.echo, vhash, 512, vhash, size ); - dintrlv_4x128_512( hash4, hash5, hash6, hash7, 
vhash ); -#else - echo_full( &ctx.echo, (BitSequence *)hash0, 512, - (const BitSequence *)in0, size ); - echo_full( &ctx.echo, (BitSequence *)hash1, 512, - (const BitSequence *)in1, size ); - echo_full( &ctx.echo, (BitSequence *)hash2, 512, - (const BitSequence *)in2, size ); - echo_full( &ctx.echo, (BitSequence *)hash3, 512, - (const BitSequence *)in3, size ); - echo_full( &ctx.echo, (BitSequence *)hash4, 512, - (const BitSequence *)in4, size ); - echo_full( &ctx.echo, (BitSequence *)hash5, 512, - (const BitSequence *)in5, size ); - echo_full( &ctx.echo, (BitSequence *)hash6, 512, - (const BitSequence *)in6, size ); - echo_full( &ctx.echo, (BitSequence *)hash7, 512, - (const BitSequence *)in7, size ); -#endif - break; - case HAMSI: - if ( i == 0 ) - hamsi512_8way_update( &ctx.hamsi, input + (64<<3), 16 ); - else - { - intrlv_8x64( vhash, in0, in1, in2, in3, in4, in5, in6, in7, - size<<3 ); - hamsi512_8way_init( &ctx.hamsi ); - hamsi512_8way_update( &ctx.hamsi, vhash, size ); - } - hamsi512_8way_close( &ctx.hamsi, vhash ); - dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6, - hash7, vhash ); - break; - case FUGUE: - sph_fugue512_init( &ctx.fugue ); - sph_fugue512( &ctx.fugue, in0, size ); - sph_fugue512_close( &ctx.fugue, hash0 ); - sph_fugue512_init( &ctx.fugue ); - sph_fugue512( &ctx.fugue, in1, size ); - sph_fugue512_close( &ctx.fugue, hash1 ); - sph_fugue512_init( &ctx.fugue ); - sph_fugue512( &ctx.fugue, in2, size ); - sph_fugue512_close( &ctx.fugue, hash2 ); - sph_fugue512_init( &ctx.fugue ); - sph_fugue512( &ctx.fugue, in3, size ); - sph_fugue512_close( &ctx.fugue, hash3 ); - sph_fugue512_init( &ctx.fugue ); - sph_fugue512( &ctx.fugue, in4, size ); - sph_fugue512_close( &ctx.fugue, hash4 ); - sph_fugue512_init( &ctx.fugue ); - sph_fugue512( &ctx.fugue, in5, size ); - sph_fugue512_close( &ctx.fugue, hash5 ); - sph_fugue512_init( &ctx.fugue ); - sph_fugue512( &ctx.fugue, in6, size ); - sph_fugue512_close( &ctx.fugue, hash6 ); - 
sph_fugue512_init( &ctx.fugue ); - sph_fugue512( &ctx.fugue, in7, size ); - sph_fugue512_close( &ctx.fugue, hash7 ); - break; - case SHABAL: - intrlv_8x32( vhash, in0, in1, in2, in3, in4, in5, in6, in7, - size<<3 ); - if ( i == 0 ) - shabal512_8way_update( &ctx.shabal, vhash + (16<<3), 16 ); - else - { - shabal512_8way_init( &ctx.shabal ); - shabal512_8way_update( &ctx.shabal, vhash, size ); - } - shabal512_8way_close( &ctx.shabal, vhash ); - dintrlv_8x32_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6, - hash7, vhash ); - break; - case WHIRLPOOL: - if ( i == 0 ) - { - sph_whirlpool( &ctx.whirlpool, in0 + 64, 16 ); - sph_whirlpool_close( &ctx.whirlpool, hash0 ); - memcpy( &ctx, &x16rt_ctx, sizeof(ctx) ); - sph_whirlpool( &ctx.whirlpool, in1 + 64, 16 ); - sph_whirlpool_close( &ctx.whirlpool, hash1 ); - memcpy( &ctx, &x16rt_ctx, sizeof(ctx) ); - sph_whirlpool( &ctx.whirlpool, in2 + 64, 16 ); - sph_whirlpool_close( &ctx.whirlpool, hash2 ); - memcpy( &ctx, &x16rt_ctx, sizeof(ctx) ); - sph_whirlpool( &ctx.whirlpool, in3 + 64, 16 ); - sph_whirlpool_close( &ctx.whirlpool, hash3 ); - memcpy( &ctx, &x16rt_ctx, sizeof(ctx) ); - sph_whirlpool( &ctx.whirlpool, in4 + 64, 16 ); - sph_whirlpool_close( &ctx.whirlpool, hash4 ); - memcpy( &ctx, &x16rt_ctx, sizeof(ctx) ); - sph_whirlpool( &ctx.whirlpool, in5 + 64, 16 ); - sph_whirlpool_close( &ctx.whirlpool, hash5 ); - memcpy( &ctx, &x16rt_ctx, sizeof(ctx) ); - sph_whirlpool( &ctx.whirlpool, in6 + 64, 16 ); - sph_whirlpool_close( &ctx.whirlpool, hash6 ); - memcpy( &ctx, &x16rt_ctx, sizeof(ctx) ); - sph_whirlpool( &ctx.whirlpool, in7 + 64, 16 ); - sph_whirlpool_close( &ctx.whirlpool, hash7 ); - } - else - { - sph_whirlpool_init( &ctx.whirlpool ); - sph_whirlpool( &ctx.whirlpool, in0, size ); - sph_whirlpool_close( &ctx.whirlpool, hash0 ); - sph_whirlpool_init( &ctx.whirlpool ); - sph_whirlpool( &ctx.whirlpool, in1, size ); - sph_whirlpool_close( &ctx.whirlpool, hash1 ); - sph_whirlpool_init( &ctx.whirlpool ); - sph_whirlpool( 
&ctx.whirlpool, in2, size ); - sph_whirlpool_close( &ctx.whirlpool, hash2 ); - sph_whirlpool_init( &ctx.whirlpool ); - sph_whirlpool( &ctx.whirlpool, in3, size ); - sph_whirlpool_close( &ctx.whirlpool, hash3 ); - sph_whirlpool_init( &ctx.whirlpool ); - sph_whirlpool( &ctx.whirlpool, in4, size ); - sph_whirlpool_close( &ctx.whirlpool, hash4 ); - sph_whirlpool_init( &ctx.whirlpool ); - sph_whirlpool( &ctx.whirlpool, in5, size ); - sph_whirlpool_close( &ctx.whirlpool, hash5 ); - sph_whirlpool_init( &ctx.whirlpool ); - sph_whirlpool( &ctx.whirlpool, in6, size ); - sph_whirlpool_close( &ctx.whirlpool, hash6 ); - sph_whirlpool_init( &ctx.whirlpool ); - sph_whirlpool( &ctx.whirlpool, in7, size ); - sph_whirlpool_close( &ctx.whirlpool, hash7 ); - } - break; - case SHA_512: - sha512_8way_init( &ctx.sha512 ); - if ( i == 0 ) - sha512_8way_update( &ctx.sha512, input, size ); - else - { - intrlv_8x64( vhash, in0, in1, in2, in3, in4, in5, in6, in7, - size<<3 ); - sha512_8way_update( &ctx.sha512, vhash, size ); - } - sha512_8way_close( &ctx.sha512, vhash ); - dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6, - hash7, vhash ); - break; - } - size = 64; - } - - memcpy( output, hash0, 32 ); - memcpy( output+32, hash1, 32 ); - memcpy( output+64, hash2, 32 ); - memcpy( output+96, hash3, 32 ); - memcpy( output+128, hash4, 32 ); - memcpy( output+160, hash5, 32 ); - memcpy( output+192, hash6, 32 ); - memcpy( output+224, hash7, 32 ); -} +#if defined (X16R_8WAY) int scanhash_x16rt_8way( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr) { uint32_t hash[16*8] __attribute__ ((aligned (128))); uint32_t vdata[20*8] __attribute__ ((aligned (64))); - uint32_t vdata2[20*8] __attribute__ ((aligned (64))); - uint32_t edata[20] __attribute__ ((aligned (64))); uint32_t _ALIGN(64) timeHash[8*8]; uint32_t *pdata = work->data; uint32_t *ptarget = work->target; @@ -490,74 +23,25 @@ int scanhash_x16rt_8way( struct work *work, uint32_t max_nonce, if ( 
bench ) ptarget[7] = 0x0cff; + static __thread uint32_t s_ntime = UINT32_MAX; uint32_t ntime = bswap_32( pdata[17] ); if ( s_ntime != ntime ) { x16rt_getTimeHash( ntime, &timeHash ); - x16rt_getAlgoString( &timeHash[0], hashOrder ); + x16rt_getAlgoString( &timeHash[0], x16r_hash_order ); s_ntime = ntime; if ( opt_debug && !thr_id ) applog( LOG_INFO, "hash order: %s time: (%08x) time hash: (%08x)", - hashOrder, ntime, timeHash ); - } - - // Do midstate prehash on hash functions with block size <= 64 bytes. - const char elem = hashOrder[0]; - const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0'; - switch ( algo ) - { - case JH: - mm512_bswap32_intrlv80_8x64( vdata, pdata ); - jh512_8way_init( &x16rt_ctx.jh ); - jh512_8way_update( &x16rt_ctx.jh, vdata, 64 ); - break; - case SKEIN: - mm512_bswap32_intrlv80_8x64( vdata, pdata ); - skein512_8way_init( &x16rt_ctx.skein ); - skein512_8way_update( &x16rt_ctx.skein, vdata, 64 ); - break; - case LUFFA: - mm128_bswap32_80( edata, pdata ); - intrlv_4x128( vdata2, edata, edata, edata, edata, 640 ); - luffa_4way_init( &x16rt_ctx.luffa, 512 ); - luffa_4way_update( &x16rt_ctx.luffa, vdata2, 64 ); - rintrlv_4x128_8x64( vdata, vdata2, vdata2, 640 ); - break; - case CUBEHASH: - mm128_bswap32_80( edata, pdata ); - cubehashInit( &x16rt_ctx.cube, 512, 16, 32 ); - cubehashUpdate( &x16rt_ctx.cube, (const byte*)edata, 64 ); - intrlv_8x64( vdata, edata, edata, edata, edata, - edata, edata, edata, edata, 640 ); - break; - case HAMSI: - mm512_bswap32_intrlv80_8x64( vdata, pdata ); - hamsi512_8way_init( &x16rt_ctx.hamsi ); - hamsi512_8way_update( &x16rt_ctx.hamsi, vdata, 64 ); - break; - case SHABAL: - mm256_bswap32_intrlv80_8x32( vdata2, pdata ); - shabal512_8way_init( &x16rt_ctx.shabal ); - shabal512_8way_update( &x16rt_ctx.shabal, vdata2, 64 ); - rintrlv_8x32_8x64( vdata, vdata2, 640 ); - break; - case WHIRLPOOL: - mm128_bswap32_80( edata, pdata ); - sph_whirlpool_init( &x16rt_ctx.whirlpool ); - sph_whirlpool( 
&x16rt_ctx.whirlpool, edata, 64 ); - intrlv_8x64( vdata, edata, edata, edata, edata, - edata, edata, edata, edata, 640 ); - break; - default: - mm512_bswap32_intrlv80_8x64( vdata, pdata ); + x16r_hash_order, ntime, timeHash ); } + x16r_8way_prehash( vdata, pdata ); *noncev = mm512_intrlv_blend_32( _mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0, n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev ); do { - x16rt_8way_hash( hash, vdata ); + x16r_8way_hash( hash, vdata ); for ( int i = 0; i < 8; i++ ) if ( unlikely( valid_hash( hash + (i<<3), ptarget ) && !bench ) ) @@ -574,313 +58,13 @@ int scanhash_x16rt_8way( struct work *work, uint32_t max_nonce, return 0; } -#elif defined (X16RT_4WAY) - -static __thread uint32_t s_ntime = UINT32_MAX; -static __thread char hashOrder[X16R_HASH_FUNC_COUNT + 1] = { 0 }; - -union _x16rt_4way_context_overlay -{ - blake512_4way_context blake; - bmw512_4way_context bmw; - hashState_echo echo; - hashState_groestl groestl; - skein512_4way_context skein; - jh512_4way_context jh; - keccak512_4way_context keccak; - luffa_2way_context luffa; - hashState_luffa luffa1; - cubehashParam cube; - sph_shavite512_context shavite; - simd_2way_context simd; - hamsi512_4way_context hamsi; - sph_fugue512_context fugue; - shabal512_4way_context shabal; - sph_whirlpool_context whirlpool; - sha512_4way_context sha512; -}; -typedef union _x16rt_4way_context_overlay x16rt_4way_context_overlay; - -static __thread x16rt_4way_context_overlay x16rt_ctx; - -void x16rt_4way_hash( void* output, const void* input ) -{ - uint32_t hash0[20] __attribute__ ((aligned (64))); - uint32_t hash1[20] __attribute__ ((aligned (64))); - uint32_t hash2[20] __attribute__ ((aligned (64))); - uint32_t hash3[20] __attribute__ ((aligned (64))); - uint32_t vhash[20*4] __attribute__ ((aligned (64))); - x16rt_4way_context_overlay ctx; - memcpy( &ctx, &x16rt_ctx, sizeof(ctx) ); - void *in0 = (void*) hash0; - void *in1 = (void*) hash1; - void *in2 = (void*) hash2; - void *in3 = (void*) hash3; - int 
size = 80; - - dintrlv_4x64( hash0, hash1, hash2, hash3, input, 640 ); - - // Input data is both 64 bit interleaved (input) - // and deinterleaved in inp0-3. First function has no need re-interleave. - for ( int i = 0; i < 16; i++ ) - { - const char elem = hashOrder[i]; - const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0'; - - switch ( algo ) - { - case BLAKE: - if ( i == 0 ) - blake512_4way_full( &ctx.blake, vhash, input, size ); - else - { - intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 ); - blake512_4way_full( &ctx.blake, vhash, vhash, size ); - } - dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); - break; - case BMW: - bmw512_4way_init( &ctx.bmw ); - if ( i == 0 ) - bmw512_4way_update( &ctx.bmw, input, size ); - else - { - intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 ); - bmw512_4way_update( &ctx.bmw, vhash, size ); - } - bmw512_4way_close( &ctx.bmw, vhash ); - dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); - break; - case GROESTL: - groestl512_full( &ctx.groestl, (char*)hash0, (char*)in0, size<<3 ); - groestl512_full( &ctx.groestl, (char*)hash1, (char*)in1, size<<3 ); - groestl512_full( &ctx.groestl, (char*)hash2, (char*)in2, size<<3 ); - groestl512_full( &ctx.groestl, (char*)hash3, (char*)in3, size<<3 ); - break; - case JH: - if ( i == 0 ) - jh512_4way_update( &ctx.jh, input + (64<<2), 16 ); - else - { - intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 ); - jh512_4way_init( &ctx.jh ); - jh512_4way_update( &ctx.jh, vhash, size ); - } - jh512_4way_close( &ctx.jh, vhash ); - dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); - break; - case KECCAK: - keccak512_4way_init( &ctx.keccak ); - if ( i == 0 ) - keccak512_4way_update( &ctx.keccak, input, size ); - else - { - intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 ); - keccak512_4way_update( &ctx.keccak, vhash, size ); - } - keccak512_4way_close( &ctx.keccak, vhash ); - dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); - break; - case SKEIN: - if ( i == 0 ) - 
skein512_4way_update( &ctx.skein, input + (64<<2), 16 ); - else - { - intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 ); - skein512_4way_init( &ctx.skein ); - skein512_4way_update( &ctx.skein, vhash, size ); - } - skein512_4way_close( &ctx.skein, vhash ); - dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); - break; - case LUFFA: - if ( i == 0 ) - { - update_and_final_luffa( &ctx.luffa1, (BitSequence*)hash0, - (const BitSequence*)in0 + 64, 16 ); - memcpy( &ctx, &x16rt_ctx, sizeof(ctx) ); - update_and_final_luffa( &ctx.luffa1, (BitSequence*)hash1, - (const BitSequence*)in1 + 64, 16 ); - memcpy( &ctx, &x16rt_ctx, sizeof(ctx) ); - update_and_final_luffa( &ctx.luffa1, (BitSequence*)hash2, - (const BitSequence*)in2 + 64, 16 ); - memcpy( &ctx, &x16rt_ctx, sizeof(ctx) ); - update_and_final_luffa( &ctx.luffa1, (BitSequence*)hash3, - (const BitSequence*)in3 + 64, 16 ); - } - else - { - intrlv_2x128( vhash, in0, in1, size<<3 ); - luffa512_2way_full( &ctx.luffa, vhash, vhash, size ); - dintrlv_2x128_512( hash0, hash1, vhash ); - intrlv_2x128( vhash, in2, in3, size<<3 ); - luffa512_2way_full( &ctx.luffa, vhash, vhash, size ); - dintrlv_2x128_512( hash2, hash3, vhash ); - } - break; - case CUBEHASH: - if ( i == 0 ) - { - cubehashUpdateDigest( &ctx.cube, (byte*)hash0, - (const byte*)in0 + 64, 16 ); - memcpy( &ctx, &x16rt_ctx, sizeof(ctx) ); - cubehashUpdateDigest( &ctx.cube, (byte*) hash1, - (const byte*)in1 + 64, 16 ); - memcpy( &ctx, &x16rt_ctx, sizeof(ctx) ); - cubehashUpdateDigest( &ctx.cube, (byte*) hash2, - (const byte*)in2 + 64, 16 ); - memcpy( &ctx, &x16rt_ctx, sizeof(ctx) ); - cubehashUpdateDigest( &ctx.cube, (byte*) hash3, - (const byte*)in3 + 64, 16 ); - - } - else - { - cubehashInit( &ctx.cube, 512, 16, 32 ); - cubehashUpdateDigest( &ctx.cube, (byte*)hash0, - (const byte*)in0, size ); - cubehashInit( &ctx.cube, 512, 16, 32 ); - cubehashUpdateDigest( &ctx.cube, (byte*)hash1, - (const byte*)in1, size ); - cubehashInit( &ctx.cube, 512, 16, 32 ); - 
cubehashUpdateDigest( &ctx.cube, (byte*)hash2, - (const byte*)in2, size ); - cubehashInit( &ctx.cube, 512, 16, 32 ); - cubehashUpdateDigest( &ctx.cube, (byte*)hash3, - (const byte*)in3, size ); - } - break; - case SHAVITE: - sph_shavite512_init( &ctx.shavite ); - sph_shavite512( &ctx.shavite, in0, size ); - sph_shavite512_close( &ctx.shavite, hash0 ); - sph_shavite512_init( &ctx.shavite ); - sph_shavite512( &ctx.shavite, in1, size ); - sph_shavite512_close( &ctx.shavite, hash1 ); - sph_shavite512_init( &ctx.shavite ); - sph_shavite512( &ctx.shavite, in2, size ); - sph_shavite512_close( &ctx.shavite, hash2 ); - sph_shavite512_init( &ctx.shavite ); - sph_shavite512( &ctx.shavite, in3, size ); - sph_shavite512_close( &ctx.shavite, hash3 ); - break; - case SIMD: - intrlv_2x128( vhash, in0, in1, size<<3 ); - simd_2way_init( &ctx.simd, 512 ); - simd_2way_update_close( &ctx.simd, vhash, vhash, size<<3 ); - dintrlv_2x128( hash0, hash1, vhash, 512 ); - intrlv_2x128( vhash, in2, in3, size<<3 ); - simd_2way_init( &ctx.simd, 512 ); - simd_2way_update_close( &ctx.simd, vhash, vhash, size<<3 ); - dintrlv_2x128( hash2, hash3, vhash, 512 ); - break; - case ECHO: - echo_full( &ctx.echo, (BitSequence *)hash0, 512, - (const BitSequence *)in0, size ); - echo_full( &ctx.echo, (BitSequence *)hash1, 512, - (const BitSequence *)in1, size ); - echo_full( &ctx.echo, (BitSequence *)hash2, 512, - (const BitSequence *)in2, size ); - echo_full( &ctx.echo, (BitSequence *)hash3, 512, - (const BitSequence *)in3, size ); - break; - case HAMSI: - if ( i == 0 ) - hamsi512_4way_update( &ctx.hamsi, input + (64<<2), 16 ); - else - { - intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 ); - hamsi512_4way_init( &ctx.hamsi ); - hamsi512_4way_update( &ctx.hamsi, vhash, size ); - } - hamsi512_4way_close( &ctx.hamsi, vhash ); - dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); - break; - case FUGUE: - sph_fugue512_init( &ctx.fugue ); - sph_fugue512( &ctx.fugue, in0, size ); - sph_fugue512_close( 
&ctx.fugue, hash0 ); - sph_fugue512_init( &ctx.fugue ); - sph_fugue512( &ctx.fugue, in1, size ); - sph_fugue512_close( &ctx.fugue, hash1 ); - sph_fugue512_init( &ctx.fugue ); - sph_fugue512( &ctx.fugue, in2, size ); - sph_fugue512_close( &ctx.fugue, hash2 ); - sph_fugue512_init( &ctx.fugue ); - sph_fugue512( &ctx.fugue, in3, size ); - sph_fugue512_close( &ctx.fugue, hash3 ); - break; - case SHABAL: - intrlv_4x32( vhash, in0, in1, in2, in3, size<<3 ); - if ( i == 0 ) - shabal512_4way_update( &ctx.shabal, vhash + (16<<2), 16 ); - else - { - shabal512_4way_init( &ctx.shabal ); - shabal512_4way_update( &ctx.shabal, vhash, size ); - } - shabal512_4way_close( &ctx.shabal, vhash ); - dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 512 ); - break; - case WHIRLPOOL: - if ( i == 0 ) - { - sph_whirlpool( &ctx.whirlpool, in0 + 64, 16 ); - sph_whirlpool_close( &ctx.whirlpool, hash0 ); - memcpy( &ctx, &x16rt_ctx, sizeof(ctx) ); - sph_whirlpool( &ctx.whirlpool, in1 + 64, 16 ); - sph_whirlpool_close( &ctx.whirlpool, hash1 ); - memcpy( &ctx, &x16rt_ctx, sizeof(ctx) ); - sph_whirlpool( &ctx.whirlpool, in2 + 64, 16 ); - sph_whirlpool_close( &ctx.whirlpool, hash2 ); - memcpy( &ctx, &x16rt_ctx, sizeof(ctx) ); - sph_whirlpool( &ctx.whirlpool, in3 + 64, 16 ); - sph_whirlpool_close( &ctx.whirlpool, hash3 ); - } - else - { - sph_whirlpool_init( &ctx.whirlpool ); - sph_whirlpool( &ctx.whirlpool, in0, size ); - sph_whirlpool_close( &ctx.whirlpool, hash0 ); - sph_whirlpool_init( &ctx.whirlpool ); - sph_whirlpool( &ctx.whirlpool, in1, size ); - sph_whirlpool_close( &ctx.whirlpool, hash1 ); - sph_whirlpool_init( &ctx.whirlpool ); - sph_whirlpool( &ctx.whirlpool, in2, size ); - sph_whirlpool_close( &ctx.whirlpool, hash2 ); - sph_whirlpool_init( &ctx.whirlpool ); - sph_whirlpool( &ctx.whirlpool, in3, size ); - sph_whirlpool_close( &ctx.whirlpool, hash3 ); - } - break; - case SHA_512: - sha512_4way_init( &ctx.sha512 ); - if ( i == 0 ) - sha512_4way_update( &ctx.sha512, input, size ); - else - { - 
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 ); - sha512_4way_update( &ctx.sha512, vhash, size ); - } - sha512_4way_close( &ctx.sha512, vhash ); - dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); - break; - } - size = 64; - } - memcpy( output, hash0, 32 ); - memcpy( output+32, hash1, 32 ); - memcpy( output+64, hash2, 32 ); - memcpy( output+96, hash3, 32 ); -} +#elif defined (X16R_4WAY) int scanhash_x16rt_4way( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr) { uint32_t hash[4*16] __attribute__ ((aligned (64))); uint32_t vdata[24*4] __attribute__ ((aligned (64))); - uint32_t vdata32[20*4] __attribute__ ((aligned (64))); - uint32_t edata[20] __attribute__ ((aligned (64))); uint32_t _ALIGN(64) timeHash[4*8]; uint32_t *pdata = work->data; uint32_t *ptarget = work->target; @@ -894,70 +78,24 @@ int scanhash_x16rt_4way( struct work *work, uint32_t max_nonce, if ( bench ) ptarget[7] = 0x0cff; + static __thread uint32_t s_ntime = UINT32_MAX; uint32_t ntime = bswap_32( pdata[17] ); if ( s_ntime != ntime ) { x16rt_getTimeHash( ntime, &timeHash ); - x16rt_getAlgoString( &timeHash[0], hashOrder ); + x16rt_getAlgoString( &timeHash[0], x16r_hash_order ); s_ntime = ntime; if ( opt_debug && !thr_id ) applog( LOG_INFO, "hash order: %s time: (%08x) time hash: (%08x)", - hashOrder, ntime, timeHash ); - } - - const char elem = hashOrder[0]; - const uint8_t algo = elem >= 'A' ? 
elem - 'A' + 10 : elem - '0'; - switch ( algo ) - { - case JH: - mm256_bswap32_intrlv80_4x64( vdata, pdata ); - jh512_4way_init( &x16rt_ctx.jh ); - jh512_4way_update( &x16rt_ctx.jh, vdata, 64 ); - break; - case SKEIN: - mm256_bswap32_intrlv80_4x64( vdata, pdata ); - skein512_4way_init( &x16rt_ctx.skein ); - skein512_4way_update( &x16rt_ctx.skein, vdata, 64 ); - break; - case LUFFA: - mm128_bswap32_80( edata, pdata ); - init_luffa( &x16rt_ctx.luffa1, 512 ); - update_luffa( &x16rt_ctx.luffa1, (const BitSequence*)edata, 64 ); - intrlv_4x64( vdata, edata, edata, edata, edata, 640 ); - break; - case CUBEHASH: - mm128_bswap32_80( edata, pdata ); - cubehashInit( &x16rt_ctx.cube, 512, 16, 32 ); - cubehashUpdate( &x16rt_ctx.cube, (const byte*)edata, 64 ); - intrlv_4x64( vdata, edata, edata, edata, edata, 640 ); - break; - case HAMSI: - mm256_bswap32_intrlv80_4x64( vdata, pdata ); - hamsi512_4way_init( &x16rt_ctx.hamsi ); - hamsi512_4way_update( &x16rt_ctx.hamsi, vdata, 64 ); - break; - case SHABAL: - mm128_bswap32_intrlv80_4x32( vdata32, pdata ); - shabal512_4way_init( &x16rt_ctx.shabal ); - shabal512_4way_update( &x16rt_ctx.shabal, vdata32, 64 ); - rintrlv_4x32_4x64( vdata, vdata32, 640 ); - break; - case WHIRLPOOL: - mm128_bswap32_80( edata, pdata ); - sph_whirlpool_init( &x16rt_ctx.whirlpool ); - sph_whirlpool( &x16rt_ctx.whirlpool, edata, 64 ); - intrlv_4x64( vdata, edata, edata, edata, edata, 640 ); - break; - default: - mm256_bswap32_intrlv80_4x64( vdata, pdata ); + x16r_hash_order, ntime, timeHash ); } + x16r_4way_prehash( vdata, pdata ); *noncev = mm256_intrlv_blend_32( _mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev ); - do { - x16rt_4way_hash( hash, vdata ); + x16r_4way_hash( hash, vdata ); for ( int i = 0; i < 4; i++ ) if ( unlikely( valid_hash( hash + (i<<3), ptarget ) && !bench ) ) { diff --git a/algo/x16/x16rt.c b/algo/x16/x16rt.c index 1c19cd6..185a6b5 100644 --- a/algo/x16/x16rt.c +++ b/algo/x16/x16rt.c @@ -1,234 +1,46 @@ #include "x16r-gate.h" 
-#include -#include -#include -#include "algo/blake/sph_blake.h" -#include "algo/bmw/sph_bmw.h" -#include "algo/groestl/sph_groestl.h" -#include "algo/jh/sph_jh.h" -#include "algo/keccak/sph_keccak.h" -#include "algo/skein/sph_skein.h" -#include "algo/shavite/sph_shavite.h" -#include "algo/luffa/luffa_for_sse2.h" -#include "algo/cubehash/cubehash_sse2.h" -#include "algo/simd/nist.h" -#include "algo/echo/sph_echo.h" -#include "algo/hamsi/sph_hamsi.h" -#include "algo/fugue/sph_fugue.h" -#include "algo/shabal/sph_shabal.h" -#include "algo/whirlpool/sph_whirlpool.h" -#include -#if defined(__AES__) - #include "algo/echo/aes_ni/hash_api.h" - #include "algo/groestl/aes_ni/hash-groestl.h" -#endif - -static __thread uint32_t s_ntime = UINT32_MAX; -static __thread bool s_implemented = false; -static __thread char hashOrder[X16R_HASH_FUNC_COUNT + 1] = { 0 }; - -union _x16rt_context_overlay -{ -#if defined(__AES__) - hashState_echo echo; - hashState_groestl groestl; -#else - sph_groestl512_context groestl; - sph_echo512_context echo; -#endif - sph_blake512_context blake; - sph_bmw512_context bmw; - sph_skein512_context skein; - sph_jh512_context jh; - sph_keccak512_context keccak; - hashState_luffa luffa; - cubehashParam cube; - sph_shavite512_context shavite; - hashState_sd simd; - sph_hamsi512_context hamsi; - sph_fugue512_context fugue; - sph_shabal512_context shabal; - sph_whirlpool_context whirlpool; - SHA512_CTX sha512; -}; -typedef union _x16rt_context_overlay x16rt_context_overlay; - -void x16rt_hash( void* output, const void* input ) -{ - uint32_t _ALIGN(128) hash[16]; - x16rt_context_overlay ctx; - int size = 80; - void *in = (void*) input; - -/* - void *in = (void*) input; - uint32_t *in32 = (uint32_t*) in; - uint32_t ntime = in32[17]; - if ( s_ntime == UINT32_MAX ) - { - uint32_t _ALIGN(64) timeHash[8]; - x16rt_getTimeHash(ntime, &timeHash); - x16rt_getAlgoString(&timeHash[0], hashOrder); - } -*/ - - for ( int i = 0; i < 16; i++ ) - { - const char elem = 
hashOrder[i]; - const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0'; - - switch ( algo ) - { - case BLAKE: - sph_blake512_init( &ctx.blake ); - sph_blake512( &ctx.blake, in, size ); - sph_blake512_close( &ctx.blake, hash ); - break; - case BMW: - sph_bmw512_init( &ctx.bmw ); - sph_bmw512(&ctx.bmw, in, size); - sph_bmw512_close(&ctx.bmw, hash); - break; - case GROESTL: -#if defined(__AES__) - init_groestl( &ctx.groestl, 64 ); - update_and_final_groestl( &ctx.groestl, (char*)hash, - (const char*)in, size<<3 ); -#else - sph_groestl512_init( &ctx.groestl ); - sph_groestl512( &ctx.groestl, in, size ); - sph_groestl512_close(&ctx.groestl, hash); -#endif - break; - case SKEIN: - sph_skein512_init( &ctx.skein ); - sph_skein512( &ctx.skein, in, size ); - sph_skein512_close( &ctx.skein, hash ); - break; - case JH: - sph_jh512_init( &ctx.jh ); - sph_jh512(&ctx.jh, in, size ); - sph_jh512_close(&ctx.jh, hash ); - break; - case KECCAK: - sph_keccak512_init( &ctx.keccak ); - sph_keccak512( &ctx.keccak, in, size ); - sph_keccak512_close( &ctx.keccak, hash ); - break; - case LUFFA: - init_luffa( &ctx.luffa, 512 ); - update_and_final_luffa( &ctx.luffa, (BitSequence*)hash, - (const BitSequence*)in, size ); - break; - case CUBEHASH: - cubehashInit( &ctx.cube, 512, 16, 32 ); - cubehashUpdateDigest( &ctx.cube, (byte*) hash, - (const byte*)in, size ); - break; - case SHAVITE: - sph_shavite512_init( &ctx.shavite ); - sph_shavite512( &ctx.shavite, in, size ); - sph_shavite512_close( &ctx.shavite, hash ); - break; - case SIMD: - init_sd( &ctx.simd, 512 ); - update_final_sd( &ctx.simd, (BitSequence *)hash, - (const BitSequence*)in, size<<3 ); - break; - case ECHO: -#if defined(__AES__) - init_echo( &ctx.echo, 512 ); - update_final_echo ( &ctx.echo, (BitSequence *)hash, - (const BitSequence*)in, size<<3 ); -#else - sph_echo512_init( &ctx.echo ); - sph_echo512( &ctx.echo, in, size ); - sph_echo512_close( &ctx.echo, hash ); -#endif - break; - case HAMSI: - sph_hamsi512_init( 
&ctx.hamsi ); - sph_hamsi512( &ctx.hamsi, in, size ); - sph_hamsi512_close( &ctx.hamsi, hash ); - break; - case FUGUE: - sph_fugue512_init( &ctx.fugue ); - sph_fugue512( &ctx.fugue, in, size ); - sph_fugue512_close( &ctx.fugue, hash ); - break; - case SHABAL: - sph_shabal512_init( &ctx.shabal ); - sph_shabal512( &ctx.shabal, in, size ); - sph_shabal512_close( &ctx.shabal, hash ); - break; - case WHIRLPOOL: - sph_whirlpool_init( &ctx.whirlpool ); - sph_whirlpool( &ctx.whirlpool, in, size ); - sph_whirlpool_close( &ctx.whirlpool, hash ); - break; - case SHA_512: - SHA512_Init( &ctx.sha512 ); - SHA512_Update( &ctx.sha512, in, size ); - SHA512_Final( (unsigned char*) hash, &ctx.sha512 ); - break; - } - in = (void*) hash; - size = 64; - } - memcpy(output, hash, 32); -} +#if !defined(X16R_8WAY) && !defined(X16R_4WAY) int scanhash_x16rt( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t _ALIGN(128) hash32[8]; - uint32_t _ALIGN(128) endiandata[20]; + uint32_t _ALIGN(128) edata[20]; uint32_t _ALIGN(64) timeHash[8]; uint32_t *pdata = work->data; uint32_t *ptarget = work->target; - const uint32_t Htarg = ptarget[7]; const uint32_t first_nonce = pdata[19]; - int thr_id = mythr->id; // thr_id arg is deprecated + const int thr_id = mythr->id; uint32_t nonce = first_nonce; volatile uint8_t *restart = &(work_restart[thr_id].restart); + const bool bench = opt_benchmark; + if ( bench ) ptarget[7] = 0x0cff; - casti_m128i( endiandata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) ); - casti_m128i( endiandata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) ); - casti_m128i( endiandata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) ); - casti_m128i( endiandata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) ); - casti_m128i( endiandata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) ); + mm128_bswap32_80( edata, pdata ); + static __thread uint32_t s_ntime = UINT32_MAX; uint32_t ntime = swab32( pdata[17] ); if ( s_ntime != ntime ) { 
x16rt_getTimeHash( ntime, &timeHash ); - x16rt_getAlgoString( &timeHash[0], hashOrder ); + x16rt_getAlgoString( &timeHash[0], x16r_hash_order ); s_ntime = ntime; - s_implemented = true; if ( opt_debug && !thr_id ) applog( LOG_INFO, "hash order: %s time: (%08x) time hash: (%08x)", - hashOrder, ntime, timeHash ); - } - if ( !s_implemented ) - { - applog( LOG_WARNING, "s not implemented"); - sleep(1); - return 0; + x16r_hash_order, ntime, timeHash ); } - if ( opt_benchmark ) - ptarget[7] = 0x0cff; - + x16r_prehash( edata, pdata ); + do { - be32enc( &endiandata[19], nonce ); - x16rt_hash( hash32, endiandata ); + edata[19] = nonce; + x16r_hash( hash32, edata ); - if ( hash32[7] <= Htarg ) - if (fulltest( hash32, ptarget ) && !opt_benchmark ) + if ( valid_hash( hash32, ptarget ) && !bench ) { - pdata[19] = nonce; + pdata[19] = bswap_32( nonce ); submit_solution( work, hash32, mythr ); } nonce++; @@ -237,3 +49,6 @@ int scanhash_x16rt( struct work *work, uint32_t max_nonce, *hashes_done = pdata[19] - first_nonce + 1; return 0; } + +#endif // !defined(X16R_8WAY) && !defined(X16R_4WAY) + diff --git a/algo/x16/x16rv2.c b/algo/x16/x16rv2.c index 56989b4..a3c4e4c 100644 --- a/algo/x16/x16rv2.c +++ b/algo/x16/x16rv2.c @@ -6,6 +6,8 @@ */ #include "x16r-gate.h" +#if !defined(X16R_8WAY) && !defined(X16R_4WAY) + #include #include #include @@ -245,3 +247,5 @@ int scanhash_x16rv2( struct work *work, uint32_t max_nonce, *hashes_done = pdata[19] - first_nonce + 1; return 0; } + +#endif diff --git a/algo/x16/x21s-4way.c b/algo/x16/x21s-4way.c index e84163c..9ce97d9 100644 --- a/algo/x16/x21s-4way.c +++ b/algo/x16/x21s-4way.c @@ -8,480 +8,43 @@ #include #include #include -#include "algo/blake/blake-hash-4way.h" -#include "algo/bmw/bmw-hash-4way.h" -#include "algo/groestl/aes_ni/hash-groestl.h" -#include "algo/groestl/aes_ni/hash-groestl.h" -#include "algo/skein/skein-hash-4way.h" -#include "algo/jh/jh-hash-4way.h" -#include "algo/keccak/keccak-hash-4way.h" -#include 
"algo/shavite/sph_shavite.h" -#include "algo/luffa/luffa-hash-2way.h" -#include "algo/luffa/luffa_for_sse2.h" -#include "algo/cubehash/cubehash_sse2.h" -#include "algo/cubehash/cube-hash-2way.h" -#include "algo/simd/simd-hash-2way.h" -#include "algo/echo/aes_ni/hash_api.h" -#include "algo/hamsi/hamsi-hash-4way.h" -#include "algo/fugue/sph_fugue.h" -#include "algo/shabal/shabal-hash-4way.h" -#include "algo/whirlpool/sph_whirlpool.h" -#include "algo/sha/sha-hash-4way.h" #include "algo/haval/haval-hash-4way.h" #include "algo/tiger/sph_tiger.h" #include "algo/gost/sph_gost.h" #include "algo/lyra2/lyra2.h" -#if defined(__VAES__) - #include "algo/groestl/groestl512-hash-4way.h" - #include "algo/shavite/shavite-hash-4way.h" - #include "algo/echo/echo-hash-4way.h" -#endif #if defined(__SHA__) #include #endif -#if defined(X21S_8WAY) || defined(X21S_4WAY) - -static __thread uint32_t s_ntime = UINT32_MAX; -static __thread char hashOrder[X16R_HASH_FUNC_COUNT + 1] = { 0 }; - -#endif - #if defined (X21S_8WAY) static __thread uint64_t* x21s_8way_matrix; union _x21s_8way_context_overlay { - blake512_8way_context blake; - bmw512_8way_context bmw; - skein512_8way_context skein; - jh512_8way_context jh; - keccak512_8way_context keccak; - luffa_4way_context luffa; - cubehashParam cube; -// cube_4way_context cube; - simd_4way_context simd; - hamsi512_8way_context hamsi; - sph_fugue512_context fugue; - shabal512_8way_context shabal; - sph_whirlpool_context whirlpool; - sha512_8way_context sha512; haval256_5_8way_context haval; sph_tiger_context tiger; sph_gost512_context gost; sha256_8way_context sha256; -#if defined(__VAES__) - groestl512_4way_context groestl; - shavite512_4way_context shavite; - echo_4way_context echo; -#else - hashState_groestl groestl; - sph_shavite512_context shavite; - hashState_echo echo; -#endif } __attribute__ ((aligned (64))); typedef union _x21s_8way_context_overlay x21s_8way_context_overlay; -static __thread x21s_8way_context_overlay x21s_ctx; - void 
x21s_8way_hash( void* output, const void* input ) { - uint32_t vhash[20*8] __attribute__ ((aligned (128))); - uint32_t hash0[20] __attribute__ ((aligned (64))); - uint32_t hash1[20] __attribute__ ((aligned (64))); - uint32_t hash2[20] __attribute__ ((aligned (64))); - uint32_t hash3[20] __attribute__ ((aligned (64))); - uint32_t hash4[20] __attribute__ ((aligned (64))); - uint32_t hash5[20] __attribute__ ((aligned (64))); - uint32_t hash6[20] __attribute__ ((aligned (64))); - uint32_t hash7[20] __attribute__ ((aligned (64))); + uint32_t vhash[16*8] __attribute__ ((aligned (128))); + uint8_t shash[64*8] __attribute__ ((aligned (64))); + uint32_t *hash0 = (uint32_t*) shash; + uint32_t *hash1 = (uint32_t*)( shash+64 ); + uint32_t *hash2 = (uint32_t*)( shash+128 ); + uint32_t *hash3 = (uint32_t*)( shash+192 ); + uint32_t *hash4 = (uint32_t*)( shash+256 ); + uint32_t *hash5 = (uint32_t*)( shash+320 ); + uint32_t *hash6 = (uint32_t*)( shash+384 ); + uint32_t *hash7 = (uint32_t*)( shash+448 ); x21s_8way_context_overlay ctx; - memcpy( &ctx, &x21s_ctx, sizeof(ctx) ); - void *in0 = (void*) hash0; - void *in1 = (void*) hash1; - void *in2 = (void*) hash2; - void *in3 = (void*) hash3; - void *in4 = (void*) hash4; - void *in5 = (void*) hash5; - void *in6 = (void*) hash6; - void *in7 = (void*) hash7; - int size = 80; - dintrlv_8x64( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7, - input, 640 ); - - for ( int i = 0; i < 16; i++ ) - { - const char elem = hashOrder[i]; - const uint8_t algo = elem >= 'A' ? 
elem - 'A' + 10 : elem - '0'; - - switch ( algo ) - { - case BLAKE: - if ( i == 0 ) - blake512_8way_full( &ctx.blake, vhash, input, size ); - else - { - intrlv_8x64( vhash, in0, in1, in2, in3, in4, in5, in6, in7, - size<<3 ); - blake512_8way_full( &ctx.blake, vhash, vhash, size ); - } - dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, - hash6, hash7, vhash ); - break; - case BMW: - bmw512_8way_init( &ctx.bmw ); - if ( i == 0 ) - bmw512_8way_update( &ctx.bmw, input, size ); - else - { - intrlv_8x64( vhash, in0, in1, in2, in3, in4, in5, in6, in7, - size<<3 ); - bmw512_8way_update( &ctx.bmw, vhash, size ); - } - bmw512_8way_close( &ctx.bmw, vhash ); - dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6, - hash7, vhash ); - break; - case GROESTL: -#if defined(__VAES__) - intrlv_4x128( vhash, in0, in1, in2, in3, size<<3 ); - groestl512_4way_full( &ctx.groestl, vhash, vhash, size ); - dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhash ); - intrlv_4x128( vhash, in4, in5, in6, in7, size<<3 ); - groestl512_4way_full( &ctx.groestl, vhash, vhash, size ); - dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash ); -#else - groestl512_full( &ctx.groestl, (char*)hash0, (char*)in0, size<<3 ); - groestl512_full( &ctx.groestl, (char*)hash1, (char*)in1, size<<3 ); - groestl512_full( &ctx.groestl, (char*)hash2, (char*)in2, size<<3 ); - groestl512_full( &ctx.groestl, (char*)hash3, (char*)in3, size<<3 ); - groestl512_full( &ctx.groestl, (char*)hash4, (char*)in4, size<<3 ); - groestl512_full( &ctx.groestl, (char*)hash5, (char*)in5, size<<3 ); - groestl512_full( &ctx.groestl, (char*)hash6, (char*)in6, size<<3 ); - groestl512_full( &ctx.groestl, (char*)hash7, (char*)in7, size<<3 ); -#endif - break; - case JH: - if ( i == 0 ) - jh512_8way_update( &ctx.jh, input + (64<<3), 16 ); - else - { - intrlv_8x64( vhash, in0, in1, in2, in3, in4, in5, in6, in7, - size<<3 ); - jh512_8way_init( &ctx.jh ); - jh512_8way_update( &ctx.jh, vhash, size ); - } - jh512_8way_close( 
&ctx.jh, vhash ); - dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6, - hash7, vhash ); - break; - case KECCAK: - keccak512_8way_init( &ctx.keccak ); - if ( i == 0 ) - keccak512_8way_update( &ctx.keccak, input, size ); - else - { - intrlv_8x64( vhash, in0, in1, in2, in3, in4, in5, in6, in7, - size<<3 ); - keccak512_8way_update( &ctx.keccak, vhash, size ); - } - keccak512_8way_close( &ctx.keccak, vhash ); - dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6, - hash7, vhash ); - break; - case SKEIN: - if ( i == 0 ) - skein512_8way_update( &ctx.skein, input + (64<<3), 16 ); - else - { - intrlv_8x64( vhash, in0, in1, in2, in3, in4, in5, in6, in7, - size<<3 ); - skein512_8way_init( &ctx.skein ); - skein512_8way_update( &ctx.skein, vhash, size ); - } - skein512_8way_close( &ctx.skein, vhash ); - dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6, - hash7, vhash ); - break; - case LUFFA: - if ( i == 0 ) - { - intrlv_4x128( vhash, in0, in1, in2, in3, size<<3 ); - luffa_4way_update_close( &ctx.luffa, vhash, - vhash + (16<<2), 16 ); - dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhash ); - memcpy( &ctx, &x21s_ctx, sizeof(ctx) ); - intrlv_4x128( vhash, in4, in5, in6, in7, size<<3 ); - luffa_4way_update_close( &ctx.luffa, vhash, - vhash + (16<<2), 16 ); - dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash ); - } - else - { - intrlv_4x128( vhash, in0, in1, in2, in3, size<<3 ); - luffa512_4way_full( &ctx.luffa, vhash, vhash, size ); - dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhash ); - intrlv_4x128( vhash, in4, in5, in6, in7, size<<3 ); - luffa512_4way_full( &ctx.luffa, vhash, vhash, size ); - dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash ); - } - break; - case CUBEHASH: - if ( i == 0 ) - { - cubehashUpdateDigest( &ctx.cube, (byte*)hash0, - (const byte*)in0 + 64, 16 ); - memcpy( &ctx, &x21s_ctx, sizeof(ctx) ); - cubehashUpdateDigest( &ctx.cube, (byte*)hash1, - (const byte*)in1 + 64, 16 ); - memcpy( &ctx, &x21s_ctx, 
sizeof(ctx) ); - cubehashUpdateDigest( &ctx.cube, (byte*)hash2, - (const byte*)in2 + 64, 16 ); - memcpy( &ctx, &x21s_ctx, sizeof(ctx) ); - cubehashUpdateDigest( &ctx.cube, (byte*)hash3, - (const byte*)in3 + 64, 16 ); - memcpy( &ctx, &x21s_ctx, sizeof(ctx) ); - cubehashUpdateDigest( &ctx.cube, (byte*)hash4, - (const byte*)in4 + 64, 16 ); - memcpy( &ctx, &x21s_ctx, sizeof(ctx) ); - cubehashUpdateDigest( &ctx.cube, (byte*)hash5, - (const byte*)in5 + 64, 16 ); - memcpy( &ctx, &x21s_ctx, sizeof(ctx) ); - cubehashUpdateDigest( &ctx.cube, (byte*)hash6, - (const byte*)in6 + 64, 16 ); - memcpy( &ctx, &x21s_ctx, sizeof(ctx) ); - cubehashUpdateDigest( &ctx.cube, (byte*)hash7, - (const byte*)in7 + 64, 16 ); - } - else - { - cubehashInit( &ctx.cube, 512, 16, 32 ); - cubehashUpdateDigest( &ctx.cube, (byte*) hash0, - (const byte*)in0, size ); - cubehashInit( &ctx.cube, 512, 16, 32 ); - cubehashUpdateDigest( &ctx.cube, (byte*) hash1, - (const byte*)in1, size ); - cubehashInit( &ctx.cube, 512, 16, 32 ); - cubehashUpdateDigest( &ctx.cube, (byte*) hash2, - (const byte*)in2, size ); - cubehashInit( &ctx.cube, 512, 16, 32 ); - cubehashUpdateDigest( &ctx.cube, (byte*) hash3, - (const byte*)in3, size ); - cubehashInit( &ctx.cube, 512, 16, 32 ); - cubehashUpdateDigest( &ctx.cube, (byte*) hash4, - (const byte*)in4, size ); - cubehashInit( &ctx.cube, 512, 16, 32 ); - cubehashUpdateDigest( &ctx.cube, (byte*) hash5, - (const byte*)in5, size ); - cubehashInit( &ctx.cube, 512, 16, 32 ); - cubehashUpdateDigest( &ctx.cube, (byte*) hash6, - (const byte*)in6, size ); - cubehashInit( &ctx.cube, 512, 16, 32 ); - cubehashUpdateDigest( &ctx.cube, (byte*) hash7, - (const byte*)in7, size ); - } - break; - case SHAVITE: -#if defined(__VAES__) - intrlv_4x128( vhash, in0, in1, in2, in3, size<<3 ); - shavite512_4way_init( &ctx.shavite ); - shavite512_4way_update_close( &ctx.shavite, vhash, vhash, size ); - dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhash ); - intrlv_4x128( vhash, in4, in5, in6, in7, 
size<<3 ); - shavite512_4way_init( &ctx.shavite ); - shavite512_4way_update_close( &ctx.shavite, vhash, vhash, size ); - dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash ); -#else - sph_shavite512_init( &ctx.shavite ); - sph_shavite512( &ctx.shavite, in0, size ); - sph_shavite512_close( &ctx.shavite, hash0 ); - sph_shavite512_init( &ctx.shavite ); - sph_shavite512( &ctx.shavite, in1, size ); - sph_shavite512_close( &ctx.shavite, hash1 ); - sph_shavite512_init( &ctx.shavite ); - sph_shavite512( &ctx.shavite, in2, size ); - sph_shavite512_close( &ctx.shavite, hash2 ); - sph_shavite512_init( &ctx.shavite ); - sph_shavite512( &ctx.shavite, in3, size ); - sph_shavite512_close( &ctx.shavite, hash3 ); - sph_shavite512_init( &ctx.shavite ); - sph_shavite512( &ctx.shavite, in4, size ); - sph_shavite512_close( &ctx.shavite, hash4 ); - sph_shavite512_init( &ctx.shavite ); - sph_shavite512( &ctx.shavite, in5, size ); - sph_shavite512_close( &ctx.shavite, hash5 ); - sph_shavite512_init( &ctx.shavite ); - sph_shavite512( &ctx.shavite, in6, size ); - sph_shavite512_close( &ctx.shavite, hash6 ); - sph_shavite512_init( &ctx.shavite ); - sph_shavite512( &ctx.shavite, in7, size ); - sph_shavite512_close( &ctx.shavite, hash7 ); -#endif - break; - case SIMD: - intrlv_4x128( vhash, in0, in1, in2, in3, size<<3 ); - simd512_4way_full( &ctx.simd, vhash, vhash, size ); - dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhash ); - intrlv_4x128( vhash, in4, in5, in6, in7, size<<3 ); - simd512_4way_full( &ctx.simd, vhash, vhash, size ); - dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash ); - break; - case ECHO: -#if defined(__VAES__) - intrlv_4x128( vhash, in0, in1, in2, in3, size<<3 ); - echo_4way_full( &ctx.echo, vhash, 512, vhash, size ); - dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhash ); - intrlv_4x128( vhash, in4, in5, in6, in7, size<<3 ); - echo_4way_full( &ctx.echo, vhash, 512, vhash, size ); - dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash ); -#else - echo_full( 
&ctx.echo, (BitSequence *)hash0, 512, - (const BitSequence *)in0, size ); - echo_full( &ctx.echo, (BitSequence *)hash1, 512, - (const BitSequence *)in1, size ); - echo_full( &ctx.echo, (BitSequence *)hash2, 512, - (const BitSequence *)in2, size ); - echo_full( &ctx.echo, (BitSequence *)hash3, 512, - (const BitSequence *)in3, size ); - echo_full( &ctx.echo, (BitSequence *)hash4, 512, - (const BitSequence *)in4, size ); - echo_full( &ctx.echo, (BitSequence *)hash5, 512, - (const BitSequence *)in5, size ); - echo_full( &ctx.echo, (BitSequence *)hash6, 512, - (const BitSequence *)in6, size ); - echo_full( &ctx.echo, (BitSequence *)hash7, 512, - (const BitSequence *)in7, size ); -#endif - break; - case HAMSI: - if ( i == 0 ) - hamsi512_8way_update( &ctx.hamsi, input + (64<<3), 16 ); - else - { - intrlv_8x64( vhash, in0, in1, in2, in3, in4, in5, in6, in7, - size<<3 ); - hamsi512_8way_init( &ctx.hamsi ); - hamsi512_8way_update( &ctx.hamsi, vhash, size ); - } - hamsi512_8way_close( &ctx.hamsi, vhash ); - dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6, - hash7, vhash ); - break; - case FUGUE: - sph_fugue512_init( &ctx.fugue ); - sph_fugue512( &ctx.fugue, in0, size ); - sph_fugue512_close( &ctx.fugue, hash0 ); - sph_fugue512_init( &ctx.fugue ); - sph_fugue512( &ctx.fugue, in1, size ); - sph_fugue512_close( &ctx.fugue, hash1 ); - sph_fugue512_init( &ctx.fugue ); - sph_fugue512( &ctx.fugue, in2, size ); - sph_fugue512_close( &ctx.fugue, hash2 ); - sph_fugue512_init( &ctx.fugue ); - sph_fugue512( &ctx.fugue, in3, size ); - sph_fugue512_close( &ctx.fugue, hash3 ); - sph_fugue512_init( &ctx.fugue ); - sph_fugue512( &ctx.fugue, in4, size ); - sph_fugue512_close( &ctx.fugue, hash4 ); - sph_fugue512_init( &ctx.fugue ); - sph_fugue512( &ctx.fugue, in5, size ); - sph_fugue512_close( &ctx.fugue, hash5 ); - sph_fugue512_init( &ctx.fugue ); - sph_fugue512( &ctx.fugue, in6, size ); - sph_fugue512_close( &ctx.fugue, hash6 ); - sph_fugue512_init( &ctx.fugue ); - 
sph_fugue512( &ctx.fugue, in7, size ); - sph_fugue512_close( &ctx.fugue, hash7 ); - break; - case SHABAL: - intrlv_8x32( vhash, in0, in1, in2, in3, in4, in5, in6, in7, - size<<3 ); - if ( i == 0 ) - shabal512_8way_update( &ctx.shabal, vhash + (16<<3), 16 ); - else - { - shabal512_8way_init( &ctx.shabal ); - shabal512_8way_update( &ctx.shabal, vhash, size ); - } - shabal512_8way_close( &ctx.shabal, vhash ); - dintrlv_8x32_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6, - hash7, vhash ); - break; - case WHIRLPOOL: - if ( i == 0 ) - { - sph_whirlpool( &ctx.whirlpool, in0 + 64, 16 ); - sph_whirlpool_close( &ctx.whirlpool, hash0 ); - memcpy( &ctx, &x21s_ctx, sizeof(ctx) ); - sph_whirlpool( &ctx.whirlpool, in1 + 64, 16 ); - sph_whirlpool_close( &ctx.whirlpool, hash1 ); - memcpy( &ctx, &x21s_ctx, sizeof(ctx) ); - sph_whirlpool( &ctx.whirlpool, in2 + 64, 16 ); - sph_whirlpool_close( &ctx.whirlpool, hash2 ); - memcpy( &ctx, &x21s_ctx, sizeof(ctx) ); - sph_whirlpool( &ctx.whirlpool, in3 + 64, 16 ); - sph_whirlpool_close( &ctx.whirlpool, hash3 ); - memcpy( &ctx, &x21s_ctx, sizeof(ctx) ); - sph_whirlpool( &ctx.whirlpool, in4 + 64, 16 ); - sph_whirlpool_close( &ctx.whirlpool, hash4 ); - memcpy( &ctx, &x21s_ctx, sizeof(ctx) ); - sph_whirlpool( &ctx.whirlpool, in5 + 64, 16 ); - sph_whirlpool_close( &ctx.whirlpool, hash5 ); - memcpy( &ctx, &x21s_ctx, sizeof(ctx) ); - sph_whirlpool( &ctx.whirlpool, in6 + 64, 16 ); - sph_whirlpool_close( &ctx.whirlpool, hash6 ); - memcpy( &ctx, &x21s_ctx, sizeof(ctx) ); - sph_whirlpool( &ctx.whirlpool, in7 + 64, 16 ); - sph_whirlpool_close( &ctx.whirlpool, hash7 ); - } - else - { - sph_whirlpool_init( &ctx.whirlpool ); - sph_whirlpool( &ctx.whirlpool, in0, size ); - sph_whirlpool_close( &ctx.whirlpool, hash0 ); - sph_whirlpool_init( &ctx.whirlpool ); - sph_whirlpool( &ctx.whirlpool, in1, size ); - sph_whirlpool_close( &ctx.whirlpool, hash1 ); - sph_whirlpool_init( &ctx.whirlpool ); - sph_whirlpool( &ctx.whirlpool, in2, size ); - 
sph_whirlpool_close( &ctx.whirlpool, hash2 ); - sph_whirlpool_init( &ctx.whirlpool ); - sph_whirlpool( &ctx.whirlpool, in3, size ); - sph_whirlpool_close( &ctx.whirlpool, hash3 ); - sph_whirlpool_init( &ctx.whirlpool ); - sph_whirlpool( &ctx.whirlpool, in4, size ); - sph_whirlpool_close( &ctx.whirlpool, hash4 ); - sph_whirlpool_init( &ctx.whirlpool ); - sph_whirlpool( &ctx.whirlpool, in5, size ); - sph_whirlpool_close( &ctx.whirlpool, hash5 ); - sph_whirlpool_init( &ctx.whirlpool ); - sph_whirlpool( &ctx.whirlpool, in6, size ); - sph_whirlpool_close( &ctx.whirlpool, hash6 ); - sph_whirlpool_init( &ctx.whirlpool ); - sph_whirlpool( &ctx.whirlpool, in7, size ); - sph_whirlpool_close( &ctx.whirlpool, hash7 ); - } - break; - case SHA_512: - sha512_8way_init( &ctx.sha512 ); - if ( i == 0 ) - sha512_8way_update( &ctx.sha512, input, size ); - else - { - intrlv_8x64( vhash, in0, in1, in2, in3, in4, in5, in6, in7, - size<<3 ); - sha512_8way_update( &ctx.sha512, vhash, size ); - } - sha512_8way_close( &ctx.sha512, vhash ); - dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6, - hash7, vhash ); - break; - } - size = 64; - } + x16r_8way_hash_generic( shash, input ); intrlv_8x32_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7 ); @@ -568,8 +131,6 @@ int scanhash_x21s_8way( struct work *work, uint32_t max_nonce, { uint32_t hash[16*8] __attribute__ ((aligned (128))); uint32_t vdata[20*8] __attribute__ ((aligned (64))); - uint32_t vdata2[20*8] __attribute__ ((aligned (64))); - uint32_t edata[20] __attribute__ ((aligned (64))); uint32_t *hash7 = &hash[7<<3]; uint32_t lane_hash[8] __attribute__ ((aligned (64))); uint32_t bedata1[2] __attribute__((aligned(64))); @@ -588,71 +149,21 @@ int scanhash_x21s_8way( struct work *work, uint32_t max_nonce, bedata1[0] = bswap_32( pdata[1] ); bedata1[1] = bswap_32( pdata[2] ); + + static __thread uint32_t s_ntime = UINT32_MAX; uint32_t ntime = bswap_32( pdata[17] ); if ( s_ntime != ntime ) { - 
x16_r_s_getAlgoString( (const uint8_t*)bedata1, hashOrder ); + x16_r_s_getAlgoString( (const uint8_t*)bedata1, x16r_hash_order ); s_ntime = ntime; if ( opt_debug && !thr_id ) - applog( LOG_INFO, "hash order %s (%08x)", hashOrder, ntime ); - } - - // Do midstate prehash on hash functions with block size <= 64 bytes. - const char elem = hashOrder[0]; - const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0'; - switch ( algo ) - { - case JH: - mm512_bswap32_intrlv80_8x64( vdata, pdata ); - jh512_8way_init( &x21s_ctx.jh ); - jh512_8way_update( &x21s_ctx.jh, vdata, 64 ); - break; - case SKEIN: - mm512_bswap32_intrlv80_8x64( vdata, pdata ); - skein512_8way_init( &x21s_ctx.skein ); - skein512_8way_update( &x21s_ctx.skein, vdata, 64 ); - break; - case LUFFA: - mm128_bswap32_80( edata, pdata ); - intrlv_4x128( vdata2, edata, edata, edata, edata, 640 ); - luffa_4way_init( &x21s_ctx.luffa, 512 ); - luffa_4way_update( &x21s_ctx.luffa, vdata2, 64 ); - rintrlv_4x128_8x64( vdata, vdata2, vdata2, 640 ); - break; - case CUBEHASH: - mm128_bswap32_80( edata, pdata ); - cubehashInit( &x21s_ctx.cube, 512, 16, 32 ); - cubehashUpdate( &x21s_ctx.cube, (const byte*)edata, 64 ); - intrlv_8x64( vdata, edata, edata, edata, edata, - edata, edata, edata, edata, 640 ); - break; - case HAMSI: - mm512_bswap32_intrlv80_8x64( vdata, pdata ); - hamsi512_8way_init( &x21s_ctx.hamsi ); - hamsi512_8way_update( &x21s_ctx.hamsi, vdata, 64 ); - break; - case SHABAL: - mm256_bswap32_intrlv80_8x32( vdata2, pdata ); - shabal512_8way_init( &x21s_ctx.shabal ); - shabal512_8way_update( &x21s_ctx.shabal, vdata2, 64 ); - rintrlv_8x32_8x64( vdata, vdata2, 640 ); - break; - case WHIRLPOOL: - mm128_bswap32_80( edata, pdata ); - sph_whirlpool_init( &x21s_ctx.whirlpool ); - sph_whirlpool( &x21s_ctx.whirlpool, edata, 64 ); - intrlv_8x64( vdata, edata, edata, edata, edata, - edata, edata, edata, edata, 640 ); - break; - default: - mm512_bswap32_intrlv80_8x64( vdata, pdata ); + applog( LOG_INFO, "hash order %s 
(%08x)", x16r_hash_order, ntime ); } + x16r_8way_prehash( vdata, pdata ); *noncev = mm512_intrlv_blend_32( _mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0, n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev ); - - do { x21s_8way_hash( hash, vdata ); @@ -670,7 +181,7 @@ int scanhash_x21s_8way( struct work *work, uint32_t max_nonce, *noncev = _mm512_add_epi32( *noncev, m512_const1_64( 0x0000000800000000 ) ); n += 8; - } while ( ( n < last_nonce ) && !(*restart) ); + } while ( likely( ( n < last_nonce ) && !(*restart) ) ); pdata[19] = n; *hashes_done = n - first_nonce; return 0; @@ -692,23 +203,6 @@ static __thread uint64_t* x21s_4way_matrix; union _x21s_4way_context_overlay { - blake512_4way_context blake; - bmw512_4way_context bmw; - hashState_echo echo; - hashState_groestl groestl; - skein512_4way_context skein; - jh512_4way_context jh; - keccak512_4way_context keccak; - luffa_2way_context luffa; - hashState_luffa luffa1; - cubehashParam cube; - sph_shavite512_context shavite; - simd_2way_context simd; - hamsi512_4way_context hamsi; - sph_fugue512_context fugue; - shabal512_4way_context shabal; - sph_whirlpool_context whirlpool; - sha512_4way_context sha512; haval256_5_4way_context haval; sph_tiger_context tiger; sph_gost512_context gost; @@ -718,282 +212,21 @@ union _x21s_4way_context_overlay sha256_4way_context sha256; #endif } __attribute__ ((aligned (64))); -typedef union _x21s_4way_context_overlay x21s_4way_context_overlay; -static __thread x21s_4way_context_overlay x21s_ctx; +typedef union _x21s_4way_context_overlay x21s_4way_context_overlay; void x21s_4way_hash( void* output, const void* input ) { - uint32_t hash0[20] __attribute__ ((aligned (64))); - uint32_t hash1[20] __attribute__ ((aligned (64))); - uint32_t hash2[20] __attribute__ ((aligned (64))); - uint32_t hash3[20] __attribute__ ((aligned (64))); - uint32_t vhash[20*4] __attribute__ ((aligned (64))); + uint32_t vhash[16*4] __attribute__ ((aligned (64))); + uint8_t shash[64*4] __attribute__ ((aligned (64))); 
x21s_4way_context_overlay ctx; - memcpy( &ctx, &x21s_ctx, sizeof(ctx) ); - void *in0 = (void*) hash0; - void *in1 = (void*) hash1; - void *in2 = (void*) hash2; - void *in3 = (void*) hash3; - int size = 80; - - dintrlv_4x64( hash0, hash1, hash2, hash3, input, 640 ); - - // Input data is both 64 bit interleaved (input) - // and deinterleaved in inp0-3. - // If First function uses 64 bit data it is not required to interleave inp - // first. It may use the inerleaved data dmost convenient, ie 4way 64 bit. - // All other functions assume data is deinterleaved in hash0-3 - // All functions must exit with data deinterleaved in hash0-3. - // Alias in0-3 points to either inp0-3 or hash0-3 according to - // its hashOrder position. Size is also set accordingly. - for ( int i = 0; i < 16; i++ ) - { - const char elem = hashOrder[i]; - const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0'; - - switch ( algo ) - { - case BLAKE: - if ( i == 0 ) - blake512_4way_full( &ctx.blake, vhash, input, size ); - else - { - intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 ); - blake512_4way_full( &ctx.blake, vhash, vhash, size ); - } - dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); - break; - case BMW: - bmw512_4way_init( &ctx.bmw ); - if ( i == 0 ) - bmw512_4way_update( &ctx.bmw, input, size ); - else - { - intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 ); - bmw512_4way_update( &ctx.bmw, vhash, size ); - } - bmw512_4way_close( &ctx.bmw, vhash ); - dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); - break; - case GROESTL: - groestl512_full( &ctx.groestl, (char*)hash0, (char*)in0, size<<3 ); - groestl512_full( &ctx.groestl, (char*)hash1, (char*)in1, size<<3 ); - groestl512_full( &ctx.groestl, (char*)hash2, (char*)in2, size<<3 ); - groestl512_full( &ctx.groestl, (char*)hash3, (char*)in3, size<<3 ); - break; - case JH: - if ( i == 0 ) - jh512_4way_update( &ctx.jh, input + (64<<2), 16 ); - else - { - intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 ); - jh512_4way_init( 
&ctx.jh ); - jh512_4way_update( &ctx.jh, vhash, size ); - } - jh512_4way_close( &ctx.jh, vhash ); - dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); - break; - case KECCAK: - keccak512_4way_init( &ctx.keccak ); - if ( i == 0 ) - keccak512_4way_update( &ctx.keccak, input, size ); - else - { - intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 ); - keccak512_4way_update( &ctx.keccak, vhash, size ); - } - keccak512_4way_close( &ctx.keccak, vhash ); - dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); - break; - case SKEIN: - if ( i == 0 ) - skein512_4way_update( &ctx.skein, input + (64<<2), 16 ); - else - { - intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 ); - skein512_4way_init( &ctx.skein ); - skein512_4way_update( &ctx.skein, vhash, size ); - } - skein512_4way_close( &ctx.skein, vhash ); - dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); - break; - case LUFFA: - if ( i == 0 ) - { - update_and_final_luffa( &ctx.luffa1, (BitSequence*)hash0, - (const BitSequence*)in0 + 64, 16 ); - memcpy( &ctx, &x21s_ctx, sizeof(ctx) ); - update_and_final_luffa( &ctx.luffa1, (BitSequence*)hash1, - (const BitSequence*)in1 + 64, 16 ); - memcpy( &ctx, &x21s_ctx, sizeof(ctx) ); - update_and_final_luffa( &ctx.luffa1, (BitSequence*)hash2, - (const BitSequence*)in2 + 64, 16 ); - memcpy( &ctx, &x21s_ctx, sizeof(ctx) ); - update_and_final_luffa( &ctx.luffa1, (BitSequence*)hash3, - (const BitSequence*)in3 + 64, 16 ); - } - else - { - intrlv_2x128( vhash, in0, in1, size<<3 ); - luffa512_2way_full( &ctx.luffa, vhash, vhash, size ); - dintrlv_2x128_512( hash0, hash1, vhash ); - intrlv_2x128( vhash, in2, in3, size<<3 ); - luffa512_2way_full( &ctx.luffa, vhash, vhash, size ); - dintrlv_2x128_512( hash2, hash3, vhash ); - } - break; - case CUBEHASH: - if ( i == 0 ) - { - cubehashUpdateDigest( &ctx.cube, (byte*)hash0, - (const byte*)in0 + 64, 16 ); - memcpy( &ctx, &x21s_ctx, sizeof(ctx) ); - cubehashUpdateDigest( &ctx.cube, (byte*) hash1, - (const byte*)in1 + 64, 16 ); - memcpy( &ctx, 
&x21s_ctx, sizeof(ctx) ); - cubehashUpdateDigest( &ctx.cube, (byte*) hash2, - (const byte*)in2 + 64, 16 ); - memcpy( &ctx, &x21s_ctx, sizeof(ctx) ); - cubehashUpdateDigest( &ctx.cube, (byte*) hash3, - (const byte*)in3 + 64, 16 ); - - } - else - { - cubehashInit( &ctx.cube, 512, 16, 32 ); - cubehashUpdateDigest( &ctx.cube, (byte*)hash0, - (const byte*)in0, size ); - cubehashInit( &ctx.cube, 512, 16, 32 ); - cubehashUpdateDigest( &ctx.cube, (byte*)hash1, - (const byte*)in1, size ); - cubehashInit( &ctx.cube, 512, 16, 32 ); - cubehashUpdateDigest( &ctx.cube, (byte*)hash2, - (const byte*)in2, size ); - cubehashInit( &ctx.cube, 512, 16, 32 ); - cubehashUpdateDigest( &ctx.cube, (byte*)hash3, - (const byte*)in3, size ); - } - break; - case SHAVITE: - sph_shavite512_init( &ctx.shavite ); - sph_shavite512( &ctx.shavite, in0, size ); - sph_shavite512_close( &ctx.shavite, hash0 ); - sph_shavite512_init( &ctx.shavite ); - sph_shavite512( &ctx.shavite, in1, size ); - sph_shavite512_close( &ctx.shavite, hash1 ); - sph_shavite512_init( &ctx.shavite ); - sph_shavite512( &ctx.shavite, in2, size ); - sph_shavite512_close( &ctx.shavite, hash2 ); - sph_shavite512_init( &ctx.shavite ); - sph_shavite512( &ctx.shavite, in3, size ); - sph_shavite512_close( &ctx.shavite, hash3 ); - break; - case SIMD: - intrlv_2x128( vhash, in0, in1, size<<3 ); - simd_2way_init( &ctx.simd, 512 ); - simd_2way_update_close( &ctx.simd, vhash, vhash, size<<3 ); - dintrlv_2x128( hash0, hash1, vhash, 512 ); - intrlv_2x128( vhash, in2, in3, size<<3 ); - simd_2way_init( &ctx.simd, 512 ); - simd_2way_update_close( &ctx.simd, vhash, vhash, size<<3 ); - dintrlv_2x128( hash2, hash3, vhash, 512 ); - break; - case ECHO: - echo_full( &ctx.echo, (BitSequence *)hash0, 512, - (const BitSequence *)in0, size ); - echo_full( &ctx.echo, (BitSequence *)hash1, 512, - (const BitSequence *)in1, size ); - echo_full( &ctx.echo, (BitSequence *)hash2, 512, - (const BitSequence *)in2, size ); - echo_full( &ctx.echo, (BitSequence 
*)hash3, 512, - (const BitSequence *)in3, size ); - break; - case HAMSI: - if ( i == 0 ) - hamsi512_4way_update( &ctx.hamsi, input + (64<<2), 16 ); - else - { - intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 ); - hamsi512_4way_init( &ctx.hamsi ); - hamsi512_4way_update( &ctx.hamsi, vhash, size ); - } - hamsi512_4way_close( &ctx.hamsi, vhash ); - dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); - break; - case FUGUE: - sph_fugue512_init( &ctx.fugue ); - sph_fugue512( &ctx.fugue, in0, size ); - sph_fugue512_close( &ctx.fugue, hash0 ); - sph_fugue512_init( &ctx.fugue ); - sph_fugue512( &ctx.fugue, in1, size ); - sph_fugue512_close( &ctx.fugue, hash1 ); - sph_fugue512_init( &ctx.fugue ); - sph_fugue512( &ctx.fugue, in2, size ); - sph_fugue512_close( &ctx.fugue, hash2 ); - sph_fugue512_init( &ctx.fugue ); - sph_fugue512( &ctx.fugue, in3, size ); - sph_fugue512_close( &ctx.fugue, hash3 ); - break; - case SHABAL: - intrlv_4x32( vhash, in0, in1, in2, in3, size<<3 ); - if ( i == 0 ) - shabal512_4way_update( &ctx.shabal, vhash + (16<<2), 16 ); - else - { - shabal512_4way_init( &ctx.shabal ); - shabal512_4way_update( &ctx.shabal, vhash, size ); - } - shabal512_4way_close( &ctx.shabal, vhash ); - dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 512 ); - break; - case WHIRLPOOL: - if ( i == 0 ) - { - sph_whirlpool( &ctx.whirlpool, in0 + 64, 16 ); - sph_whirlpool_close( &ctx.whirlpool, hash0 ); - memcpy( &ctx, &x21s_ctx, sizeof(ctx) ); - sph_whirlpool( &ctx.whirlpool, in1 + 64, 16 ); - sph_whirlpool_close( &ctx.whirlpool, hash1 ); - memcpy( &ctx, &x21s_ctx, sizeof(ctx) ); - sph_whirlpool( &ctx.whirlpool, in2 + 64, 16 ); - sph_whirlpool_close( &ctx.whirlpool, hash2 ); - memcpy( &ctx, &x21s_ctx, sizeof(ctx) ); - sph_whirlpool( &ctx.whirlpool, in3 + 64, 16 ); - sph_whirlpool_close( &ctx.whirlpool, hash3 ); - } - else - { - sph_whirlpool_init( &ctx.whirlpool ); - sph_whirlpool( &ctx.whirlpool, in0, size ); - sph_whirlpool_close( &ctx.whirlpool, hash0 ); - sph_whirlpool_init( 
&ctx.whirlpool ); - sph_whirlpool( &ctx.whirlpool, in1, size ); - sph_whirlpool_close( &ctx.whirlpool, hash1 ); - sph_whirlpool_init( &ctx.whirlpool ); - sph_whirlpool( &ctx.whirlpool, in2, size ); - sph_whirlpool_close( &ctx.whirlpool, hash2 ); - sph_whirlpool_init( &ctx.whirlpool ); - sph_whirlpool( &ctx.whirlpool, in3, size ); - sph_whirlpool_close( &ctx.whirlpool, hash3 ); - } - break; - case SHA_512: - sha512_4way_init( &ctx.sha512 ); - if ( i == 0 ) - sha512_4way_update( &ctx.sha512, input, size ); - else - { - intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 ); - sha512_4way_update( &ctx.sha512, vhash, size ); - } - sha512_4way_close( &ctx.sha512, vhash ); - dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); - break; - } - size = 64; - } + uint32_t *hash0 = (uint32_t*) shash; + uint32_t *hash1 = (uint32_t*)( shash+64 ); + uint32_t *hash2 = (uint32_t*)( shash+128 ); + uint32_t *hash3 = (uint32_t*)( shash+192 ); + x16r_4way_hash_generic( shash, input ); + intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 ); haval256_5_4way_init( &ctx.haval ); @@ -1073,8 +306,6 @@ int scanhash_x21s_4way( struct work *work, uint32_t max_nonce, { uint32_t hash[16*4] __attribute__ ((aligned (64))); uint32_t vdata[20*4] __attribute__ ((aligned (64))); - uint32_t vdata32[20*4] __attribute__ ((aligned (64))); - uint32_t edata[20] __attribute__ ((aligned (64))); uint32_t bedata1[2] __attribute__((aligned(64))); uint32_t *pdata = work->data; uint32_t *ptarget = work->target; @@ -1090,66 +321,20 @@ int scanhash_x21s_4way( struct work *work, uint32_t max_nonce, bedata1[0] = bswap_32( pdata[1] ); bedata1[1] = bswap_32( pdata[2] ); + + static __thread uint32_t s_ntime = UINT32_MAX; uint32_t ntime = bswap_32( pdata[17] ); if ( s_ntime != ntime ) { - x16_r_s_getAlgoString( (const uint8_t*)bedata1, hashOrder ); + x16_r_s_getAlgoString( (const uint8_t*)bedata1, x16r_hash_order ); s_ntime = ntime; if ( opt_debug && !thr_id ) - applog( LOG_DEBUG, "hash order %s (%08x)", hashOrder, ntime ); 
- } - - const char elem = hashOrder[0]; - const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0'; - - switch ( algo ) - { - case JH: - mm256_bswap32_intrlv80_4x64( vdata, pdata ); - jh512_4way_init( &x21s_ctx.jh ); - jh512_4way_update( &x21s_ctx.jh, vdata, 64 ); - break; - case SKEIN: - mm256_bswap32_intrlv80_4x64( vdata, pdata ); - skein512_4way_init( &x21s_ctx.skein ); - skein512_4way_update( &x21s_ctx.skein, vdata, 64 ); - break; - case LUFFA: - mm128_bswap32_80( edata, pdata ); - init_luffa( &x21s_ctx.luffa1, 512 ); - update_luffa( &x21s_ctx.luffa1, (const BitSequence*)edata, 64 ); - intrlv_4x64( vdata, edata, edata, edata, edata, 640 ); - break; - case CUBEHASH: - mm128_bswap32_80( edata, pdata ); - cubehashInit( &x21s_ctx.cube, 512, 16, 32 ); - cubehashUpdate( &x21s_ctx.cube, (const byte*)edata, 64 ); - intrlv_4x64( vdata, edata, edata, edata, edata, 640 ); - break; - case HAMSI: - mm256_bswap32_intrlv80_4x64( vdata, pdata ); - hamsi512_4way_init( &x21s_ctx.hamsi ); - hamsi512_4way_update( &x21s_ctx.hamsi, vdata, 64 ); - break; - case SHABAL: - mm128_bswap32_intrlv80_4x32( vdata32, pdata ); - shabal512_4way_init( &x21s_ctx.shabal ); - shabal512_4way_update( &x21s_ctx.shabal, vdata32, 64 ); - rintrlv_4x32_4x64( vdata, vdata32, 640 ); - break; - case WHIRLPOOL: - mm128_bswap32_80( edata, pdata ); - sph_whirlpool_init( &x21s_ctx.whirlpool ); - sph_whirlpool( &x21s_ctx.whirlpool, edata, 64 ); - intrlv_4x64( vdata, edata, edata, edata, edata, 640 ); - break; - default: - mm256_bswap32_intrlv80_4x64( vdata, pdata ); + applog( LOG_DEBUG, "hash order %s (%08x)", x16r_hash_order, ntime ); } + x16r_4way_prehash( vdata, pdata ); *noncev = mm256_intrlv_blend_32( _mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev ); - do { x21s_4way_hash( hash, vdata ); @@ -1162,7 +347,7 @@ int scanhash_x21s_4way( struct work *work, uint32_t max_nonce, *noncev = _mm256_add_epi32( *noncev, m256_const1_64( 0x0000000400000000 ) ); n += 4; - } while ( ( n < last_nonce ) && 
!(*restart) ); + } while ( likely( ( n < last_nonce ) && !(*restart) ) ); pdata[19] = n; *hashes_done = n - first_nonce; return 0; diff --git a/algo/x16/x21s.c b/algo/x16/x21s.c index 8f290af..4d75323 100644 --- a/algo/x16/x21s.c +++ b/algo/x16/x21s.c @@ -5,63 +5,21 @@ * Optimized by JayDDee@github Jan 2018 */ #include "x16r-gate.h" - #include #include #include -#include "algo/blake/sph_blake.h" -#include "algo/bmw/sph_bmw.h" -#include "algo/groestl/sph_groestl.h" -#include "algo/jh/sph_jh.h" -#include "algo/keccak/sph_keccak.h" -#include "algo/skein/sph_skein.h" -#include "algo/shavite/sph_shavite.h" -#include "algo/luffa/luffa_for_sse2.h" -#include "algo/cubehash/cubehash_sse2.h" -#include "algo/simd/nist.h" -#include "algo/echo/sph_echo.h" -#include "algo/hamsi/sph_hamsi.h" -#include "algo/fugue/sph_fugue.h" -#include "algo/shabal/sph_shabal.h" -#include "algo/whirlpool/sph_whirlpool.h" #include -#if defined(__AES__) - #include "algo/echo/aes_ni/hash_api.h" - #include "algo/groestl/aes_ni/hash-groestl.h" -#endif #include "algo/haval/sph-haval.h" #include "algo/tiger/sph_tiger.h" #include "algo/gost/sph_gost.h" #include "algo/lyra2/lyra2.h" -static __thread uint32_t s_ntime = UINT32_MAX; -static __thread char hashOrder[X16R_HASH_FUNC_COUNT + 1] = { 0 }; +#if !defined(X16R_8WAY) && !defined(X16R_4WAY) static __thread uint64_t* x21s_matrix; union _x21s_context_overlay { -#if defined(__AES__) - hashState_echo echo; - hashState_groestl groestl; -#else - sph_groestl512_context groestl; - sph_echo512_context echo; -#endif - sph_blake512_context blake; - sph_bmw512_context bmw; - sph_skein512_context skein; - sph_jh512_context jh; - sph_keccak512_context keccak; - hashState_luffa luffa; - cubehashParam cube; - sph_shavite512_context shavite; - hashState_sd simd; - sph_hamsi512_context hamsi; - sph_fugue512_context fugue; - sph_shabal512_context shabal; - sph_whirlpool_context whirlpool; - SHA512_CTX sha512; sph_haval256_5_context haval; sph_tiger_context tiger; 
sph_gost512_context gost; @@ -73,112 +31,8 @@ void x21s_hash( void* output, const void* input ) { uint32_t _ALIGN(128) hash[16]; x21s_context_overlay ctx; - void *in = (void*) input; - int size = 80; - for ( int i = 0; i < 16; i++ ) - { - const char elem = hashOrder[i]; - const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0'; - - switch ( algo ) - { - case BLAKE: - sph_blake512_init( &ctx.blake ); - sph_blake512( &ctx.blake, in, size ); - sph_blake512_close( &ctx.blake, hash ); - break; - case BMW: - sph_bmw512_init( &ctx.bmw ); - sph_bmw512(&ctx.bmw, in, size); - sph_bmw512_close(&ctx.bmw, hash); - break; - case GROESTL: -#if defined(__AES__) - init_groestl( &ctx.groestl, 64 ); - update_and_final_groestl( &ctx.groestl, (char*)hash, - (const char*)in, size<<3 ); -#else - sph_groestl512_init( &ctx.groestl ); - sph_groestl512( &ctx.groestl, in, size ); - sph_groestl512_close(&ctx.groestl, hash); -#endif - break; - case SKEIN: - sph_skein512_init( &ctx.skein ); - sph_skein512( &ctx.skein, in, size ); - sph_skein512_close( &ctx.skein, hash ); - break; - case JH: - sph_jh512_init( &ctx.jh ); - sph_jh512(&ctx.jh, in, size ); - sph_jh512_close(&ctx.jh, hash ); - break; - case KECCAK: - sph_keccak512_init( &ctx.keccak ); - sph_keccak512( &ctx.keccak, in, size ); - sph_keccak512_close( &ctx.keccak, hash ); - break; - case LUFFA: - init_luffa( &ctx.luffa, 512 ); - update_and_final_luffa( &ctx.luffa, (BitSequence*)hash, - (const BitSequence*)in, size ); - break; - case CUBEHASH: - cubehashInit( &ctx.cube, 512, 16, 32 ); - cubehashUpdateDigest( &ctx.cube, (byte*) hash, - (const byte*)in, size ); - break; - case SHAVITE: - sph_shavite512_init( &ctx.shavite ); - sph_shavite512( &ctx.shavite, in, size ); - sph_shavite512_close( &ctx.shavite, hash ); - break; - case SIMD: - init_sd( &ctx.simd, 512 ); - update_final_sd( &ctx.simd, (BitSequence *)hash, - (const BitSequence*)in, size<<3 ); - break; - case ECHO: -#if defined(__AES__) - init_echo( &ctx.echo, 512 ); - 
update_final_echo ( &ctx.echo, (BitSequence *)hash, - (const BitSequence*)in, size<<3 ); -#else - sph_echo512_init( &ctx.echo ); - sph_echo512( &ctx.echo, in, size ); - sph_echo512_close( &ctx.echo, hash ); -#endif - break; - case HAMSI: - sph_hamsi512_init( &ctx.hamsi ); - sph_hamsi512( &ctx.hamsi, in, size ); - sph_hamsi512_close( &ctx.hamsi, hash ); - break; - case FUGUE: - sph_fugue512_init( &ctx.fugue ); - sph_fugue512( &ctx.fugue, in, size ); - sph_fugue512_close( &ctx.fugue, hash ); - break; - case SHABAL: - sph_shabal512_init( &ctx.shabal ); - sph_shabal512( &ctx.shabal, in, size ); - sph_shabal512_close( &ctx.shabal, hash ); - break; - case WHIRLPOOL: - sph_whirlpool_init( &ctx.whirlpool ); - sph_whirlpool( &ctx.whirlpool, in, size ); - sph_whirlpool_close( &ctx.whirlpool, hash ); - break; - case SHA_512: - SHA512_Init( &ctx.sha512 ); - SHA512_Update( &ctx.sha512, in, size ); - SHA512_Final( (unsigned char*) hash, &ctx.sha512 ); - break; - } - in = (void*) hash; - size = 64; - } + x16r_hash_generic( hash, input ); sph_haval256_5_init( &ctx.haval ); sph_haval256_5( &ctx.haval, (const void*) hash, 64) ; @@ -206,42 +60,38 @@ int scanhash_x21s( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) { uint32_t _ALIGN(128) hash32[8]; - uint32_t _ALIGN(128) endiandata[20]; + uint32_t _ALIGN(128) edata[20]; uint32_t *pdata = work->data; uint32_t *ptarget = work->target; - const uint32_t Htarg = ptarget[7]; const uint32_t first_nonce = pdata[19]; - int thr_id = mythr->id; // thr_id arg is deprecated + const int thr_id = mythr->id; uint32_t nonce = first_nonce; volatile uint8_t *restart = &(work_restart[thr_id].restart); + const bool bench = opt_benchmark; + if ( bench ) ptarget[7] = 0x0cff; - casti_m128i( endiandata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) ); - casti_m128i( endiandata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) ); - casti_m128i( endiandata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) ); - casti_m128i( 
endiandata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) ); - casti_m128i( endiandata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) ); + mm128_bswap32_80( edata, pdata ); + static __thread uint32_t s_ntime = UINT32_MAX; if ( s_ntime != pdata[17] ) { uint32_t ntime = swab32(pdata[17]); - x16_r_s_getAlgoString( (const uint8_t*) (&endiandata[1]), hashOrder ); + x16_r_s_getAlgoString( (const uint8_t*)(&edata[1]), x16r_hash_order ); s_ntime = ntime; if ( opt_debug && !thr_id ) - applog( LOG_DEBUG, "hash order %s (%08x)", hashOrder, ntime ); + applog( LOG_INFO, "hash order %s (%08x)", x16r_hash_order, ntime ); } - if ( opt_benchmark ) - ptarget[7] = 0x0cff; + x16r_prehash( edata, pdata ); do { - be32enc( &endiandata[19], nonce ); - x21s_hash( hash32, endiandata ); + edata[19] = nonce; + x21s_hash( hash32, edata ); - if ( hash32[7] <= Htarg ) - if (fulltest( hash32, ptarget ) && !opt_benchmark ) + if ( unlikely( valid_hash( hash32, ptarget ) && !bench ) ) { - pdata[19] = nonce; + pdata[19] = bswap_32( nonce ); submit_solution( work, hash32, mythr ); } nonce++; @@ -261,3 +111,4 @@ bool x21s_thread_init() return x21s_matrix; } +#endif diff --git a/algo/x17/sonoa.c b/algo/x17/sonoa.c index 1e61fc2..8e4a293 100644 --- a/algo/x17/sonoa.c +++ b/algo/x17/sonoa.c @@ -1,4 +1,7 @@ #include "sonoa-gate.h" + +#if !defined(SONOA_8WAY) && !defined(SONOA_4WAY) + #include #include #include @@ -616,3 +619,5 @@ int scanhash_sonoa( struct work *work, uint32_t max_nonce, pdata[19] = n; return 0; } + +#endif diff --git a/algo/x17/x17.c b/algo/x17/x17.c index 95c30a3..dbcb5d2 100644 --- a/algo/x17/x17.c +++ b/algo/x17/x17.c @@ -1,4 +1,7 @@ #include "x17-gate.h" + +#if !defined(X17_8WAY) && !defined(X17_4WAY) + #include #include #include @@ -9,9 +12,6 @@ #include "algo/keccak/sph_keccak.h" #include "algo/skein/sph_skein.h" #include "algo/shavite/sph_shavite.h" -#include "algo/luffa/sph_luffa.h" -#include "algo/cubehash/sph_cubehash.h" -#include "algo/simd/sph_simd.h" #include 
"algo/hamsi/sph_hamsi.h" #include "algo/fugue/sph_fugue.h" #include "algo/shabal/sph_shabal.h" @@ -148,30 +148,32 @@ void x17_hash(void *output, const void *input) int scanhash_x17( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr) { - uint32_t endiandata[20] __attribute__((aligned(64))); + uint32_t edata[20] __attribute__((aligned(64))); uint32_t hash64[8] __attribute__((aligned(64))); uint32_t *pdata = work->data; uint32_t *ptarget = work->target; uint32_t n = pdata[19] - 1; const uint32_t first_nonce = pdata[19]; - int thr_id = mythr->id; // thr_id arg is deprecated - - // we need bigendian data... - casti_m128i( endiandata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) ); - casti_m128i( endiandata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) ); - casti_m128i( endiandata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) ); - casti_m128i( endiandata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) ); - casti_m128i( endiandata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) ); + const int thr_id = mythr->id; + const bool bench = opt_benchmark; + mm128_bswap32_80( edata, pdata ); + do { - pdata[19] = ++n; - be32enc( &endiandata[19], n ); - x17_hash( hash64, endiandata ); - if unlikely( valid_hash( hash64, ptarget ) && !opt_benchmark ) - submit_solution( work, hash64, mythr ); + edata[19] = n; + x17_hash( hash64, edata ); + if ( unlikely( valid_hash( hash64, ptarget ) && !bench ) ) + { + pdata[19] = bswap_32( n ); + submit_solution( work, hash64, mythr ); + } + n++; } while ( n < max_nonce && !work_restart[thr_id].restart); *hashes_done = n - first_nonce + 1; pdata[19] = n; return 0; } + +#endif + diff --git a/algo/x17/xevan.c b/algo/x17/xevan.c index b351eb3..54553f8 100644 --- a/algo/x17/xevan.c +++ b/algo/x17/xevan.c @@ -1,5 +1,7 @@ #include "xevan-gate.h" +#if !defined(XEVAN_8WAY) && !defined(XEVAN_4WAY) + #include #include #include @@ -268,3 +270,4 @@ int scanhash_xevan( struct work *work, uint32_t max_nonce, return 0; } +#endif 
diff --git a/algo/x22/x22i.c b/algo/x22/x22i.c index 65643e3..2e036e6 100644 --- a/algo/x22/x22i.c +++ b/algo/x22/x22i.c @@ -1,3 +1,7 @@ +#include "x22i-gate.h" + +#if !( defined(X22I_8WAY) || defined(X22I_4WAY) ) + #include "algo/blake/sph_blake.h" #include "algo/bmw/sph_bmw.h" #if defined(__AES__) @@ -24,7 +28,6 @@ #include "algo/lyra2/lyra2.h" #include "algo/gost/sph_gost.h" #include "algo/swifftx/swifftx.h" -#include "x22i-gate.h" union _x22i_context_overlay { @@ -200,3 +203,4 @@ int scanhash_x22i( struct work* work, uint32_t max_nonce, return 0; } +#endif diff --git a/algo/x22/x25x.c b/algo/x22/x25x.c index 8dd5c62..6d8b04b 100644 --- a/algo/x22/x25x.c +++ b/algo/x22/x25x.c @@ -1,4 +1,7 @@ #include "x22i-gate.h" + +#if !( defined(X25X_8WAY) || defined(X25X_4WAY) ) + #include "algo/blake/sph_blake.h" #include "algo/bmw/sph_bmw.h" #if defined(__AES__) @@ -201,7 +204,7 @@ void x25x_hash( void *output, const void *input ) int scanhash_x25x( struct work* work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) { - uint32_t endiandata[20] __attribute__((aligned(64))); + uint32_t edata[20] __attribute__((aligned(64))); uint32_t hash[8] __attribute__((aligned(64))); uint32_t *pdata = work->data; uint32_t *ptarget = work->target; @@ -213,17 +216,19 @@ int scanhash_x25x( struct work* work, uint32_t max_nonce, if (opt_benchmark) ((uint32_t*)ptarget)[7] = 0x08ff; + mm128_bswap32_80( edata, pdata ); + for (int k=0; k < 20; k++) - be32enc(&endiandata[k], pdata[k]); + be32enc(&edata[k], pdata[k]); InitializeSWIFFTX(); do { pdata[19] = ++n; - be32enc( &endiandata[19], n ); + be32enc( &edata[19], n ); - x25x_hash( hash, endiandata ); + x25x_hash( hash, edata ); if ( hash[7] < Htarg ) if ( fulltest( hash, ptarget ) && !opt_benchmark ) @@ -234,3 +239,4 @@ int scanhash_x25x( struct work* work, uint32_t max_nonce, return 0; } +#endif diff --git a/configure b/configure index 3f7e8f2..0385591 100755 --- a/configure +++ b/configure @@ -1,6 +1,6 @@ #! 
/bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.11.8. +# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.11.9. # # # Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc. @@ -577,8 +577,8 @@ MAKEFLAGS= # Identity of this package. PACKAGE_NAME='cpuminer-opt' PACKAGE_TARNAME='cpuminer-opt' -PACKAGE_VERSION='3.11.8' -PACKAGE_STRING='cpuminer-opt 3.11.8' +PACKAGE_VERSION='3.11.9' +PACKAGE_STRING='cpuminer-opt 3.11.9' PACKAGE_BUGREPORT='' PACKAGE_URL='' @@ -1332,7 +1332,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures cpuminer-opt 3.11.8 to adapt to many kinds of systems. +\`configure' configures cpuminer-opt 3.11.9 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1404,7 +1404,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of cpuminer-opt 3.11.8:";; + short | recursive ) echo "Configuration of cpuminer-opt 3.11.9:";; esac cat <<\_ACEOF @@ -1509,7 +1509,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -cpuminer-opt configure 3.11.8 +cpuminer-opt configure 3.11.9 generated by GNU Autoconf 2.69 Copyright (C) 2012 Free Software Foundation, Inc. @@ -2012,7 +2012,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by cpuminer-opt $as_me 3.11.8, which was +It was created by cpuminer-opt $as_me 3.11.9, which was generated by GNU Autoconf 2.69. Invocation command line was $ $0 $@ @@ -2993,7 +2993,7 @@ fi # Define the identity of the package. 
PACKAGE='cpuminer-opt' - VERSION='3.11.8' + VERSION='3.11.9' cat >>confdefs.h <<_ACEOF @@ -6690,7 +6690,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by cpuminer-opt $as_me 3.11.8, which was +This file was extended by cpuminer-opt $as_me 3.11.9, which was generated by GNU Autoconf 2.69. Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -6756,7 +6756,7 @@ _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" ac_cs_version="\\ -cpuminer-opt config.status 3.11.8 +cpuminer-opt config.status 3.11.9 configured by $0, generated by GNU Autoconf 2.69, with options \\"\$ac_cs_config\\" diff --git a/configure.ac b/configure.ac index 5d8771d..fa3f281 100644 --- a/configure.ac +++ b/configure.ac @@ -1,4 +1,4 @@ -AC_INIT([cpuminer-opt], [3.11.8]) +AC_INIT([cpuminer-opt], [3.11.9]) AC_PREREQ([2.59c]) AC_CANONICAL_SYSTEM diff --git a/cpu-miner.c b/cpu-miner.c index 7e1f094..46717ca 100644 --- a/cpu-miner.c +++ b/cpu-miner.c @@ -107,7 +107,6 @@ enum algos opt_algo = ALGO_NULL; char* opt_param_key = NULL; int opt_param_n = 0; int opt_param_r = 0; -int opt_pluck_n = 128; int opt_n_threads = 0; bool opt_reset_on_stale = false; bool opt_sapling = false; @@ -175,7 +174,8 @@ uint64_t net_blocks = 0; uint32_t opt_work_size = 0; char *opt_api_allow = NULL; int opt_api_remote = 0; - int opt_api_listen = 4048; + int opt_api_listen = 0; +// int opt_api_listen = 4048; pthread_mutex_t rpc2_job_lock; pthread_mutex_t rpc2_login_lock; @@ -1003,7 +1003,7 @@ void report_summary_log( bool force ) if ( solved_block_count ) applog2( LOG_INFO,"Blocks solved %6d", solved_block_count ); - +/* #if !(defined(__WINDOWS__) || defined(_WIN64) || defined(_WIN32)) int temp = cpu_temp(0); @@ -1023,6 +1023,7 @@ void report_summary_log( bool force ) applog2(LOG_INFO,"CPU temp %s 
max %dC", tempstr, hi_temp ); #endif +*/ } static int share_result( int result, struct work *null_work, @@ -1784,7 +1785,7 @@ static bool get_work(struct thr_info *thr, struct work *work) return true; } -static bool submit_work( struct thr_info *thr, +bool submit_work( struct thr_info *thr, const struct work *work_in ) { struct workio_cmd *wc; @@ -2059,6 +2060,7 @@ static void *miner_thread( void *userdata ) struct thr_info *mythr = (struct thr_info *) userdata; int thr_id = mythr->id; uint32_t max_nonce; + struct timeval cpu_temp_time = {0}; // end_nonce gets read before being set so it needs to be initialized // what is an appropriate value that is completely neutral? @@ -2294,6 +2296,34 @@ static void *miner_thread( void *userdata ) pthread_mutex_unlock( &g_work_lock ); } } + +#if !(defined(__WINDOWS__) || defined(_WIN64) || defined(_WIN32)) + if (!opt_quiet && mythr->id == 0 ) + { + int temp = cpu_temp(0); + timeval_subtract( &diff, &tv_end, &cpu_temp_time ); + int wait = temp >= 80 ? 30 : temp >= 70 ? 90 : 180; + if ( ( diff.tv_sec > wait ) || ( temp > hi_temp ) ) + { + char tempstr[32]; + int lo_freq, hi_freq; + linux_cpu_hilo_freq( &lo_freq, &hi_freq ); + memcpy( &cpu_temp_time, &tv_end, sizeof(cpu_temp_time) ); + if ( temp > hi_temp ) hi_temp = temp; + if ( use_colors && ( temp >= 70 ) ) + { + if ( temp >= 80 ) + sprintf( tempstr, "%s%d C%s", CL_WHT CL_RED, temp, CL_N ); + else + sprintf( tempstr, "%s%d C%s", CL_WHT CL_YLW, temp, CL_N ); + } + else + sprintf( tempstr, "%d C", temp ); + applog( LOG_INFO,"CPU temp: curr %s (max %d), Freq: %.3f/%.3f GHz", + tempstr, hi_temp, (float)lo_freq / 1e6, (float)hi_freq/ 1e6 ); + } + } +#endif // display hashrate if ( unlikely( opt_hash_meter ) ) { @@ -2719,14 +2749,17 @@ void std_stratum_gen_work( struct stratum_ctx *sctx, struct work *g_work ) uint64_t net_ttf = ( last_block_height - session_first_block ) == 0 ? 
0 : et.tv_sec / ( last_block_height - session_first_block ); - double net_hr = net_diff * diff_to_hash / net_ttf; - char net_ttf_str[32]; - char net_hr_units[4] = {0}; + if ( net_diff && net_ttf ) + { + double net_hr = net_diff * diff_to_hash / net_ttf; + char net_ttf_str[32]; + char net_hr_units[4] = {0}; - sprintf_et( net_ttf_str, net_ttf ); - scale_hash_for_display ( &net_hr, net_hr_units ); - applog2( LOG_INFO, "Net TTF @ %.2f %sh/s: %s", - net_hr, net_hr_units, net_ttf_str ); + sprintf_et( net_ttf_str, net_ttf ); + scale_hash_for_display ( &net_hr, net_hr_units ); + applog2( LOG_INFO, "Net TTF @ %.2f %sh/s: %s", + net_hr, net_hr_units, net_ttf_str ); + } } } // hr > 0 } // !quiet @@ -2739,8 +2772,23 @@ void jr2_stratum_gen_work( struct stratum_ctx *sctx, struct work *g_work ) work_free( g_work ); work_copy( g_work, &sctx->work ); pthread_mutex_unlock( &sctx->work_lock ); +/* + if ( stratum_diff != sctx->job.diff ) + applog( LOG_BLUE, "New stratum diff %g, block %d, job %s", + sctx->job.diff, sctx->block_height, g_work->job_id ); + else if ( last_block_height != sctx->block_height ) + applog( LOG_BLUE, "New block %d, job %s", + sctx->block_height, g_work->job_id ); + else if ( g_work->job_id ) + applog( LOG_BLUE,"New job %s", g_work->job_id ); +*/ if ( last_block_height != stratum.block_height ) - last_block_height = stratum.block_height; + { + applog(LOG_BLUE, "Stratum detected new block"); + last_block_height = stratum.block_height; + } + if ( stratum_diff != g_work->stratum_diff ) + stratum_diff = g_work->stratum_diff; } static void *stratum_thread(void *userdata ) @@ -2751,7 +2799,7 @@ static void *stratum_thread(void *userdata ) stratum.url = (char*) tq_pop(mythr->q, NULL); if (!stratum.url) goto out; - applog(LOG_INFO, "Starting Stratum on %s", stratum.url); + applog( LOG_INFO, "Stratum connect %s", short_url ); while (1) { @@ -2794,6 +2842,8 @@ static void *stratum_thread(void *userdata ) applog(LOG_ERR, "...retry after %d seconds", opt_fail_pause); 
sleep(opt_fail_pause); } + else + applog(LOG_BLUE,"Stratum connection established" ); if ( unlikely( jsonrpc_2 ) ) { work_free(&g_work); @@ -2979,7 +3029,7 @@ void parse_arg(int key, char *arg ) /* port or 0 to disable */ opt_api_listen = atoi(arg); } - break; + break; case 1030: /* --api-remote */ opt_api_remote = 1; break; @@ -3585,7 +3635,7 @@ int main(int argc, char *argv[]) rpc_user = strdup(""); rpc_pass = strdup(""); - opt_api_allow = strdup("127.0.0.1"); /* 0.0.0.0 for all ips */ +// opt_api_allow = strdup("127.0.0.1"); /* 0.0.0.0 for all ips */ parse_cmdline(argc, argv); @@ -3870,16 +3920,21 @@ int main(int argc, char *argv[]) thr->q = tq_new(); if (!thr->q) return 1; - err = thread_create(thr, api_thread); - if (err) { - applog(LOG_ERR, "api thread create failed"); + err = thread_create( thr, api_thread ); + if ( err ) + { + applog( LOG_ERR, "api thread create failed" ); return 1; } - } + if ( !opt_quiet ) + applog( LOG_INFO,"API listnening to %s:%d", opt_api_allow, + opt_api_listen ); + } /* start mining threads */ for (i = 0; i < opt_n_threads; i++) { + usleep( 5000 ); thr = &thr_info[i]; thr->id = i; thr->q = tq_new(); @@ -3890,7 +3945,7 @@ int main(int argc, char *argv[]) applog(LOG_ERR, "thread %d create failed", i); return 1; } - } + } applog(LOG_INFO, "%d miner threads started, " "using '%s' algorithm.", diff --git a/miner.h b/miner.h index 9629ee3..c766e8f 100644 --- a/miner.h +++ b/miner.h @@ -338,7 +338,7 @@ bool submit_lane_solution( struct work *work, const void *hash, struct thr_info *thr, const int lane ); -//bool submit_work( struct thr_info *thr, const struct work *work_in ); +bool submit_work( struct thr_info *thr, const struct work *work_in ); void get_currentalgo( char* buf, int sz ); @@ -368,6 +368,7 @@ struct work { double targetdiff; // double shareratio; double sharediff; + double stratum_diff; int height; char *txs; @@ -526,7 +527,6 @@ enum algos { ALGO_ARGON2D500, ALGO_ARGON2D4096, ALGO_AXIOM, - ALGO_BASTION, ALGO_BLAKE, 
ALGO_BLAKE2B, ALGO_BLAKE2S, @@ -540,10 +540,7 @@ enum algos { ALGO_DECRED, ALGO_DEEP, ALGO_DMD_GR, - ALGO_DROP, - ALGO_FRESH, ALGO_GROESTL, - ALGO_HEAVY, ALGO_HEX, ALGO_HMQ1725, ALGO_HODL, @@ -551,7 +548,6 @@ enum algos { ALGO_KECCAK, ALGO_KECCAKC, ALGO_LBRY, - ALGO_LUFFA, ALGO_LYRA2H, ALGO_LYRA2RE, ALGO_LYRA2REV2, @@ -565,7 +561,6 @@ enum algos { ALGO_PENTABLAKE, ALGO_PHI1612, ALGO_PHI2, - ALGO_PLUCK, ALGO_POLYTIMOS, ALGO_POWER2B, ALGO_QUARK, @@ -626,7 +621,6 @@ static const char* const algo_names[] = { "argon2d500", "argon2d4096", "axiom", - "bastion", "blake", "blake2b", "blake2s", @@ -640,10 +634,7 @@ static const char* const algo_names[] = { "decred", "deep", "dmd-gr", - "drop", - "fresh", "groestl", - "heavy", "hex", "hmq1725", "hodl", @@ -651,7 +642,6 @@ static const char* const algo_names[] = { "keccak", "keccakc", "lbry", - "luffa", "lyra2h", "lyra2re", "lyra2rev2", @@ -665,7 +655,6 @@ static const char* const algo_names[] = { "pentablake", "phi1612", "phi2", - "pluck", "polytimos", "power2b", "quark", @@ -758,7 +747,6 @@ extern double stratum_diff; extern bool opt_reset_on_stale; extern double net_diff; extern double net_hashrate; -extern int opt_pluck_n; extern int opt_param_n; extern int opt_param_r; extern char* opt_param_key; @@ -792,7 +780,6 @@ Options:\n\ argon2d500 argon2d-dyn, Dynamic (DYN)\n\ argon2d4096 argon2d-uis, Unitus (UIS)\n\ axiom Shabal-256 MemoHash\n\ - bastion\n\ blake blake256r14 (SFR)\n\ blake2b Blake2b 256\n\ blake2s Blake-2 S\n\ @@ -806,10 +793,7 @@ Options:\n\ decred Blake256r14dcr\n\ deep Deepcoin (DCN)\n\ dmd-gr Diamond\n\ - drop Dropcoin\n\ - fresh Fresh\n\ groestl Groestl coin\n\ - heavy Heavy\n\ hex x16r-hex\n\ hmq1725 Espers\n\ hodl Hodlcoin\n\ @@ -817,7 +801,6 @@ Options:\n\ keccak Maxcoin\n\ keccakc Creative Coin\n\ lbry LBC, LBRY Credits\n\ - luffa Luffa\n\ lyra2h Hppcoin\n\ lyra2re lyra2\n\ lyra2rev2 lyrav2\n\ @@ -831,7 +814,6 @@ Options:\n\ pentablake 5 x blake512\n\ phi1612 phi\n\ phi2\n\ - pluck Pluck:128 (Supcoin)\n\ 
polytimos\n\ power2b MicroBitcoin (MBC)\n\ quark Quark\n\ diff --git a/simd-utils/intrlv.h b/simd-utils/intrlv.h index 0ca4f95..b923fd3 100644 --- a/simd-utils/intrlv.h +++ b/simd-utils/intrlv.h @@ -579,6 +579,32 @@ static inline void mm128_bswap32_80( void *d, void *s ) casti_m128i( d, 4 ) = _mm_shuffle_epi8( casti_m128i( s, 4 ), bswap_shuf ); } +#else + +static inline void mm128_bswap32_80( void *d, void *s ) +{ + ( (uint32_t*)d )[ 0] = bswap_32( ( (uint32_t*)s )[ 0] ); + ( (uint32_t*)d )[ 1] = bswap_32( ( (uint32_t*)s )[ 1] ); + ( (uint32_t*)d )[ 2] = bswap_32( ( (uint32_t*)s )[ 2] ); + ( (uint32_t*)d )[ 3] = bswap_32( ( (uint32_t*)s )[ 3] ); + ( (uint32_t*)d )[ 4] = bswap_32( ( (uint32_t*)s )[ 4] ); + ( (uint32_t*)d )[ 5] = bswap_32( ( (uint32_t*)s )[ 5] ); + ( (uint32_t*)d )[ 6] = bswap_32( ( (uint32_t*)s )[ 6] ); + ( (uint32_t*)d )[ 7] = bswap_32( ( (uint32_t*)s )[ 7] ); + ( (uint32_t*)d )[ 8] = bswap_32( ( (uint32_t*)s )[ 8] ); + ( (uint32_t*)d )[ 9] = bswap_32( ( (uint32_t*)s )[ 9] ); + ( (uint32_t*)d )[10] = bswap_32( ( (uint32_t*)s )[10] ); + ( (uint32_t*)d )[11] = bswap_32( ( (uint32_t*)s )[11] ); + ( (uint32_t*)d )[12] = bswap_32( ( (uint32_t*)s )[12] ); + ( (uint32_t*)d )[13] = bswap_32( ( (uint32_t*)s )[13] ); + ( (uint32_t*)d )[14] = bswap_32( ( (uint32_t*)s )[14] ); + ( (uint32_t*)d )[15] = bswap_32( ( (uint32_t*)s )[15] ); + ( (uint32_t*)d )[16] = bswap_32( ( (uint32_t*)s )[16] ); + ( (uint32_t*)d )[17] = bswap_32( ( (uint32_t*)s )[17] ); + ( (uint32_t*)d )[18] = bswap_32( ( (uint32_t*)s )[18] ); + ( (uint32_t*)d )[19] = bswap_32( ( (uint32_t*)s )[19] ); +} + #endif static inline void mm128_bswap32_intrlv80_4x32( void *d, const void *src ) diff --git a/simd-utils/simd-512.h b/simd-utils/simd-512.h index a74cb83..c7d1a9c 100644 --- a/simd-utils/simd-512.h +++ b/simd-utils/simd-512.h @@ -137,7 +137,7 @@ static inline __m512i m512_const_64( const uint64_t i7, const uint64_t i6, #define m512_const1_8( i ) _mm512_broadcastb_epi8 ( mm128_mov32_128( i ) ) 
#define m512_const2_128( v1, v0 ) \ - m512_const1_256( _mm512_inserti64x2( _mm512_castsi128_si512( lo ), hi, 1 ) ) + m512_const1_256( _mm512_inserti64x2( _mm512_castsi128_si512( v0 ), v1, 1 ) ) #define m512_const2_64( i1, i0 ) \ m512_const1_128( m128_const_64( i1, i0 ) ) diff --git a/sysinfos.c b/sysinfos.c index 182879d..0f74a75 100644 --- a/sysinfos.c +++ b/sysinfos.c @@ -98,7 +98,13 @@ static inline float linux_cputemp(int core) } #define CPUFREQ_PATH \ - "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_cur_freq" + "/sys/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq" + +#define CPUFREQ_PATHn \ + "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_cur_freq" + + +// "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_cur_freq" static inline uint32_t linux_cpufreq(int core) { FILE *fd = fopen(CPUFREQ_PATH, "r"); @@ -113,35 +119,60 @@ static inline uint32_t linux_cpufreq(int core) return freq; } +static inline void linux_cpu_hilo_freq( uint32_t* lo, uint32_t *hi ) +{ + uint64_t freq = 0, hi_freq = 0, lo_freq = 0xffffffffffffffff; + + for ( int i = 0; i < num_cpus; i++ ) + { + char path[64]; + sprintf( path, CPUFREQ_PATHn, i ); + + FILE *fd = fopen( path, "r" ); + if ( fd ) + { + if ( fscanf( fd, "%ld", &freq ) ) + { + if ( freq > hi_freq ) hi_freq = freq; + if ( freq < lo_freq ) lo_freq = freq; + } + } + } + *hi = hi_freq; + *lo = lo_freq; +} + + #else /* WIN32 */ -static inline float win32_cputemp(int core) +static inline float win32_cputemp( int core ) { // todo return 0.0; } + #endif /* !WIN32 */ /* exports */ -static inline float cpu_temp(int core) +static inline float cpu_temp( int core ) { #ifdef WIN32 - return win32_cputemp(core); + return win32_cputemp( core ); #else - return linux_cputemp(core); + return linux_cputemp( core ); #endif } -static inline uint32_t cpu_clock(int core) +static inline uint32_t cpu_clock( int core ) { #ifdef WIN32 return 0; #else - return linux_cpufreq(core); + return linux_cpufreq( core ); #endif } diff --git a/util.c b/util.c index 
635be39..aea0778 100644 --- a/util.c +++ b/util.c @@ -1777,8 +1777,9 @@ bool rpc2_job_decode(const json_t *job, struct work *work) double diff = trunc( ( ((double)0xffffffff) / target ) ); if ( !opt_quiet ) // xmr pool diff can change a lot... - applog(LOG_WARNING, "Stratum difficulty set to %g", diff); - stratum_diff = diff; + applog(LOG_BLUE, "Stratum difficulty set to %g", diff); + work->stratum_diff = diff; + stratum_diff = diff; rpc2_target = target; }