From 88f81fda0bbe71c4c1adae3b6b942340595385f0 Mon Sep 17 00:00:00 2001 From: Jay D Dee Date: Sun, 26 Jan 2020 04:33:39 -0500 Subject: [PATCH] v3.11.7 --- Makefile.am | 7 +- README.md | 3 +- RELEASE_NOTES | 11 + algo-gate-api.c | 2 + algo-gate-api.h | 48 +- algo/keccak/keccak-4way.c | 35 +- algo/keccak/keccak-gate.c | 53 +++ algo/keccak/keccak-gate.h | 15 +- algo/keccak/keccak-hash-4way.c | 5 +- algo/keccak/keccak.c | 52 ++- algo/keccak/sha3d-4way.c | 126 ++++++ algo/keccak/sha3d.c | 50 +++ algo/keccak/sph_keccak.c | 4 +- algo/lyra2/allium-4way.c | 38 +- algo/lyra2/lyra2-gate.c | 2 +- algo/nist5/zr5.c | 5 +- .../sha256_p.c => sha/hmac-sha256-hash.c} | 118 ++--- .../sha256_p.h => sha/hmac-sha256-hash.h} | 17 +- algo/sha/sha256-hash-4way.c | 33 ++ algo/sha/sha512-hash-4way.c | 64 ++- algo/x13/drop.c | 4 +- algo/x17/x17-4way.c | 17 +- algo/yescrypt/sha256_Y.c | 409 ------------------ algo/yescrypt/sysendian.h | 124 ------ algo/yescrypt/yescrypt-simd.c | 18 +- algo/yescrypt/yescrypt.c | 53 +-- algo/yespower/crypto/blake2b-yp.c | 9 +- algo/yespower/insecure_memzero.h | 1 - algo/yespower/sysendian.h | 94 ---- algo/yespower/utils/insecure_memzero.h | 1 - algo/yespower/utils/sysendian.h | 94 ---- algo/yespower/yescrypt-r8g.c | 80 ++++ .../sha256_Y.h => yespower/yescrypt-r8g.h} | 56 +-- algo/yespower/yespower-blake2b.c | 12 +- algo/yespower/yespower-gate.c | 108 +++-- algo/yespower/yespower-opt.c | 15 +- algo/yespower/yespower-ref.c | 12 +- algo/yespower/yespower.h | 2 +- configure | 20 +- configure.ac | 2 +- cpu-miner.c | 88 ++-- miner.h | 21 +- util.c | 116 +++-- 43 files changed, 861 insertions(+), 1183 deletions(-) create mode 100644 algo/keccak/sha3d-4way.c create mode 100644 algo/keccak/sha3d.c rename algo/{yespower/sha256_p.c => sha/hmac-sha256-hash.c} (58%) rename algo/{yespower/sha256_p.h => sha/hmac-sha256-hash.h} (88%) delete mode 100644 algo/yescrypt/sha256_Y.c delete mode 100644 algo/yescrypt/sysendian.h delete mode 100644 algo/yespower/insecure_memzero.h delete 
mode 100644 algo/yespower/sysendian.h delete mode 100644 algo/yespower/utils/insecure_memzero.h delete mode 100644 algo/yespower/utils/sysendian.h create mode 100644 algo/yespower/yescrypt-r8g.c rename algo/{yescrypt/sha256_Y.h => yespower/yescrypt-r8g.h} (51%) diff --git a/Makefile.am b/Makefile.am index c9a861e..99b2fee 100644 --- a/Makefile.am +++ b/Makefile.am @@ -80,7 +80,6 @@ cpuminer_SOURCES = \ algo/cryptonight/cryptonight-common.c\ algo/cryptonight/cryptonight-aesni.c\ algo/cryptonight/cryptonight.c\ - algo/cubehash/sph_cubehash.c \ algo/cubehash/cubehash_sse2.c\ algo/cubehash/cube-hash-2way.c \ algo/echo/sph_echo.c \ @@ -121,6 +120,8 @@ cpuminer_SOURCES = \ algo/keccak/keccak-hash-4way.c \ algo/keccak/keccak-4way.c\ algo/keccak/keccak-gate.c \ + algo/keccak/sha3d-4way.c \ + algo/keccak/sha3d.c \ algo/lanehash/lane.c \ algo/luffa/sph_luffa.c \ algo/luffa/luffa.c \ @@ -180,6 +181,7 @@ cpuminer_SOURCES = \ algo/sha/sph_sha2big.c \ algo/sha/sha256-hash-4way.c \ algo/sha/sha512-hash-4way.c \ + algo/sha/hmac-sha256-hash.c \ algo/sha/sha2.c \ algo/sha/sha256t-gate.c \ algo/sha/sha256t-4way.c \ @@ -292,12 +294,11 @@ cpuminer_SOURCES = \ algo/x22/x25x.c \ algo/x22/x25x-4way.c \ algo/yescrypt/yescrypt.c \ - algo/yescrypt/sha256_Y.c \ algo/yescrypt/yescrypt-best.c \ algo/yespower/yespower-gate.c \ algo/yespower/yespower-blake2b.c \ algo/yespower/crypto/blake2b-yp.c \ - algo/yespower/sha256_p.c \ + algo/yespower/yescrypt-r8g.c \ algo/yespower/yespower-opt.c disable_flags = diff --git a/README.md b/README.md index 4019bd1..0707fb7 100644 --- a/README.md +++ b/README.md @@ -97,10 +97,10 @@ Supported Algorithms qubit Qubit scrypt scrypt(1024, 1, 1) (default) scrypt:N scrypt(N, 1, 1) - scryptjane:nf sha256d Double SHA-256 sha256q Quad SHA-256, Pyrite (PYE) sha256t Triple SHA-256, Onecoin (OC) + sha3d Double keccak256 (BSHA3) shavite3 Shavite3 skein Skein+Sha (Skeincoin) skein2 Double Skein (Woodcoin) @@ -134,6 +134,7 @@ Supported Algorithms xevan Bitsend (BSD) yescrypt 
Globalboost-Y (BSTY) yescryptr8 BitZeny (ZNY) + yescryptr8g Koto (KOTO) yescryptr16 Eli yescryptr32 WAVI yespower Cryply diff --git a/RELEASE_NOTES b/RELEASE_NOTES index 30080ef..aa6938b 100644 --- a/RELEASE_NOTES +++ b/RELEASE_NOTES @@ -65,6 +65,17 @@ If not what makes it happen or not happen? Change Log ---------- +v3.11.7 + +Added yescryptr8g algo for KOTO, including support for block version 5. + +Added sha3d algo for BSHA3. + +Removed memcmp and clean_job checks from get_new_work, now only check job_id. + +Small improvement to sha512 and sha256 parallel implementations that don't +use SHA. + v3.11.6 Fixed CPU temperature regression from v3.11.5. diff --git a/algo-gate-api.c b/algo-gate-api.c index a65c00a..5f31b93 100644 --- a/algo-gate-api.c +++ b/algo-gate-api.c @@ -209,6 +209,7 @@ bool register_algo_gate( int algo, algo_gate_t *gate ) case ALGO_SHA256D: register_sha256d_algo ( gate ); break; case ALGO_SHA256Q: register_sha256q_algo ( gate ); break; case ALGO_SHA256T: register_sha256t_algo ( gate ); break; + case ALGO_SHA3D: register_sha3d_algo ( gate ); break; case ALGO_SHAVITE3: register_shavite_algo ( gate ); break; case ALGO_SKEIN: register_skein_algo ( gate ); break; case ALGO_SKEIN2: register_skein2_algo ( gate ); break; @@ -247,6 +248,7 @@ bool register_algo_gate( int algo, algo_gate_t *gate ) */ case ALGO_YESCRYPT: register_yescrypt_algo ( gate ); break; case ALGO_YESCRYPTR8: register_yescryptr8_algo ( gate ); break; + case ALGO_YESCRYPTR8G: register_yescryptr8g_algo ( gate ); break; case ALGO_YESCRYPTR16: register_yescryptr16_algo ( gate ); break; case ALGO_YESCRYPTR32: register_yescryptr32_algo ( gate ); break; case ALGO_YESPOWER: register_yespower_algo ( gate ); break; diff --git a/algo-gate-api.h b/algo-gate-api.h index eca5267..81e2070 100644 --- a/algo-gate-api.h +++ b/algo-gate-api.h @@ -121,54 +121,55 @@ void ( *hash_suw ) ( void*, const void* ); // Allocate thread local buffers and other initialization specific to miner // threads. 
-bool ( *miner_thread_init ) ( int ); +bool ( *miner_thread_init ) ( int ); // Generate global blockheader from stratum data. -void ( *stratum_gen_work ) ( struct stratum_ctx*, struct work* ); +void ( *stratum_gen_work ) ( struct stratum_ctx*, struct work* ); // Get thread local copy of blockheader with unique nonce. -void ( *get_new_work ) ( struct work*, struct work*, int, uint32_t*, - bool ); +void ( *get_new_work ) ( struct work*, struct work*, int, uint32_t* ); // Return pointer to nonce in blockheader. -uint32_t *( *get_nonceptr ) ( uint32_t* ); +uint32_t *( *get_nonceptr ) ( uint32_t* ); // Decode getwork blockheader -bool ( *work_decode ) ( const json_t*, struct work* ); +bool ( *work_decode ) ( const json_t*, struct work* ); // Extra getwork data -void ( *decode_extra_data ) ( struct work*, uint64_t* ); +void ( *decode_extra_data ) ( struct work*, uint64_t* ); -bool ( *submit_getwork_result ) ( CURL*, struct work* ); +bool ( *submit_getwork_result ) ( CURL*, struct work* ); -void ( *gen_merkle_root ) ( char*, struct stratum_ctx* ); +void ( *gen_merkle_root ) ( char*, struct stratum_ctx* ); // Increment extranonce -void ( *build_extraheader ) ( struct work*, struct stratum_ctx* ); +void ( *build_extraheader ) ( struct work*, struct stratum_ctx* ); + +void ( *build_block_header ) ( struct work*, uint32_t, uint32_t*, + uint32_t*, uint32_t, uint32_t, + unsigned char* ); -void ( *build_block_header ) ( struct work*, uint32_t, uint32_t*, - uint32_t*, uint32_t, uint32_t ); // Build mining.submit message -void ( *build_stratum_request ) ( char*, struct work*, struct stratum_ctx* ); +void ( *build_stratum_request ) ( char*, struct work*, struct stratum_ctx* ); -char* ( *malloc_txs_request ) ( struct work* ); +char* ( *malloc_txs_request ) ( struct work* ); // Big or little -void ( *set_work_data_endian ) ( struct work* ); +void ( *set_work_data_endian ) ( struct work* ); -double ( *calc_network_diff ) ( struct work* ); +double ( *calc_network_diff ) ( struct work* 
); // Wait for first work -bool ( *ready_to_mine ) ( struct work*, struct stratum_ctx*, int ); +bool ( *ready_to_mine ) ( struct work*, struct stratum_ctx*, int ); // Diverge mining threads -bool ( *do_this_thread ) ( int ); +bool ( *do_this_thread ) ( int ); // After do_this_thread -void ( *resync_threads ) ( struct work* ); +void ( *resync_threads ) ( struct work* ); -json_t* (*longpoll_rpc_call) ( CURL*, int*, char* ); -bool ( *stratum_handle_response )( json_t* ); +json_t* (*longpoll_rpc_call) ( CURL*, int*, char* ); +bool ( *stratum_handle_response ) ( json_t* ); set_t optimizations; int ( *get_work_data_size ) (); int ntime_index; @@ -225,7 +226,7 @@ uint32_t *std_get_nonceptr( uint32_t *work_data ); uint32_t *jr2_get_nonceptr( uint32_t *work_data ); void std_get_new_work( struct work *work, struct work *g_work, int thr_id, - uint32_t* end_nonce_ptr, bool clean_job ); + uint32_t* end_nonce_ptr ); void jr2_get_new_work( struct work *work, struct work *g_work, int thr_id, uint32_t* end_nonce_ptr ); @@ -256,7 +257,8 @@ double std_calc_network_diff( struct work *work ); void std_build_block_header( struct work* g_work, uint32_t version, uint32_t *prevhash, uint32_t *merkle_root, - uint32_t ntime, uint32_t nbits ); + uint32_t ntime, uint32_t nbits, + unsigned char *final_sapling_hash ); void std_build_extraheader( struct work *work, struct stratum_ctx *sctx ); diff --git a/algo/keccak/keccak-4way.c b/algo/keccak/keccak-4way.c index 07ad122..0193210 100644 --- a/algo/keccak/keccak-4way.c +++ b/algo/keccak/keccak-4way.c @@ -28,26 +28,28 @@ int scanhash_keccak_8way( struct work *work, uint32_t max_nonce, const uint32_t first_nonce = pdata[19]; __m512i *noncev = (__m512i*)vdata + 9; // aligned const uint32_t Htarg = ptarget[7]; - int thr_id = mythr->id; + const int thr_id = mythr->id; + const bool bench = opt_benchmark; mm512_bswap32_intrlv80_8x64( vdata, pdata ); + *noncev = mm512_intrlv_blend_32( + _mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0, + n+3, 0, n+2, 0, 
n+1, 0, n , 0 ), *noncev ); do { - *noncev = mm512_intrlv_blend_32( mm512_bswap_32( - _mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0, - n+3, 0, n+2, 0, n+1, 0, n , 0 ) ), *noncev ); - keccakhash_8way( hash, vdata ); for ( int lane = 0; lane < 8; lane++ ) - if ( hash7[ lane<<1 ] <= Htarg ) + if unlikely( hash7[ lane<<1 ] <= Htarg && !bench ) { extr_lane_8x64( lane_hash, hash, lane, 256 ); - if ( fulltest( lane_hash, ptarget ) && !opt_benchmark ) + if ( valid_hash( lane_hash, ptarget ) ) { - pdata[19] = n + lane; + pdata[19] = bswap_32( n + lane ); submit_lane_solution( work, lane_hash, mythr, lane ); } } + *noncev = _mm512_add_epi32( *noncev, + m512_const1_64( 0x0000000800000000 ) ); n += 8; } while ( (n < max_nonce-8) && !work_restart[thr_id].restart); @@ -79,27 +81,28 @@ int scanhash_keccak_4way( struct work *work, uint32_t max_nonce, const uint32_t first_nonce = pdata[19]; __m256i *noncev = (__m256i*)vdata + 9; // aligned const uint32_t Htarg = ptarget[7]; - int thr_id = mythr->id; + const int thr_id = mythr->id; + const bool bench = opt_benchmark; mm256_bswap32_intrlv80_4x64( vdata, pdata ); + *noncev = mm256_intrlv_blend_32( + _mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev ); do { - *noncev = mm256_intrlv_blend_32( mm256_bswap_32( - _mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev ); - keccakhash_4way( hash, vdata ); for ( int lane = 0; lane < 4; lane++ ) - if ( hash7[ lane<<1 ] <= Htarg ) + if unlikely( hash7[ lane<<1 ] <= Htarg && !bench ) { extr_lane_4x64( lane_hash, hash, lane, 256 ); - if ( fulltest( lane_hash, ptarget ) && !opt_benchmark ) + if ( valid_hash( lane_hash, ptarget )) { - pdata[19] = n + lane; + pdata[19] = bswap_32( n + lane ); submit_lane_solution( work, lane_hash, mythr, lane ); } } + *noncev = _mm256_add_epi32( *noncev, + m256_const1_64( 0x0000000400000000 ) ); n += 4; - } while ( (n < max_nonce-4) && !work_restart[thr_id].restart); *hashes_done = n - first_nonce + 1; diff --git a/algo/keccak/keccak-gate.c 
b/algo/keccak/keccak-gate.c index c8334a2..568a5da 100644 --- a/algo/keccak/keccak-gate.c +++ b/algo/keccak/keccak-gate.c @@ -1,5 +1,9 @@ #include "keccak-gate.h" +#include "sph_keccak.h" +int hard_coded_eb = 1; + +// KECCAK bool register_keccak_algo( algo_gate_t* gate ) { @@ -19,6 +23,8 @@ bool register_keccak_algo( algo_gate_t* gate ) return true; }; +// KECCAKC + bool register_keccakc_algo( algo_gate_t* gate ) { gate->optimizations = AVX2_OPT | AVX512_OPT; @@ -37,3 +43,50 @@ bool register_keccakc_algo( algo_gate_t* gate ) return true; }; +// SHA3D + +void sha3d( void *state, const void *input, int len ) +{ + uint32_t _ALIGN(64) buffer[16], hash[16]; + sph_keccak_context ctx_keccak; + + sph_keccak256_init( &ctx_keccak ); + sph_keccak256 ( &ctx_keccak, input, len ); + sph_keccak256_close( &ctx_keccak, (void*) buffer ); + + sph_keccak256_init( &ctx_keccak ); + sph_keccak256 ( &ctx_keccak, buffer, 32 ); + sph_keccak256_close( &ctx_keccak, (void*) hash ); + + memcpy(state, hash, 32); +} + +void sha3d_gen_merkle_root( char* merkle_root, struct stratum_ctx* sctx ) +{ + sha3d( merkle_root, sctx->job.coinbase, (int) sctx->job.coinbase_size ); + for ( int i = 0; i < sctx->job.merkle_count; i++ ) + { + memcpy( merkle_root + 32, sctx->job.merkle[i], 32 ); + sha256d( merkle_root, merkle_root, 64 ); + } +} + +bool register_sha3d_algo( algo_gate_t* gate ) +{ + hard_coded_eb = 6; + opt_extranonce = false; + gate->optimizations = AVX2_OPT | AVX512_OPT; + gate->gen_merkle_root = (void*)&sha3d_gen_merkle_root; +#if defined (KECCAK_8WAY) + gate->scanhash = (void*)&scanhash_sha3d_8way; + gate->hash = (void*)&sha3d_hash_8way; +#elif defined (KECCAK_4WAY) + gate->scanhash = (void*)&scanhash_sha3d_4way; + gate->hash = (void*)&sha3d_hash_4way; +#else + gate->scanhash = (void*)&scanhash_sha3d; + gate->hash = (void*)&sha3d_hash; +#endif + return true; +}; + diff --git a/algo/keccak/keccak-gate.h b/algo/keccak/keccak-gate.h index 0b78450..cee3d00 100644 --- a/algo/keccak/keccak-gate.h +++ 
b/algo/keccak/keccak-gate.h @@ -10,24 +10,37 @@ #define KECCAK_4WAY 1 #endif +extern int hard_coded_eb; + #if defined(KECCAK_8WAY) void keccakhash_8way( void *state, const void *input ); int scanhash_keccak_8way( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ); +void sha3d_hash_8way( void *state, const void *input ); +int scanhash_sha3d_8way( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ); + #elif defined(KECCAK_4WAY) void keccakhash_4way( void *state, const void *input ); int scanhash_keccak_4way( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ); +void sha3d_hash_4way( void *state, const void *input ); +int scanhash_sha3d_4way( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ); + #else void keccakhash( void *state, const void *input ); int scanhash_keccak( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ); -#endif +void sha3d_hash( void *state, const void *input ); +int scanhash_sha3d( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ); #endif +#endif diff --git a/algo/keccak/keccak-hash-4way.c b/algo/keccak/keccak-hash-4way.c index 46d08cf..10ac7f4 100644 --- a/algo/keccak/keccak-hash-4way.c +++ b/algo/keccak/keccak-hash-4way.c @@ -1,6 +1,7 @@ #include #include #include "keccak-hash-4way.h" +#include "keccak-gate.h" static const uint64_t RC[] = { 0x0000000000000001, 0x0000000000008082, @@ -168,7 +169,7 @@ static void keccak64_8way_close( keccak64_ctx_m512i *kc, void *dst, size_t j; size_t m512_len = byte_len >> 3; - eb = 0x100 >> 8; + eb = hard_coded_eb; if ( kc->ptr == (lim - 8) ) { const uint64_t t = eb | 0x8000000000000000; @@ -349,7 +350,7 @@ static void keccak64_close( keccak64_ctx_m256i *kc, void *dst, size_t byte_len, size_t j; size_t m256_len = byte_len >> 3; - eb = 0x100 >> 8; + eb = hard_coded_eb; if ( kc->ptr == (lim - 8) ) 
{ const uint64_t t = eb | 0x8000000000000000; diff --git a/algo/keccak/keccak.c b/algo/keccak/keccak.c index 1a66bc1..d122ce3 100644 --- a/algo/keccak/keccak.c +++ b/algo/keccak/keccak.c @@ -18,36 +18,34 @@ void keccakhash(void *state, const void *input) memcpy(state, hash, 32); } -int scanhash_keccak( struct work *work, - uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) +int scanhash_keccak( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ) { - uint32_t *pdata = work->data; - uint32_t *ptarget = work->target; - uint32_t n = pdata[19] - 1; - const uint32_t first_nonce = pdata[19]; - //const uint32_t Htarg = ptarget[7]; - int thr_id = mythr->id; // thr_id arg is deprecated + uint32_t _ALIGN(64) hash64[8]; + uint32_t _ALIGN(64) endiandata[32]; + uint32_t *pdata = work->data; + uint32_t *ptarget = work->target; + uint32_t n = pdata[19]; + const uint32_t first_nonce = pdata[19]; + const uint32_t last_nonce = max_nonce; + const int thr_id = mythr->id; - uint32_t _ALIGN(32) hash64[8]; - uint32_t endiandata[32]; + for ( int i=0; i < 19; i++ ) + be32enc( &endiandata[i], pdata[i] ); - for (int i=0; i < 19; i++) - be32enc(&endiandata[i], pdata[i]); + do { + be32enc( &endiandata[19], n ); + keccakhash( hash64, endiandata ); + if ( valid_hash( hash64, ptarget ) && !opt_benchmark ) + { + pdata[19] = n; + submit_solution( work, hash64, mythr ); + } + n++; + } while ( n < last_nonce && !work_restart[thr_id].restart ); - do { - - pdata[19] = ++n; - be32enc(&endiandata[19], n); - keccakhash(hash64, endiandata); - if (((hash64[7]&0xFFFFFF00)==0) && - fulltest(hash64, ptarget)) { - *hashes_done = n - first_nonce + 1; - return true; - } - } while (n < max_nonce && !work_restart[thr_id].restart); - - *hashes_done = n - first_nonce + 1; - pdata[19] = n; - return 0; + *hashes_done = n - first_nonce; + pdata[19] = n; + return 0; } diff --git a/algo/keccak/sha3d-4way.c b/algo/keccak/sha3d-4way.c new file mode 100644 index 
0000000..dfd4320 --- /dev/null +++ b/algo/keccak/sha3d-4way.c @@ -0,0 +1,126 @@ +#include "keccak-gate.h" +#include +#include +#include +#include "sph_keccak.h" +#include "keccak-hash-4way.h" + +#if defined(KECCAK_8WAY) + +void sha3d_hash_8way(void *state, const void *input) +{ + uint32_t buffer[16*8] __attribute__ ((aligned (128))); + keccak256_8way_context ctx; + + keccak256_8way_init( &ctx ); + keccak256_8way_update( &ctx, input, 80 ); + keccak256_8way_close( &ctx, buffer ); + + keccak256_8way_init( &ctx ); + keccak256_8way_update( &ctx, buffer, 32 ); + keccak256_8way_close( &ctx, state ); +} + +int scanhash_sha3d_8way( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ) +{ + uint32_t vdata[24*8] __attribute__ ((aligned (128))); + uint32_t hash[16*8] __attribute__ ((aligned (64))); + uint32_t lane_hash[8] __attribute__ ((aligned (64))); + uint32_t *hash7 = &(hash[49]); // 3*16+1 + uint32_t *pdata = work->data; + uint32_t *ptarget = work->target; + uint32_t n = pdata[19]; + const uint32_t first_nonce = pdata[19]; + const uint32_t last_nonce = max_nonce - 8; + __m512i *noncev = (__m512i*)vdata + 9; // aligned + const uint32_t Htarg = ptarget[7]; + const int thr_id = mythr->id; + const bool bench = opt_benchmark; + + mm512_bswap32_intrlv80_8x64( vdata, pdata ); + *noncev = mm512_intrlv_blend_32( + _mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0, + n+3, 0, n+2, 0, n+1, 0, n , 0 ), *noncev ); + do { + sha3d_hash_8way( hash, vdata ); + + for ( int lane = 0; lane < 8; lane++ ) + if unlikely( hash7[ lane<<1 ] <= Htarg && !bench ) + { + extr_lane_8x64( lane_hash, hash, lane, 256 ); + if ( valid_hash( lane_hash, ptarget ) ) + { + pdata[19] = bswap_32( n + lane ); + submit_lane_solution( work, lane_hash, mythr, lane ); + } + } + *noncev = _mm512_add_epi32( *noncev, + m512_const1_64( 0x0000000800000000 ) ); + n += 8; + + } while ( (n < last_nonce) && !work_restart[thr_id].restart); + + *hashes_done = n - first_nonce; + return 0; +} + 
+#elif defined(KECCAK_4WAY) + +void sha3d_hash_4way(void *state, const void *input) +{ + uint32_t buffer[16*4] __attribute__ ((aligned (64))); + keccak256_4way_context ctx; + + keccak256_4way_init( &ctx ); + keccak256_4way_update( &ctx, input, 80 ); + keccak256_4way_close( &ctx, buffer ); + + keccak256_4way_init( &ctx ); + keccak256_4way_update( &ctx, buffer, 32 ); + keccak256_4way_close( &ctx, state ); +} + +int scanhash_sha3d_4way( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ) +{ + uint32_t vdata[24*4] __attribute__ ((aligned (64))); + uint32_t hash[16*4] __attribute__ ((aligned (32))); + uint32_t lane_hash[8] __attribute__ ((aligned (32))); + uint32_t *hash7 = &(hash[25]); // 3*8+1 + uint32_t *pdata = work->data; + uint32_t *ptarget = work->target; + uint32_t n = pdata[19]; + const uint32_t first_nonce = pdata[19]; + const uint32_t last_nonce = max_nonce - 4; + __m256i *noncev = (__m256i*)vdata + 9; // aligned + const uint32_t Htarg = ptarget[7]; + const int thr_id = mythr->id; + const bool bench = opt_benchmark; + + mm256_bswap32_intrlv80_4x64( vdata, pdata ); + *noncev = mm256_intrlv_blend_32( + _mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev ); + do { + sha3d_hash_4way( hash, vdata ); + + for ( int lane = 0; lane < 4; lane++ ) + if unlikely( hash7[ lane<<1 ] <= Htarg && !bench ) + { + extr_lane_4x64( lane_hash, hash, lane, 256 ); + if ( valid_hash( lane_hash, ptarget ) ) + { + pdata[19] = bswap_32( n + lane ); + submit_lane_solution( work, lane_hash, mythr, lane ); + } + } + *noncev = _mm256_add_epi32( *noncev, + m256_const1_64( 0x0000000400000000 ) ); + n += 4; + } while ( (n < last_nonce) && !work_restart[thr_id].restart); + + *hashes_done = n - first_nonce; + return 0; +} + +#endif diff --git a/algo/keccak/sha3d.c b/algo/keccak/sha3d.c new file mode 100644 index 0000000..e9fd369 --- /dev/null +++ b/algo/keccak/sha3d.c @@ -0,0 +1,50 @@ +#include "algo-gate-api.h" +#include +#include +#include +#include 
"sph_keccak.h" + +void sha3d_hash(void *state, const void *input) +{ + uint32_t buffer[16]; + sph_keccak256_context ctx_keccak; + + sph_keccak256_init( &ctx_keccak ); + sph_keccak256 ( &ctx_keccak, input, 80 ); + sph_keccak256_close( &ctx_keccak, buffer ); + sph_keccak256_init( &ctx_keccak ); + sph_keccak256 ( &ctx_keccak, buffer, 32 ); + sph_keccak256_close( &ctx_keccak, state ); +} + +int scanhash_sha3d( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ) +{ + uint32_t _ALIGN(64) hash64[8]; + uint32_t _ALIGN(64) endiandata[32]; + uint32_t *pdata = work->data; + uint32_t *ptarget = work->target; + uint32_t n = pdata[19]; + const uint32_t first_nonce = pdata[19]; + const uint32_t last_nonce = max_nonce; + const int thr_id = mythr->id; + + for ( int i=0; i < 19; i++ ) + be32enc( &endiandata[i], pdata[i] ); + + do { + be32enc( &endiandata[19], n ); + sha3d_hash( hash64, endiandata ); + if ( valid_hash( hash64, ptarget ) && !opt_benchmark ) + { + pdata[19] = n; + submit_solution( work, hash64, mythr ); + } + n++; + } while ( n < last_nonce && !work_restart[thr_id].restart ); + + *hashes_done = n - first_nonce; + pdata[19] = n; + return 0; +} + diff --git a/algo/keccak/sph_keccak.c b/algo/keccak/sph_keccak.c index de7784f..45f3d37 100644 --- a/algo/keccak/sph_keccak.c +++ b/algo/keccak/sph_keccak.c @@ -32,8 +32,8 @@ #include #include - #include "sph_keccak.h" +#include "keccak-gate.h" #ifdef __cplusplus extern "C"{ @@ -1616,7 +1616,7 @@ keccak_core(sph_keccak_context *kc, const void *data, size_t len, size_t lim) } u; \ size_t j; \ \ - eb = (0x100 | (ub & 0xFF)) >> (8 - n); \ + eb = hard_coded_eb; \ if (kc->ptr == (lim - 1)) { \ if (n == 7) { \ u.tmp[0] = eb; \ diff --git a/algo/lyra2/allium-4way.c b/algo/lyra2/allium-4way.c index b8734a0..e29419a 100644 --- a/algo/lyra2/allium-4way.c +++ b/algo/lyra2/allium-4way.c @@ -263,37 +263,31 @@ int scanhash_allium_16way( struct work *work, uint32_t max_nonce, const uint32_t first_nonce = 
pdata[19]; uint32_t n = first_nonce; const uint32_t last_nonce = max_nonce - 16; - const uint32_t Htarg = ptarget[7]; __m512i *noncev = (__m512i*)vdata + 19; // aligned - int thr_id = mythr->id; // thr_id arg is deprecated + const int thr_id = mythr->id; + const bool bench = opt_benchmark; - if ( opt_benchmark ) - ( (uint32_t*)ptarget )[7] = 0x0000ff; + if ( bench ) ( (uint32_t*)ptarget )[7] = 0x0000ff; mm512_bswap32_intrlv80_16x32( vdata, pdata ); + *noncev = _mm512_set_epi32( n+15, n+14, n+13, n+12, n+11, n+10, n+ 9, n+ 8, + n+ 7, n+ 6, n+ 5, n+ 4, n+ 3, n+ 2, n +1, n ); + blake256_16way_init( &allium_16way_ctx.blake ); blake256_16way_update( &allium_16way_ctx.blake, vdata, 64 ); do { - *noncev = mm512_bswap_32( _mm512_set_epi32( n+15, n+14, n+13, n+12, - n+11, n+10, n+ 9, n+ 8, - n+ 7, n+ 6, n+ 5, n+ 4, - n+ 3, n+ 2, n +1, n ) ); - allium_16way_hash( hash, vdata ); - pdata[19] = n; - for ( int lane = 0; lane < 16; lane++ ) if ( (hash+(lane<<3))[7] <= Htarg ) + for ( int lane = 0; lane < 16; lane++ ) + if unlikely( valid_hash( hash+(lane<<3), ptarget ) && !bench ) { - if ( fulltest( hash+(lane<<3), ptarget ) && !opt_benchmark ) - { - pdata[19] = n + lane; - submit_lane_solution( work, hash+(lane<<3), mythr, lane ); - } + pdata[19] = bswap_32( n + lane ); + submit_lane_solution( work, hash+(lane<<3), mythr, lane ); } + *noncev = _mm512_add_epi32( *noncev, m512_const1_32( 16 ) ); n += 16; } while ( (n < last_nonce) && !work_restart[thr_id].restart); - *hashes_done = n - first_nonce; return 0; } @@ -433,14 +427,10 @@ int scanhash_allium_8way( struct work *work, uint32_t max_nonce, const uint32_t first_nonce = pdata[19]; const uint32_t last_nonce = max_nonce - 8; uint32_t n = first_nonce; - const uint64_t Htarg = ptarget[3]; __m256i *noncev = (__m256i*)vdata + 19; // aligned const int thr_id = mythr->id; const bool bench = opt_benchmark; - if unlikely( bench ) - ( (uint32_t*)ptarget )[7] = 0x0000ff; - mm256_bswap32_intrlv80_8x32( vdata, pdata ); *noncev = 
_mm256_set_epi32( n+7, n+6, n+5, n+4, n+3, n+2, n+1, n ); @@ -453,14 +443,10 @@ int scanhash_allium_8way( struct work *work, uint32_t max_nonce, for ( int lane = 0; lane < 8; lane++ ) { const uint64_t *lane_hash = hash + (lane<<2); - if unlikely( lane_hash[3] <= Htarg ) - { - if likely( ( lane_hash[3] < Htarg && !bench ) - || valid_hash( lane_hash, ptarget ) ) + if unlikely( valid_hash( lane_hash, ptarget ) && !bench ) { pdata[19] = bswap_32( n + lane ); submit_lane_solution( work, lane_hash, mythr, lane ); - } } } n += 8; diff --git a/algo/lyra2/lyra2-gate.c b/algo/lyra2/lyra2-gate.c index 4218a65..b6b90fe 100644 --- a/algo/lyra2/lyra2-gate.c +++ b/algo/lyra2/lyra2-gate.c @@ -220,7 +220,7 @@ void phi2_build_extraheader( struct work* g_work, struct stratum_ctx* sctx ) // Assemble block header algo_gate.build_block_header( g_work, le32dec( sctx->job.version ), (uint32_t*) sctx->job.prevhash, (uint32_t*) merkle_tree, - le32dec( sctx->job.ntime ), le32dec(sctx->job.nbits) ); + le32dec( sctx->job.ntime ), le32dec(sctx->job.nbits), NULL ); for ( t = 0; t < 16; t++ ) g_work->data[ 20+t ] = ((uint32_t*)sctx->job.extra)[t]; } diff --git a/algo/nist5/zr5.c b/algo/nist5/zr5.c index f2293fd..69c07f9 100644 --- a/algo/nist5/zr5.c +++ b/algo/nist5/zr5.c @@ -154,14 +154,13 @@ int scanhash_zr5( struct work *work, uint32_t max_nonce, } void zr5_get_new_work( struct work* work, struct work* g_work, int thr_id, - uint32_t* end_nonce_ptr, bool clean_job ) + uint32_t* end_nonce_ptr ) { // ignore POK in first word -// const int nonce_i = 19; const int wkcmp_sz = 72; // (19-1) * sizeof(uint32_t) uint32_t *nonceptr = algo_gate.get_nonceptr( work->data ); if ( memcmp( &work->data[1], &g_work->data[1], wkcmp_sz ) - && ( clean_job || ( *nonceptr >= *end_nonce_ptr ) ) ) + || ( *nonceptr >= *end_nonce_ptr ) ) { work_free( work ); work_copy( work, g_work ); diff --git a/algo/yespower/sha256_p.c b/algo/sha/hmac-sha256-hash.c similarity index 58% rename from algo/yespower/sha256_p.c rename to 
algo/sha/hmac-sha256-hash.c index 7201797..291b122 100644 --- a/algo/yespower/sha256_p.c +++ b/algo/sha/hmac-sha256-hash.c @@ -28,46 +28,10 @@ #include #include - -#include "sysendian.h" - -#include "sha256_p.h" +#include "simd-utils.h" +#include "hmac-sha256-hash.h" #include "compat.h" - -/* Elementary functions used by SHA256 */ -#define Ch(x, y, z) ((x & (y ^ z)) ^ z) -#define Maj(x, y, z) ((x & (y | z)) | (y & z)) -#define SHR(x, n) (x >> n) -#define ROTR(x, n) ((x >> n) | (x << (32 - n))) -#define S0(x) (ROTR(x, 2) ^ ROTR(x, 13) ^ ROTR(x, 22)) -#define S1(x) (ROTR(x, 6) ^ ROTR(x, 11) ^ ROTR(x, 25)) -#define s0(x) (ROTR(x, 7) ^ ROTR(x, 18) ^ SHR(x, 3)) -#define s1(x) (ROTR(x, 17) ^ ROTR(x, 19) ^ SHR(x, 10)) - -/* SHA256 round function */ -#define RND(a, b, c, d, e, f, g, h, k) \ - t0 = h + S1(e) + Ch(e, f, g) + k; \ - t1 = S0(a) + Maj(a, b, c); \ - d += t0; \ - h = t0 + t1; - -/* Adjusted round function for rotating state */ -#define RNDr(S, W, i, k) \ - RND(S[(64 - i) % 8], S[(65 - i) % 8], \ - S[(66 - i) % 8], S[(67 - i) % 8], \ - S[(68 - i) % 8], S[(69 - i) % 8], \ - S[(70 - i) % 8], S[(71 - i) % 8], \ - W[i] + k) - -/* -static unsigned char PAD[64] = { - 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -}; -*/ /** * SHA256_Buf(in, len, digest): * Compute the SHA256 hash of ${len} bytes from ${in} and write it to ${digest}. @@ -76,9 +40,9 @@ void SHA256_Buf( const void * in, size_t len, uint8_t digest[32] ) { SHA256_CTX ctx; - SHA256_Init( &ctx ); - SHA256_Update( &ctx, in, len ); - SHA256_Final( digest, &ctx ); + SHA256_Init( &ctx ); + SHA256_Update( &ctx, in, len ); + SHA256_Final( digest, &ctx ); } /** @@ -87,19 +51,18 @@ SHA256_Buf( const void * in, size_t len, uint8_t digest[32] ) * length ${Klen}, and write the result to ${digest}. 
*/ void -HMAC_SHA256_Buf(const void * K, size_t Klen, const void * in, size_t len, - uint8_t digest[32]) +HMAC_SHA256_Buf( const void *K, size_t Klen, const void *in, size_t len, + uint8_t digest[32]) { - HMAC_SHA256_CTX ctx; - - HMAC_SHA256_Init( &ctx, K, Klen ); - HMAC_SHA256_Update( &ctx, in, len ); - HMAC_SHA256_Final( digest, &ctx ); + HMAC_SHA256_CTX ctx; + HMAC_SHA256_Init( &ctx, K, Klen ); + HMAC_SHA256_Update( &ctx, in, len ); + HMAC_SHA256_Final( digest, &ctx ); } /* Initialize an HMAC-SHA256 operation with the given key. */ void -HMAC_SHA256_Init( HMAC_SHA256_CTX * ctx, const void * _K, size_t Klen ) +HMAC_SHA256_Init( HMAC_SHA256_CTX *ctx, const void *_K, size_t Klen ) { unsigned char pad[64]; unsigned char khash[32]; @@ -107,7 +70,8 @@ HMAC_SHA256_Init( HMAC_SHA256_CTX * ctx, const void * _K, size_t Klen ) size_t i; /* If Klen > 64, the key is really SHA256(K). */ - if (Klen > 64) { + if ( Klen > 64 ) + { SHA256_Init( &ctx->ictx ); SHA256_Update( &ctx->ictx, K, Klen ); SHA256_Final( khash, &ctx->ictx ); @@ -116,7 +80,7 @@ HMAC_SHA256_Init( HMAC_SHA256_CTX * ctx, const void * _K, size_t Klen ) } /* Inner SHA256 operation is SHA256(K xor [block of 0x36] || data). */ - SHA256_Init( &ctx->ictx ); + SHA256_Init( &ctx->ictx ); memset( pad, 0x36, 64 ); for ( i = 0; i < Klen; i++ ) pad[i] ^= K[i]; @@ -128,23 +92,19 @@ HMAC_SHA256_Init( HMAC_SHA256_CTX * ctx, const void * _K, size_t Klen ) for ( i = 0; i < Klen; i++ ) pad[i] ^= K[i]; SHA256_Update( &ctx->octx, pad, 64 ); - - /* Clean the stack. */ - //memset(khash, 0, 32); } /* Add bytes to the HMAC-SHA256 operation. */ void -HMAC_SHA256_Update(HMAC_SHA256_CTX * ctx, const void *in, size_t len) +HMAC_SHA256_Update( HMAC_SHA256_CTX *ctx, const void *in, size_t len ) { - /* Feed data to the inner SHA256 operation. */ SHA256_Update( &ctx->ictx, in, len ); } /* Finish an HMAC-SHA256 operation. 
*/ void -HMAC_SHA256_Final(unsigned char digest[32], HMAC_SHA256_CTX * ctx ) +HMAC_SHA256_Final( unsigned char digest[32], HMAC_SHA256_CTX *ctx ) { unsigned char ihash[32]; @@ -156,9 +116,6 @@ HMAC_SHA256_Final(unsigned char digest[32], HMAC_SHA256_CTX * ctx ) /* Finish the outer SHA256 operation. */ SHA256_Final( digest, &ctx->octx ); - - /* Clean the stack. */ - //memset(ihash, 0, 32); } /** @@ -167,52 +124,51 @@ HMAC_SHA256_Final(unsigned char digest[32], HMAC_SHA256_CTX * ctx ) * write the output to buf. The value dkLen must be at most 32 * (2^32 - 1). */ void -PBKDF2_SHA256(const uint8_t * passwd, size_t passwdlen, const uint8_t * salt, - size_t saltlen, uint64_t c, uint8_t * buf, size_t dkLen) +PBKDF2_SHA256( const uint8_t *passwd, size_t passwdlen, const uint8_t *salt, + size_t saltlen, uint64_t c, uint8_t *buf, size_t dkLen ) { HMAC_SHA256_CTX PShctx, hctx; uint8_t _ALIGN(128) T[32]; uint8_t _ALIGN(128) U[32]; - uint8_t ivec[4]; + uint32_t ivec; size_t i, clen; uint64_t j; int k; /* Compute HMAC state after processing P and S. */ - HMAC_SHA256_Init(&PShctx, passwd, passwdlen); - HMAC_SHA256_Update(&PShctx, salt, saltlen); + HMAC_SHA256_Init( &PShctx, passwd, passwdlen ); + HMAC_SHA256_Update( &PShctx, salt, saltlen ); /* Iterate through the blocks. */ - for (i = 0; i * 32 < dkLen; i++) { + for ( i = 0; i * 32 < dkLen; i++ ) + { /* Generate INT(i + 1). */ - be32enc(ivec, (uint32_t)(i + 1)); + ivec = bswap_32( i+1 ); /* Compute U_1 = PRF(P, S || INT(i)). */ - memcpy(&hctx, &PShctx, sizeof(HMAC_SHA256_CTX)); - HMAC_SHA256_Update(&hctx, ivec, 4); - HMAC_SHA256_Final(U, &hctx); + memcpy( &hctx, &PShctx, sizeof(HMAC_SHA256_CTX) ); + HMAC_SHA256_Update( &hctx, &ivec, 4 ); + HMAC_SHA256_Final( U, &hctx ); /* T_i = U_1 ... */ - memcpy(T, U, 32); + memcpy( T, U, 32 ); - for (j = 2; j <= c; j++) { + for ( j = 2; j <= c; j++ ) + { /* Compute U_j. 
*/ - HMAC_SHA256_Init(&hctx, passwd, passwdlen); - HMAC_SHA256_Update(&hctx, U, 32); - HMAC_SHA256_Final(U, &hctx); + HMAC_SHA256_Init( &hctx, passwd, passwdlen ); + HMAC_SHA256_Update( &hctx, U, 32 ); + HMAC_SHA256_Final( U, &hctx ); /* ... xor U_j ... */ - for (k = 0; k < 32; k++) + for ( k = 0; k < 32; k++ ) T[k] ^= U[k]; } /* Copy as many bytes as necessary into buf. */ clen = dkLen - i * 32; - if (clen > 32) + if ( clen > 32 ) clen = 32; - memcpy(&buf[i * 32], T, clen); + memcpy( &buf[i * 32], T, clen ); } - - /* Clean PShctx, since we never called _Final on it. */ - //memset(&PShctx, 0, sizeof(HMAC_SHA256_CTX_Y)); } diff --git a/algo/yespower/sha256_p.h b/algo/sha/hmac-sha256-hash.h similarity index 88% rename from algo/yespower/sha256_p.h rename to algo/sha/hmac-sha256-hash.h index 2481caf..0a020f6 100644 --- a/algo/yespower/sha256_p.h +++ b/algo/sha/hmac-sha256-hash.h @@ -26,23 +26,24 @@ * $FreeBSD: src/lib/libmd/sha256_Y.h,v 1.2 2006/01/17 15:35:56 phk Exp $ */ -#ifndef _SHA256_H_ -#define _SHA256_H_ +#ifndef HMAC_SHA256_H__ +#define HMAC_SHA256_H__ #include #include #include -typedef struct HMAC_SHA256Context { - SHA256_CTX ictx; - SHA256_CTX octx; +typedef struct HMAC_SHA256Context +{ + SHA256_CTX ictx; + SHA256_CTX octx; } HMAC_SHA256_CTX; -void SHA256_Buf( const void * in, size_t len, uint8_t digest[32] ); +void SHA256_Buf( const void *, size_t len, uint8_t digest[32] ); void HMAC_SHA256_Init( HMAC_SHA256_CTX *, const void *, size_t ); void HMAC_SHA256_Update( HMAC_SHA256_CTX *, const void *, size_t ); void HMAC_SHA256_Final( unsigned char [32], HMAC_SHA256_CTX * ); -void HMAC_SHA256_Buf( const void * K, size_t Klen, const void * in, +void HMAC_SHA256_Buf( const void *, size_t Klen, const void *, size_t len, uint8_t digest[32] ); /** @@ -53,4 +54,4 @@ void HMAC_SHA256_Buf( const void * K, size_t Klen, const void * in, void PBKDF2_SHA256( const uint8_t *, size_t, const uint8_t *, size_t, uint64_t, uint8_t *, size_t); -#endif /* !_SHA256_H_ */ +#endif // 
HMAC_SHA256_H__ diff --git a/algo/sha/sha256-hash-4way.c b/algo/sha/sha256-hash-4way.c index 2167407..ed10673 100644 --- a/algo/sha/sha256-hash-4way.c +++ b/algo/sha/sha256-hash-4way.c @@ -94,6 +94,37 @@ static const uint32_t K256[64] = _mm_xor_si128( _mm_xor_si128( \ mm128_ror_32(x, 17), mm128_ror_32(x, 19) ), _mm_srli_epi32(x, 10) ) +#define SHA2s_4WAY_STEP(A, B, C, D, E, F, G, H, i, j) \ +do { \ + __m128i K = _mm_set1_epi32( K256[( (j)+(i) )] ); \ + __m128i T1 = mm128_ror_32( E, 14 ); \ + __m128i T2 = mm128_ror_32( A, 9 ); \ + __m128i T3 = _mm_xor_si128( F, G ); \ + __m128i T4 = _mm_or_si128( A, B ); \ + __m128i T5 = _mm_and_si128( A, B ); \ + K = _mm_add_epi32( K, W[i] ); \ + T1 = _mm_xor_si128( T1, E ); \ + T2 = _mm_xor_si128( T2, A ); \ + T3 = _mm_and_si128( T3, E ); \ + T4 = _mm_and_si128( T4, C ); \ + K = _mm_add_epi32( H, K ); \ + T1 = mm128_ror_32( T1, 5 ); \ + T2 = mm128_ror_32( T2, 11 ); \ + T3 = _mm_xor_si128( T3, G ); \ + T4 = _mm_or_si128( T4, T5 ); \ + T1 = _mm_xor_si128( T1, E ); \ + T2 = _mm_xor_si128( T2, A ); \ + T1 = mm128_ror_32( T1, 6 ); \ + T2 = mm128_ror_32( T2, 2 ); \ + T1 = _mm_add_epi32( T1, T3 ); \ + T2 = _mm_add_epi32( T2, T4 ); \ + T1 = _mm_add_epi32( T1, K ); \ + H = _mm_add_epi32( T1, T2 ); \ + D = _mm_add_epi32( D, T1 ); \ +} while (0) + + +/* #define SHA2s_4WAY_STEP(A, B, C, D, E, F, G, H, i, j) \ do { \ __m128i T1, T2; \ @@ -104,6 +135,8 @@ do { \ D = _mm_add_epi32( D, T1 ); \ H = _mm_add_epi32( T1, T2 ); \ } while (0) +*/ + static void sha256_4way_round( sha256_4way_context *ctx, __m128i *in, __m128i r[8] ) diff --git a/algo/sha/sha512-hash-4way.c b/algo/sha/sha512-hash-4way.c index d056da0..9f5349b 100644 --- a/algo/sha/sha512-hash-4way.c +++ b/algo/sha/sha512-hash-4way.c @@ -319,7 +319,7 @@ void sha512_8way_close( sha512_8way_context *sc, void *dst ) // SHA-512 4 way 64 bit - +/* #define CH(X, Y, Z) \ _mm256_xor_si256( _mm256_and_si256( _mm256_xor_si256( Y, Z ), X ), Z ) @@ -327,6 +327,15 @@ void sha512_8way_close( 
sha512_8way_context *sc, void *dst ) _mm256_or_si256( _mm256_and_si256( X, Y ), \ _mm256_and_si256( _mm256_or_si256( X, Y ), Z ) ) +#define BSG5_0(x) \ + mm256_ror_64( _mm256_xor_si256( mm256_ror_64( \ + _mm256_xor_si256( mm256_ror_64( x, 5 ), x ), 6 ), x ), 28 ) + +#define BSG5_1(x) \ + mm256_ror_64( _mm256_xor_si256( mm256_ror_64( \ + _mm256_xor_si256( mm256_ror_64( x, 23 ), x ), 4 ), x ), 14 ) +*/ +/* #define BSG5_0(x) \ _mm256_xor_si256( _mm256_xor_si256( \ mm256_ror_64(x, 28), mm256_ror_64(x, 34) ), mm256_ror_64(x, 39) ) @@ -334,7 +343,8 @@ void sha512_8way_close( sha512_8way_context *sc, void *dst ) #define BSG5_1(x) \ _mm256_xor_si256( _mm256_xor_si256( \ mm256_ror_64(x, 14), mm256_ror_64(x, 18) ), mm256_ror_64(x, 41) ) - +*/ +/* #define SSG5_0(x) \ _mm256_xor_si256( _mm256_xor_si256( \ mm256_ror_64(x, 1), mm256_ror_64(x, 8) ), _mm256_srli_epi64(x, 7) ) @@ -342,7 +352,7 @@ void sha512_8way_close( sha512_8way_context *sc, void *dst ) #define SSG5_1(x) \ _mm256_xor_si256( _mm256_xor_si256( \ mm256_ror_64(x, 19), mm256_ror_64(x, 61) ), _mm256_srli_epi64(x, 6) ) - +*/ // Interleave SSG0 & SSG1 for better throughput. 
// return ssg0(w0) + ssg1(w1) static inline __m256i ssg512_add( __m256i w0, __m256i w1 ) @@ -361,7 +371,7 @@ static inline __m256i ssg512_add( __m256i w0, __m256i w1 ) return _mm256_add_epi64( w0a, w1a ); } - +/* #define SSG512x2_0( w0, w1, i ) do \ { \ __m256i X0a, X1a, X0b, X1b; \ @@ -391,7 +401,51 @@ static inline __m256i ssg512_add( __m256i w0, __m256i w1 ) w0 = _mm256_xor_si256( X0a, X0b ); \ w1 = _mm256_xor_si256( X1a, X1b ); \ } while(0) +*/ +#define SHA3_4WAY_STEP(A, B, C, D, E, F, G, H, i) \ +do { \ + __m256i K = _mm256_set1_epi64x( K512[ i ] ); \ + __m256i T1 = mm256_ror_64( E, 23 ); \ + __m256i T2 = mm256_ror_64( A, 5 ); \ + __m256i T3 = _mm256_xor_si256( F, G ); \ + __m256i T4 = _mm256_or_si256( A, B ); \ + __m256i T5 = _mm256_and_si256( A, B ); \ + K = _mm256_add_epi64( K, W[i] ); \ + T1 = _mm256_xor_si256( T1, E ); \ + T2 = _mm256_xor_si256( T2, A ); \ + T3 = _mm256_and_si256( T3, E ); \ + T4 = _mm256_and_si256( T4, C ); \ + K = _mm256_add_epi64( H, K ); \ + T1 = mm256_ror_64( T1, 4 ); \ + T2 = mm256_ror_64( T2, 6 ); \ + T3 = _mm256_xor_si256( T3, G ); \ + T4 = _mm256_or_si256( T4, T5 ); \ + T1 = _mm256_xor_si256( T1, E ); \ + T2 = _mm256_xor_si256( T2, A ); \ + T1 = mm256_ror_64( T1, 14 ); \ + T2 = mm256_ror_64( T2, 28 ); \ + T1 = _mm256_add_epi64( T1, T3 ); \ + T2 = _mm256_add_epi64( T2, T4 ); \ + T1 = _mm256_add_epi64( T1, K ); \ + H = _mm256_add_epi64( T1, T2 ); \ + D = _mm256_add_epi64( D, T1 ); \ +} while (0) + +/* +#define SHA3_4WAY_STEP(A, B, C, D, E, F, G, H, i) \ +do { \ + __m256i K = _mm256_add_epi64( W[i], _mm256_set1_epi64x( K512[ i ] ) ); \ + __m256i T1 = BSG5_1(E); \ + __m256i T2 = BSG5_0(A); \ + T1 = mm256_add4_64( T1, H, CH(E, F, G), K ); \ + T2 = _mm256_add_epi64( T2, MAJ(A, B, C) ); \ + D = _mm256_add_epi64( D, T1 ); \ + H = _mm256_add_epi64( T1, T2 ); \ +} while (0) +*/ + +/* #define SHA3_4WAY_STEP(A, B, C, D, E, F, G, H, i) \ do { \ __m256i T1, T2; \ @@ -402,7 +456,7 @@ do { \ D = _mm256_add_epi64( D, T1 ); \ H = _mm256_add_epi64( 
T1, T2 ); \ } while (0) - +*/ static void sha512_4way_round( sha512_4way_context *ctx, __m256i *in, __m256i r[8] ) diff --git a/algo/x13/drop.c b/algo/x13/drop.c index ab6f39e..53aec2a 100644 --- a/algo/x13/drop.c +++ b/algo/x13/drop.c @@ -214,14 +214,14 @@ int scanhash_drop( struct work *work, uint32_t max_nonce, } void drop_get_new_work( struct work* work, struct work* g_work, int thr_id, - uint32_t* end_nonce_ptr, bool clean_job ) + uint32_t* end_nonce_ptr ) { // ignore POK in first word // const int nonce_i = 19; const int wkcmp_sz = 72; // (19-1) * sizeof(uint32_t) uint32_t *nonceptr = algo_gate.get_nonceptr( work->data ); if ( memcmp( &work->data[1], &g_work->data[1], wkcmp_sz ) - && ( clean_job || ( *nonceptr >= *end_nonce_ptr ) ) ) + || ( *nonceptr >= *end_nonce_ptr ) ) { work_free( work ); work_copy( work, g_work ); diff --git a/algo/x17/x17-4way.c b/algo/x17/x17-4way.c index 9055670..d2bbdd0 100644 --- a/algo/x17/x17-4way.c +++ b/algo/x17/x17-4way.c @@ -299,25 +299,28 @@ int scanhash_x17_8way( struct work *work, uint32_t max_nonce, uint32_t n = first_nonce; const int thr_id = mythr->id; const uint32_t Htarg = ptarget[7]; + const bool bench = opt_benchmark; mm512_bswap32_intrlv80_8x64( vdata, pdata ); + *noncev = mm512_intrlv_blend_32( + _mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0, + n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev ); do { - *noncev = mm512_intrlv_blend_32( mm512_bswap_32( - _mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0, - n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev ); x17_8way_hash( hash, vdata ); for ( int lane = 0; lane < 8; lane++ ) - if unlikely( ( hash7[ lane ] <= Htarg ) ) + if unlikely( ( hash7[ lane ] <= Htarg ) && !bench ) { extr_lane_8x32( lane_hash, hash, lane, 256 ); - if ( likely( fulltest( lane_hash, ptarget ) && !opt_benchmark ) ) + if likely( valid_hash( lane_hash, ptarget ) ) { - pdata[19] = n + lane; + pdata[19] = bswap_32( n + lane ); submit_lane_solution( work, lane_hash, mythr, lane ); } } + *noncev = _mm512_add_epi32( 
*noncev, + m512_const1_64( 0x0000000800000000 ) ); n += 8; } while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) ); @@ -496,7 +499,7 @@ int scanhash_x17_4way( struct work *work, uint32_t max_nonce, if ( unlikely( hash7[ lane ] <= Htarg && !bench ) ) { extr_lane_4x32( lane_hash, hash, lane, 256 ); - if ( ( hash7[ lane ] < Htarg ) || valid_hash( lane_hash, ptarget ) ) + if ( valid_hash( lane_hash, ptarget ) ) { pdata[19] = bswap_32( n + lane ); submit_lane_solution( work, lane_hash, mythr, lane ); diff --git a/algo/yescrypt/sha256_Y.c b/algo/yescrypt/sha256_Y.c deleted file mode 100644 index 7b778ed..0000000 --- a/algo/yescrypt/sha256_Y.c +++ /dev/null @@ -1,409 +0,0 @@ -/*- - * Copyright 2005,2007,2009 Colin Percival - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include - -#include -#include - -#include "sysendian.h" - -#include "sha256_Y.h" -#include "compat.h" - -/* - * Encode a length len/4 vector of (uint32_t) into a length len vector of - * (unsigned char) in big-endian form. Assumes len is a multiple of 4. - */ -static void -be32enc_vect(unsigned char *dst, const uint32_t *src, size_t len) -{ - size_t i; - - for (i = 0; i < len / 4; i++) - be32enc(dst + i * 4, src[i]); -} - -/* - * Decode a big-endian length len vector of (unsigned char) into a length - * len/4 vector of (uint32_t). Assumes len is a multiple of 4. 
- */ -static void -be32dec_vect(uint32_t *dst, const unsigned char *src, size_t len) -{ - size_t i; - - for (i = 0; i < len / 4; i++) - dst[i] = be32dec(src + i * 4); -} - -/* Elementary functions used by SHA256 */ -#define Ch(x, y, z) ((x & (y ^ z)) ^ z) -#define Maj(x, y, z) ((x & (y | z)) | (y & z)) -#define SHR(x, n) (x >> n) -#define ROTR(x, n) ((x >> n) | (x << (32 - n))) -#define S0(x) (ROTR(x, 2) ^ ROTR(x, 13) ^ ROTR(x, 22)) -#define S1(x) (ROTR(x, 6) ^ ROTR(x, 11) ^ ROTR(x, 25)) -#define s0(x) (ROTR(x, 7) ^ ROTR(x, 18) ^ SHR(x, 3)) -#define s1(x) (ROTR(x, 17) ^ ROTR(x, 19) ^ SHR(x, 10)) - -/* SHA256 round function */ -#define RND(a, b, c, d, e, f, g, h, k) \ - t0 = h + S1(e) + Ch(e, f, g) + k; \ - t1 = S0(a) + Maj(a, b, c); \ - d += t0; \ - h = t0 + t1; - -/* Adjusted round function for rotating state */ -#define RNDr(S, W, i, k) \ - RND(S[(64 - i) % 8], S[(65 - i) % 8], \ - S[(66 - i) % 8], S[(67 - i) % 8], \ - S[(68 - i) % 8], S[(69 - i) % 8], \ - S[(70 - i) % 8], S[(71 - i) % 8], \ - W[i] + k) - -/* - * SHA256 block compression function. The 256-bit state is transformed via - * the 512-bit input block to produce a new state. - */ -static void -SHA256_Transform_Y(uint32_t * state, const unsigned char block[64]) -{ - uint32_t _ALIGN(128) W[64], S[8]; - uint32_t t0, t1; - int i; - - /* 1. Prepare message schedule W. */ - be32dec_vect(W, block, 64); - for (i = 16; i < 64; i++) - W[i] = s1(W[i - 2]) + W[i - 7] + s0(W[i - 15]) + W[i - 16]; - - /* 2. Initialize working variables. */ - memcpy(S, state, 32); - - /* 3. Mix. 
*/ - RNDr(S, W, 0, 0x428a2f98); - RNDr(S, W, 1, 0x71374491); - RNDr(S, W, 2, 0xb5c0fbcf); - RNDr(S, W, 3, 0xe9b5dba5); - RNDr(S, W, 4, 0x3956c25b); - RNDr(S, W, 5, 0x59f111f1); - RNDr(S, W, 6, 0x923f82a4); - RNDr(S, W, 7, 0xab1c5ed5); - RNDr(S, W, 8, 0xd807aa98); - RNDr(S, W, 9, 0x12835b01); - RNDr(S, W, 10, 0x243185be); - RNDr(S, W, 11, 0x550c7dc3); - RNDr(S, W, 12, 0x72be5d74); - RNDr(S, W, 13, 0x80deb1fe); - RNDr(S, W, 14, 0x9bdc06a7); - RNDr(S, W, 15, 0xc19bf174); - RNDr(S, W, 16, 0xe49b69c1); - RNDr(S, W, 17, 0xefbe4786); - RNDr(S, W, 18, 0x0fc19dc6); - RNDr(S, W, 19, 0x240ca1cc); - RNDr(S, W, 20, 0x2de92c6f); - RNDr(S, W, 21, 0x4a7484aa); - RNDr(S, W, 22, 0x5cb0a9dc); - RNDr(S, W, 23, 0x76f988da); - RNDr(S, W, 24, 0x983e5152); - RNDr(S, W, 25, 0xa831c66d); - RNDr(S, W, 26, 0xb00327c8); - RNDr(S, W, 27, 0xbf597fc7); - RNDr(S, W, 28, 0xc6e00bf3); - RNDr(S, W, 29, 0xd5a79147); - RNDr(S, W, 30, 0x06ca6351); - RNDr(S, W, 31, 0x14292967); - RNDr(S, W, 32, 0x27b70a85); - RNDr(S, W, 33, 0x2e1b2138); - RNDr(S, W, 34, 0x4d2c6dfc); - RNDr(S, W, 35, 0x53380d13); - RNDr(S, W, 36, 0x650a7354); - RNDr(S, W, 37, 0x766a0abb); - RNDr(S, W, 38, 0x81c2c92e); - RNDr(S, W, 39, 0x92722c85); - RNDr(S, W, 40, 0xa2bfe8a1); - RNDr(S, W, 41, 0xa81a664b); - RNDr(S, W, 42, 0xc24b8b70); - RNDr(S, W, 43, 0xc76c51a3); - RNDr(S, W, 44, 0xd192e819); - RNDr(S, W, 45, 0xd6990624); - RNDr(S, W, 46, 0xf40e3585); - RNDr(S, W, 47, 0x106aa070); - RNDr(S, W, 48, 0x19a4c116); - RNDr(S, W, 49, 0x1e376c08); - RNDr(S, W, 50, 0x2748774c); - RNDr(S, W, 51, 0x34b0bcb5); - RNDr(S, W, 52, 0x391c0cb3); - RNDr(S, W, 53, 0x4ed8aa4a); - RNDr(S, W, 54, 0x5b9cca4f); - RNDr(S, W, 55, 0x682e6ff3); - RNDr(S, W, 56, 0x748f82ee); - RNDr(S, W, 57, 0x78a5636f); - RNDr(S, W, 58, 0x84c87814); - RNDr(S, W, 59, 0x8cc70208); - RNDr(S, W, 60, 0x90befffa); - RNDr(S, W, 61, 0xa4506ceb); - RNDr(S, W, 62, 0xbef9a3f7); - RNDr(S, W, 63, 0xc67178f2); - - /* 4. 
Mix local working variables into global state */ - for (i = 0; i < 8; i++) - state[i] += S[i]; -#if 0 - /* Clean the stack. */ - memset(W, 0, 256); - memset(S, 0, 32); - t0 = t1 = 0; -#endif -} - -static unsigned char PAD[64] = { - 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -}; - -/* Add padding and terminating bit-count. */ -static void -SHA256_Pad_Y(SHA256_CTX_Y * ctx) -{ - unsigned char len[8]; - uint32_t r, plen; - - /* - * Convert length to a vector of bytes -- we do this now rather - * than later because the length will change after we pad. - */ - be32enc_vect(len, ctx->count, 8); - - /* Add 1--64 bytes so that the resulting length is 56 mod 64 */ - r = (ctx->count[1] >> 3) & 0x3f; - plen = (r < 56) ? (56 - r) : (120 - r); - SHA256_Update_Y(ctx, PAD, (size_t)plen); - - /* Add the terminating bit-count */ - SHA256_Update_Y(ctx, len, 8); -} - -/* SHA-256 initialization. Begins a SHA-256 operation. 
*/ -void -SHA256_Init_Y(SHA256_CTX_Y * ctx) -{ - /* Zero bits processed so far */ - ctx->count[0] = ctx->count[1] = 0; - - /* Magic initialization constants */ - ctx->state[0] = 0x6A09E667; - ctx->state[1] = 0xBB67AE85; - ctx->state[2] = 0x3C6EF372; - ctx->state[3] = 0xA54FF53A; - ctx->state[4] = 0x510E527F; - ctx->state[5] = 0x9B05688C; - ctx->state[6] = 0x1F83D9AB; - ctx->state[7] = 0x5BE0CD19; -} - -/* Add bytes into the hash */ -void -SHA256_Update_Y(SHA256_CTX_Y * ctx, const void *in, size_t len) -{ - uint32_t bitlen[2]; - uint32_t r; - const unsigned char *src = in; - - /* Number of bytes left in the buffer from previous updates */ - r = (ctx->count[1] >> 3) & 0x3f; - - /* Convert the length into a number of bits */ - bitlen[1] = ((uint32_t)len) << 3; - bitlen[0] = (uint32_t)(len >> 29); - - /* Update number of bits */ - if ((ctx->count[1] += bitlen[1]) < bitlen[1]) - ctx->count[0]++; - ctx->count[0] += bitlen[0]; - - /* Handle the case where we don't need to perform any transforms */ - if (len < 64 - r) { - memcpy(&ctx->buf[r], src, len); - return; - } - - /* Finish the current block */ - memcpy(&ctx->buf[r], src, 64 - r); - SHA256_Transform_Y(ctx->state, ctx->buf); - src += 64 - r; - len -= 64 - r; - - /* Perform complete blocks */ - while (len >= 64) { - SHA256_Transform_Y(ctx->state, src); - src += 64; - len -= 64; - } - - /* Copy left over data into buffer */ - memcpy(ctx->buf, src, len); -} - -/* - * SHA-256 finalization. Pads the input data, exports the hash value, - * and clears the context state. - */ -void -SHA256_Final_Y(unsigned char digest[32], SHA256_CTX_Y * ctx) -{ - /* Add padding */ - SHA256_Pad_Y(ctx); - - /* Write the hash */ - be32enc_vect(digest, ctx->state, 32); - - /* Clear the context state */ - memset((void *)ctx, 0, sizeof(*ctx)); -} - -/* Initialize an HMAC-SHA256 operation with the given key. 
*/ -void -HMAC_SHA256_Init_Y(HMAC_SHA256_CTX_Y * ctx, const void * _K, size_t Klen) -{ - unsigned char pad[64]; - unsigned char khash[32]; - const unsigned char * K = _K; - size_t i; - - /* If Klen > 64, the key is really SHA256(K). */ - if (Klen > 64) { - SHA256_Init(&ctx->ictx); - SHA256_Update(&ctx->ictx, K, Klen); - SHA256_Final(khash, &ctx->ictx); - K = khash; - Klen = 32; - } - - /* Inner SHA256 operation is SHA256(K xor [block of 0x36] || data). */ - SHA256_Init(&ctx->ictx); - memset(pad, 0x36, 64); - for (i = 0; i < Klen; i++) - pad[i] ^= K[i]; - SHA256_Update(&ctx->ictx, pad, 64); - - /* Outer SHA256 operation is SHA256(K xor [block of 0x5c] || hash). */ - SHA256_Init(&ctx->octx); - memset(pad, 0x5c, 64); - for (i = 0; i < Klen; i++) - pad[i] ^= K[i]; - SHA256_Update(&ctx->octx, pad, 64); - - /* Clean the stack. */ - //memset(khash, 0, 32); -} - -/* Add bytes to the HMAC-SHA256 operation. */ -void -HMAC_SHA256_Update_Y(HMAC_SHA256_CTX_Y * ctx, const void *in, size_t len) -{ - - /* Feed data to the inner SHA256 operation. */ - SHA256_Update(&ctx->ictx, in, len); -} - -/* Finish an HMAC-SHA256 operation. */ -void -HMAC_SHA256_Final_Y(unsigned char digest[32], HMAC_SHA256_CTX_Y * ctx) -{ - unsigned char ihash[32]; - - /* Finish the inner SHA256 operation. */ - SHA256_Final(ihash, &ctx->ictx); - - /* Feed the inner hash to the outer SHA256 operation. */ - SHA256_Update(&ctx->octx, ihash, 32); - - /* Finish the outer SHA256 operation. */ - SHA256_Final(digest, &ctx->octx); - - /* Clean the stack. */ - //memset(ihash, 0, 32); -} - -/** - * PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, c, buf, dkLen): - * Compute PBKDF2(passwd, salt, c, dkLen) using HMAC-SHA256 as the PRF, and - * write the output to buf. The value dkLen must be at most 32 * (2^32 - 1). 
- */ -void -PBKDF2_SHA256_Y(const uint8_t * passwd, size_t passwdlen, const uint8_t * salt, - size_t saltlen, uint64_t c, uint8_t * buf, size_t dkLen) -{ - HMAC_SHA256_CTX_Y PShctx, hctx; - uint8_t _ALIGN(128) T[32]; - uint8_t _ALIGN(128) U[32]; - uint8_t ivec[4]; - size_t i, clen; - uint64_t j; - int k; - - /* Compute HMAC state after processing P and S. */ - HMAC_SHA256_Init_Y(&PShctx, passwd, passwdlen); - HMAC_SHA256_Update_Y(&PShctx, salt, saltlen); - - /* Iterate through the blocks. */ - for (i = 0; i * 32 < dkLen; i++) { - /* Generate INT(i + 1). */ - be32enc(ivec, (uint32_t)(i + 1)); - - /* Compute U_1 = PRF(P, S || INT(i)). */ - memcpy(&hctx, &PShctx, sizeof(HMAC_SHA256_CTX_Y)); - HMAC_SHA256_Update_Y(&hctx, ivec, 4); - HMAC_SHA256_Final_Y(U, &hctx); - - /* T_i = U_1 ... */ - memcpy(T, U, 32); - - for (j = 2; j <= c; j++) { - /* Compute U_j. */ - HMAC_SHA256_Init_Y(&hctx, passwd, passwdlen); - HMAC_SHA256_Update_Y(&hctx, U, 32); - HMAC_SHA256_Final_Y(U, &hctx); - - /* ... xor U_j ... */ - for (k = 0; k < 32; k++) - T[k] ^= U[k]; - } - - /* Copy as many bytes as necessary into buf. */ - clen = dkLen - i * 32; - if (clen > 32) - clen = 32; - memcpy(&buf[i * 32], T, clen); - } - - /* Clean PShctx, since we never called _Final on it. */ - //memset(&PShctx, 0, sizeof(HMAC_SHA256_CTX_Y)); -} diff --git a/algo/yescrypt/sysendian.h b/algo/yescrypt/sysendian.h deleted file mode 100644 index 29933d4..0000000 --- a/algo/yescrypt/sysendian.h +++ /dev/null @@ -1,124 +0,0 @@ -/*- - * Copyright 2007-2009 Colin Percival - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * This file was originally written by Colin Percival as part of the Tarsnap - * online backup system. - */ -#ifndef _SYSENDIAN_H_ -#define _SYSENDIAN_H_ - -/* If we don't have be64enc, the we have isn't usable. 
*/ -#if !HAVE_DECL_BE64ENC -#undef HAVE_SYS_ENDIAN_H -#endif - -#ifdef HAVE_SYS_ENDIAN_H - -#include - -#else - -#include - - - -static __inline uint64_t -be64dec(const void *pp) -{ - const uint8_t *p = (uint8_t const *)pp; - - return ((uint64_t)(p[7]) + ((uint64_t)(p[6]) << 8) + - ((uint64_t)(p[5]) << 16) + ((uint64_t)(p[4]) << 24) + - ((uint64_t)(p[3]) << 32) + ((uint64_t)(p[2]) << 40) + - ((uint64_t)(p[1]) << 48) + ((uint64_t)(p[0]) << 56)); -} - -static __inline void -be64enc(void *pp, uint64_t x) -{ - uint8_t * p = (uint8_t *)pp; - - p[7] = x & 0xff; - p[6] = (x >> 8) & 0xff; - p[5] = (x >> 16) & 0xff; - p[4] = (x >> 24) & 0xff; - p[3] = (x >> 32) & 0xff; - p[2] = (x >> 40) & 0xff; - p[1] = (x >> 48) & 0xff; - p[0] = (x >> 56) & 0xff; -} - - - -static __inline uint64_t -le64dec(const void *pp) -{ - const uint8_t *p = (uint8_t const *)pp; - - return ((uint64_t)(p[0]) + ((uint64_t)(p[1]) << 8) + - ((uint64_t)(p[2]) << 16) + ((uint64_t)(p[3]) << 24) + - ((uint64_t)(p[4]) << 32) + ((uint64_t)(p[5]) << 40) + - ((uint64_t)(p[6]) << 48) + ((uint64_t)(p[7]) << 56)); -} - -static __inline void -le64enc(void *pp, uint64_t x) -{ - uint8_t * p = (uint8_t *)pp; - - p[0] = x & 0xff; - p[1] = (x >> 8) & 0xff; - p[2] = (x >> 16) & 0xff; - p[3] = (x >> 24) & 0xff; - p[4] = (x >> 32) & 0xff; - p[5] = (x >> 40) & 0xff; - p[6] = (x >> 48) & 0xff; - p[7] = (x >> 56) & 0xff; -} - - -static __inline uint32_t -be32dec(const void *pp) -{ - const uint8_t *p = (uint8_t const *)pp; - - return ((uint32_t)(p[3]) + ((uint32_t)(p[2]) << 8) + - ((uint32_t)(p[1]) << 16) + ((uint32_t)(p[0]) << 24)); -} - -static __inline void -be32enc(void *pp, uint32_t x) -{ - uint8_t * p = (uint8_t *)pp; - - p[3] = x & 0xff; - p[2] = (x >> 8) & 0xff; - p[1] = (x >> 16) & 0xff; - p[0] = (x >> 24) & 0xff; -} - -#endif /* !HAVE_SYS_ENDIAN_H */ - -#endif /* !_SYSENDIAN_H_ */ diff --git a/algo/yescrypt/yescrypt-simd.c b/algo/yescrypt/yescrypt-simd.c index e70c37e..d2e46b1 100644 --- a/algo/yescrypt/yescrypt-simd.c 
+++ b/algo/yescrypt/yescrypt-simd.c @@ -48,9 +48,7 @@ #include #include #include -#include "sha256_Y.h" -#include "sysendian.h" - +#include "algo/sha/hmac-sha256-hash.h" #include "yescrypt.h" #include "yescrypt-platform.h" @@ -1312,7 +1310,7 @@ yescrypt_kdf(const yescrypt_shared_t * shared, yescrypt_local_t * local, } /* 1: (B_0 ... B_{p-1}) <-- PBKDF2(P, S, 1, p * MFLen) */ - PBKDF2_SHA256_Y(passwd, passwdlen, salt, saltlen, 1, B, B_size); + PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, 1, B, B_size); if (t || flags) memcpy(sha256, B, sizeof(sha256)); @@ -1342,7 +1340,7 @@ yescrypt_kdf(const yescrypt_shared_t * shared, yescrypt_local_t * local, } /* 5: DK <-- PBKDF2(P, B, 1, dkLen) */ - PBKDF2_SHA256_Y(passwd, passwdlen, B, B_size, 1, buf, buflen); + PBKDF2_SHA256(passwd, passwdlen, B, B_size, 1, buf, buflen); /* * Except when computing classic scrypt, allow all computation so far @@ -1354,14 +1352,14 @@ yescrypt_kdf(const yescrypt_shared_t * shared, yescrypt_local_t * local, if ((t || flags) && buflen == sizeof(sha256)) { /* Compute ClientKey */ { - HMAC_SHA256_CTX_Y ctx; - HMAC_SHA256_Init_Y(&ctx, buf, buflen); + HMAC_SHA256_CTX ctx; + HMAC_SHA256_Init(&ctx, buf, buflen); if ( yescrypt_client_key ) - HMAC_SHA256_Update_Y( &ctx, (uint8_t*)yescrypt_client_key, + HMAC_SHA256_Update( &ctx, (uint8_t*)yescrypt_client_key, yescrypt_client_key_len ); else - HMAC_SHA256_Update_Y( &ctx, salt, saltlen ); - HMAC_SHA256_Final_Y(sha256, &ctx); + HMAC_SHA256_Update( &ctx, salt, saltlen ); + HMAC_SHA256_Final(sha256, &ctx); } /* Compute StoredKey */ { diff --git a/algo/yescrypt/yescrypt.c b/algo/yescrypt/yescrypt.c index 770db33..f4adc47 100644 --- a/algo/yescrypt/yescrypt.c +++ b/algo/yescrypt/yescrypt.c @@ -25,7 +25,7 @@ #include "compat.h" #include "yescrypt.h" -#include "sha256_Y.h" +#include "algo/sha/hmac-sha256-hash.h" #include "algo-gate-api.h" #define BYTES2CHARS(bytes) \ @@ -385,35 +385,30 @@ void yescrypthash(void *output, const void *input) int scanhash_yescrypt( 
struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) { - uint32_t _ALIGN(64) vhash[8]; - uint32_t _ALIGN(64) endiandata[20]; - uint32_t *pdata = work->data; - uint32_t *ptarget = work->target; + uint32_t _ALIGN(64) vhash[8]; + uint32_t _ALIGN(64) endiandata[20]; + uint32_t *pdata = work->data; + uint32_t *ptarget = work->target; + const uint32_t first_nonce = pdata[19]; + const uint32_t last_nonce = max_nonce; + uint32_t n = first_nonce; + int thr_id = mythr->id; // thr_id arg is deprecated - const uint32_t Htarg = ptarget[7]; - const uint32_t first_nonce = pdata[19]; - uint32_t n = first_nonce; - int thr_id = mythr->id; // thr_id arg is deprecated - - for (int k = 0; k < 19; k++) - be32enc(&endiandata[k], pdata[k]); - - do { - be32enc(&endiandata[19], n); - yescrypt_hash((char*) endiandata, (char*) vhash, 80); - if (vhash[7] <= Htarg && fulltest(vhash, ptarget ) - && !opt_benchmark ) - { - pdata[19] = n; - submit_solution( work, vhash, mythr ); - } - n++; - } while (n < max_nonce && !work_restart[thr_id].restart); - - *hashes_done = n - first_nonce + 1; - pdata[19] = n; - - return 0; + for ( int k = 0; k < 19; k++ ) + be32enc( &endiandata[k], pdata[k] ); + endiandata[19] = n; + do { + yescrypt_hash((char*) endiandata, (char*) vhash, 80); + if unlikely( valid_hash( vhash, ptarget ) && !opt_benchmark ) + { + be32enc( pdata+19, n ); + submit_solution( work, vhash, mythr ); + } + endiandata[19] = ++n; + } while ( n < last_nonce && !work_restart[thr_id].restart ); + *hashes_done = n - first_nonce; + pdata[19] = n; + return 0; } void yescrypt_gate_base(algo_gate_t *gate ) diff --git a/algo/yespower/crypto/blake2b-yp.c b/algo/yespower/crypto/blake2b-yp.c index f29cc9f..407d2dd 100644 --- a/algo/yespower/crypto/blake2b-yp.c +++ b/algo/yespower/crypto/blake2b-yp.c @@ -30,9 +30,8 @@ #include #include #include - +#include "simd-utils.h" #include -#include #include "blake2b-yp.h" // Cyclic right rotation. 
@@ -272,7 +271,7 @@ void pbkdf2_blake2b_yp(const uint8_t * passwd, size_t passwdlen, const uint8_t * { hmac_yp_ctx PShctx, hctx; size_t i; - uint8_t ivec[4]; + uint32_t ivec; uint8_t U[32]; uint8_t T[32]; uint64_t j; @@ -286,11 +285,11 @@ void pbkdf2_blake2b_yp(const uint8_t * passwd, size_t passwdlen, const uint8_t * /* Iterate through the blocks. */ for (i = 0; i * 32 < dkLen; i++) { /* Generate INT(i + 1). */ - be32enc(ivec, (uint32_t)(i + 1)); + ivec = bswap_32( i+1 ); /* Compute U_1 = PRF(P, S || INT(i)). */ memcpy(&hctx, &PShctx, sizeof(hmac_yp_ctx)); - hmac_blake2b_yp_update(&hctx, ivec, 4); + hmac_blake2b_yp_update(&hctx, &ivec, 4); hmac_blake2b_yp_final(&hctx, U); /* T_i = U_1 ... */ diff --git a/algo/yespower/insecure_memzero.h b/algo/yespower/insecure_memzero.h deleted file mode 100644 index 5a0ba75..0000000 --- a/algo/yespower/insecure_memzero.h +++ /dev/null @@ -1 +0,0 @@ -#define insecure_memzero(buf, len) /* empty */ diff --git a/algo/yespower/sysendian.h b/algo/yespower/sysendian.h deleted file mode 100644 index 52c1fe7..0000000 --- a/algo/yespower/sysendian.h +++ /dev/null @@ -1,94 +0,0 @@ -/*- - * Copyright 2007-2014 Colin Percival - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#ifndef _SYSENDIAN_H_ -#define _SYSENDIAN_H_ - -#include - -/* Avoid namespace collisions with BSD . */ -#define be32dec libcperciva_be32dec -#define be32enc libcperciva_be32enc -#define be64enc libcperciva_be64enc -#define le32dec libcperciva_le32dec -#define le32enc libcperciva_le32enc - -static inline uint32_t -be32dec(const void * pp) -{ - const uint8_t * p = (uint8_t const *)pp; - - return ((uint32_t)(p[3]) + ((uint32_t)(p[2]) << 8) + - ((uint32_t)(p[1]) << 16) + ((uint32_t)(p[0]) << 24)); -} - -static inline void -be32enc(void * pp, uint32_t x) -{ - uint8_t * p = (uint8_t *)pp; - - p[3] = x & 0xff; - p[2] = (x >> 8) & 0xff; - p[1] = (x >> 16) & 0xff; - p[0] = (x >> 24) & 0xff; -} - -static inline void -be64enc(void * pp, uint64_t x) -{ - uint8_t * p = (uint8_t *)pp; - - p[7] = x & 0xff; - p[6] = (x >> 8) & 0xff; - p[5] = (x >> 16) & 0xff; - p[4] = (x >> 24) & 0xff; - p[3] = (x >> 32) & 0xff; - p[2] = (x >> 40) & 0xff; - p[1] = (x >> 48) & 0xff; - p[0] = (x >> 56) & 0xff; -} - -static inline uint32_t -le32dec(const void * pp) -{ - const uint8_t * p = (uint8_t const *)pp; - - return ((uint32_t)(p[0]) + ((uint32_t)(p[1]) << 8) + - ((uint32_t)(p[2]) << 16) + ((uint32_t)(p[3]) << 24)); -} - -static inline void -le32enc(void * pp, uint32_t x) -{ - uint8_t * p = (uint8_t *)pp; - - p[0] = x & 0xff; - p[1] = (x >> 8) & 0xff; - p[2] = (x >> 16) & 0xff; - p[3] = (x >> 24) & 0xff; -} - -#endif /* !_SYSENDIAN_H_ */ diff --git 
a/algo/yespower/utils/insecure_memzero.h b/algo/yespower/utils/insecure_memzero.h deleted file mode 100644 index 5a0ba75..0000000 --- a/algo/yespower/utils/insecure_memzero.h +++ /dev/null @@ -1 +0,0 @@ -#define insecure_memzero(buf, len) /* empty */ diff --git a/algo/yespower/utils/sysendian.h b/algo/yespower/utils/sysendian.h deleted file mode 100644 index 8299e38..0000000 --- a/algo/yespower/utils/sysendian.h +++ /dev/null @@ -1,94 +0,0 @@ -/*- - * Copyright 2007-2014 Colin Percival - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#ifndef _SYSENDIAN_H_ -#define _SYSENDIAN_H_ - -#include - -/* Avoid namespace collisions with BSD . 
*/ -#define be32dec libcperciva_be32dec -#define be32enc libcperciva_be32enc -#define be64enc libcperciva_be64enc -#define le32dec libcperciva_le32dec -#define le32enc libcperciva_le32enc - -static inline uint32_t -be32dec(const void * pp) -{ - const uint8_t * p = (uint8_t const *)pp; - - return ((uint32_t)(p[3]) + ((uint32_t)(p[2]) << 8) + - ((uint32_t)(p[1]) << 16) + ((uint32_t)(p[0]) << 24)); -} - -static inline void -be32enc(void * pp, uint32_t x) -{ - uint8_t * p = (uint8_t *)pp; - - p[3] = x & 0xff; - p[2] = (x >> 8) & 0xff; - p[1] = (x >> 16) & 0xff; - p[0] = (x >> 24) & 0xff; -} - -static inline void -be64enc(void * pp, uint64_t x) -{ - uint8_t * p = (uint8_t *)pp; - - p[7] = x & 0xff; - p[6] = (x >> 8) & 0xff; - p[5] = (x >> 16) & 0xff; - p[4] = (x >> 24) & 0xff; - p[3] = (x >> 32) & 0xff; - p[2] = (x >> 40) & 0xff; - p[1] = (x >> 48) & 0xff; - p[0] = (x >> 56) & 0xff; -} - -static inline uint32_t -le32dec(const void * pp) -{ - const uint8_t * p = (uint8_t const *)pp; - - return ((uint32_t)(p[0]) + ((uint32_t)(p[1]) << 8) + - ((uint32_t)(p[2]) << 16) + ((uint32_t)(p[3]) << 24)); -} - -static inline void -le32enc(void * pp, uint32_t x) -{ - uint8_t * p = (uint8_t *)pp; - - p[0] = x & 0xff; - p[1] = (x >> 8) & 0xff; - p[2] = (x >> 16) & 0xff; - p[3] = (x >> 24) & 0xff; -} - -#endif /* !_SYSENDIAN_H_ */ diff --git a/algo/yespower/yescrypt-r8g.c b/algo/yespower/yescrypt-r8g.c new file mode 100644 index 0000000..5b9e2be --- /dev/null +++ b/algo/yespower/yescrypt-r8g.c @@ -0,0 +1,80 @@ +/*- + * Copyright 2013-2018 Alexander Peslyak + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "cpuminer-config.h" +#include "miner.h" +#include +#include +#include +#include "yescrypt-r8g.h" + +int scanhash_yespower_r8g( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ) +{ + uint64_t hash[4] __attribute__((aligned(64))); + uint32_t endiandata[32]; + uint32_t *pdata = work->data; + const uint64_t *ptarget = (const uint64_t*)work->target; + uint32_t n = pdata[19]; + const uint32_t first_nonce = pdata[19]; + const uint32_t last_nonce = max_nonce; + const int thr_id = mythr->id; + + yespower_params_t params = + { + .version = YESPOWER_0_5, + .N = 2048, + .r = 8, + .pers = (const uint8_t *)endiandata, + .perslen = work->sapling ? 112 : 80, + }; + + //we need bigendian data... 
+ for ( int i = 0 ; i < 32; i++ ) + be32enc( &endiandata[ i], pdata[ i ]); + endiandata[19] = n; + + do { + yespower_tls( (unsigned char *)endiandata, params.perslen, + &params, (yespower_binary_t*)hash ); + + if unlikely( valid_hash( hash, ptarget ) && !opt_benchmark ) + { + be32enc( pdata+19, n ); + submit_solution( work, hash, mythr ); + } + endiandata[19] = ++n; + } while (n < last_nonce && !work_restart[thr_id].restart); + + *hashes_done = n - first_nonce + 1; + pdata[19] = n; + return 0; +} + +bool register_yescryptr8g_algo( algo_gate_t* gate ) +{ + gate->optimizations = SSE2_OPT | SHA_OPT; + gate->scanhash = (void*)&scanhash_yespower_r8g; + gate->hash = (void*)&yespower_tls; + opt_target_factor = 65536.0; + return true; + }; + + diff --git a/algo/yescrypt/sha256_Y.h b/algo/yespower/yescrypt-r8g.h similarity index 51% rename from algo/yescrypt/sha256_Y.h rename to algo/yespower/yescrypt-r8g.h index 703d059..57f5cec 100644 --- a/algo/yescrypt/sha256_Y.h +++ b/algo/yespower/yescrypt-r8g.h @@ -1,5 +1,6 @@ /*- - * Copyright 2005,2007,2009 Colin Percival + * Copyright 2009 Colin Percival + * Copyright 2013-2018 Alexander Peslyak * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -23,47 +24,26 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $FreeBSD: src/lib/libmd/sha256_Y.h,v 1.2 2006/01/17 15:35:56 phk Exp $ + * This file was originally written by Colin Percival as part of the Tarsnap + * online backup system. 
*/ +#ifndef _YESPOWERR8G_H_ +#define _YESPOWERR8G_H_ -#ifndef _SHA256_H_ -#define _SHA256_H_ - -#include #include -#include +#include /* for size_t */ +#include "algo-gate-api.h" +#include "algo/yespower/yespower.h" -typedef struct SHA256Context { - uint32_t state[8]; - uint32_t count[2]; - unsigned char buf[64]; -} SHA256_CTX_Y; +#ifdef __cplusplus +extern "C" { +#endif -/* -typedef struct HMAC_SHA256Context { - SHA256_CTX_Y ictx; - SHA256_CTX_Y octx; -} HMAC_SHA256_CTX_Y; -*/ +extern int yespowerr8g_tls(const uint8_t *src, size_t srclen, + const yespower_params_t *params, yespower_binary_t *dst); -typedef struct HMAC_SHA256Context { - SHA256_CTX ictx; - SHA256_CTX octx; -} HMAC_SHA256_CTX_Y; +#ifdef __cplusplus +} +#endif -void SHA256_Init_Y(SHA256_CTX_Y *); -void SHA256_Update_Y(SHA256_CTX_Y *, const void *, size_t); -void SHA256_Final_Y(unsigned char [32], SHA256_CTX_Y *); -void HMAC_SHA256_Init_Y(HMAC_SHA256_CTX_Y *, const void *, size_t); -void HMAC_SHA256_Update_Y(HMAC_SHA256_CTX_Y *, const void *, size_t); -void HMAC_SHA256_Final_Y(unsigned char [32], HMAC_SHA256_CTX_Y *); - -/** - * PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, c, buf, dkLen): - * Compute PBKDF2(passwd, salt, c, dkLen) using HMAC-SHA256 as the PRF, and - * write the output to buf. The value dkLen must be at most 32 * (2^32 - 1). 
- */ -void PBKDF2_SHA256_Y(const uint8_t *, size_t, const uint8_t *, size_t, - uint64_t, uint8_t *, size_t); - -#endif /* !_SHA256_H_ */ +#endif /* !_YESPOWERR8G_H_ */ diff --git a/algo/yespower/yespower-blake2b.c b/algo/yespower/yespower-blake2b.c index 8bd70fa..902aac7 100644 --- a/algo/yespower/yespower-blake2b.c +++ b/algo/yespower/yespower-blake2b.c @@ -95,11 +95,7 @@ #include #include #include - -#include "utils/insecure_memzero.h" -#include "utils/sysendian.h" #include "crypto/blake2b-yp.h" - #include "yespower.h" #ifdef __unix__ @@ -952,7 +948,7 @@ static void smix1(uint8_t *B, size_t r, uint32_t N, salsa20_blk_t *dst = &X[i]; size_t k; for (k = 0; k < 16; k++) - tmp->w[k] = le32dec(&src->w[k]); + tmp->w[k] = src->w[k]; salsa20_simd_shuffle(tmp, dst); } @@ -999,7 +995,7 @@ static void smix1(uint8_t *B, size_t r, uint32_t N, salsa20_blk_t *dst = (salsa20_blk_t *)&B[i * 64]; size_t k; for (k = 0; k < 16; k++) - le32enc(&tmp->w[k], src->w[k]); + tmp->w[k] = src->w[k]; salsa20_simd_unshuffle(tmp, dst); } } @@ -1025,7 +1021,7 @@ static void smix2(uint8_t *B, size_t r, uint32_t N, uint32_t Nloop, salsa20_blk_t *dst = &X[i]; size_t k; for (k = 0; k < 16; k++) - tmp->w[k] = le32dec(&src->w[k]); + tmp->w[k] = src->w[k]; salsa20_simd_shuffle(tmp, dst); } @@ -1055,7 +1051,7 @@ static void smix2(uint8_t *B, size_t r, uint32_t N, uint32_t Nloop, salsa20_blk_t *dst = (salsa20_blk_t *)&B[i * 64]; size_t k; for (k = 0; k < 16; k++) - le32enc(&tmp->w[k], src->w[k]); + tmp->w[k] = src->w[k]; salsa20_simd_unshuffle(tmp, dst); } } diff --git a/algo/yespower/yespower-gate.c b/algo/yespower/yespower-gate.c index b341df8..5bc9a3b 100644 --- a/algo/yespower/yespower-gate.c +++ b/algo/yespower/yespower-gate.c @@ -32,6 +32,8 @@ static yespower_params_t yespower_params; +// YESPOWER + void yespower_hash( const char *input, char *output, uint32_t len ) { yespower_tls( input, len, &yespower_params, (yespower_binary_t*)output ); @@ -40,36 +42,33 @@ void yespower_hash( const char *input, 
char *output, uint32_t len ) int scanhash_yespower( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) { - uint32_t _ALIGN(64) vhash[8]; - uint32_t _ALIGN(64) endiandata[20]; - uint32_t *pdata = work->data; - uint32_t *ptarget = work->target; + uint32_t _ALIGN(64) vhash[8]; + uint32_t _ALIGN(64) endiandata[20]; + uint32_t *pdata = work->data; + uint32_t *ptarget = work->target; + const uint32_t first_nonce = pdata[19]; + const uint32_t last_nonce = max_nonce; + uint32_t n = first_nonce; + const int thr_id = mythr->id; - const uint32_t Htarg = ptarget[7]; - const uint32_t first_nonce = pdata[19]; - uint32_t n = first_nonce; - int thr_id = mythr->id; // thr_id arg is deprecated - - for (int k = 0; k < 19; k++) - be32enc(&endiandata[k], pdata[k]); - do { - be32enc(&endiandata[19], n); - yespower_hash((char*) endiandata, (char*) vhash, 80); - if ( vhash[7] <= Htarg && fulltest( vhash, ptarget ) - && !opt_benchmark ) - { - pdata[19] = n; - submit_solution( work, vhash, mythr ); - } - n++; - } while (n < max_nonce && !work_restart[thr_id].restart); - - *hashes_done = n - first_nonce + 1; - pdata[19] = n; - - return 0; + for ( int k = 0; k < 19; k++ ) + be32enc( &endiandata[k], pdata[k] ); + endiandata[19] = n; + do { + yespower_hash( (char*)endiandata, (char*)vhash, 80 ); + if unlikely( valid_hash( vhash, ptarget ) && !opt_benchmark ) + { + be32enc( pdata+19, n ); + submit_solution( work, vhash, mythr ); + } + endiandata[19] = ++n; + } while ( n < last_nonce && !work_restart[thr_id].restart ); + *hashes_done = n - first_nonce; + pdata[19] = n; + return 0; } +// YESPOWER-B2B void yespower_b2b_hash( const char *input, char *output, uint32_t len ) { @@ -79,34 +78,30 @@ void yespower_b2b_hash( const char *input, char *output, uint32_t len ) int scanhash_yespower_b2b( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) { - uint32_t _ALIGN(64) vhash[8]; - uint32_t _ALIGN(64) endiandata[20]; - uint32_t 
*pdata = work->data; - uint32_t *ptarget = work->target; + uint32_t _ALIGN(64) vhash[8]; + uint32_t _ALIGN(64) endiandata[20]; + uint32_t *pdata = work->data; + uint32_t *ptarget = work->target; + const uint32_t first_nonce = pdata[19]; + uint32_t n = first_nonce; + const uint32_t last_nonce = max_nonce; + const int thr_id = mythr->id; // thr_id arg is deprecated - const uint32_t Htarg = ptarget[7]; - const uint32_t first_nonce = pdata[19]; - uint32_t n = first_nonce; - int thr_id = mythr->id; // thr_id arg is deprecated - - for (int k = 0; k < 19; k++) - be32enc(&endiandata[k], pdata[k]); - do { - be32enc(&endiandata[19], n); - yespower_b2b_hash((char*) endiandata, (char*) vhash, 80); - if ( vhash[7] < Htarg && fulltest( vhash, ptarget ) - && !opt_benchmark ) - { - pdata[19] = n; - submit_solution( work, vhash, mythr ); - } - n++; - } while (n < max_nonce && !work_restart[thr_id].restart); - - *hashes_done = n - first_nonce + 1; - pdata[19] = n; - - return 0; + for ( int k = 0; k < 19; k++ ) + be32enc( &endiandata[k], pdata[k] ); + endiandata[19] = n; + do { + yespower_b2b_hash( (char*) endiandata, (char*) vhash, 80 ); + if unlikely( valid_hash( vhash, ptarget ) && !opt_benchmark ) + { + be32enc( pdata+19, n ); + submit_solution( work, vhash, mythr ); + } + endiandata[19] = ++n; + } while ( n < last_nonce && !work_restart[thr_id].restart ); + *hashes_done = n - first_nonce; + pdata[19] = n; + return 0; } bool register_yespower_algo( algo_gate_t* gate ) @@ -156,7 +151,7 @@ bool register_yespowerr16_algo( algo_gate_t* gate ) return true; }; - +/* not used bool register_yescrypt_05_algo( algo_gate_t* gate ) { gate->optimizations = SSE2_OPT | SHA_OPT; @@ -208,6 +203,9 @@ bool register_yescryptr32_05_algo( algo_gate_t* gate ) opt_target_factor = 65536.0; return true; } +*/ + +// POWER2B bool register_power2b_algo( algo_gate_t* gate ) { diff --git a/algo/yespower/yespower-opt.c b/algo/yespower/yespower-opt.c index 470a4e5..fbbec6d 100644 --- 
a/algo/yespower/yespower-opt.c +++ b/algo/yespower/yespower-opt.c @@ -95,13 +95,8 @@ #include #include #include - -#include "insecure_memzero.h" -#include "sha256_p.h" -#include "sysendian.h" - +#include "algo/sha/hmac-sha256-hash.h" #include "yespower.h" - #include "yespower-platform.c" #if __STDC_VERSION__ >= 199901L @@ -861,7 +856,7 @@ static void smix1(uint8_t *B, size_t r, uint32_t N, salsa20_blk_t *dst = &X[i]; size_t k; for (k = 0; k < 16; k++) - tmp->w[k] = le32dec(&src->w[k]); + tmp->w[k] = src->w[k]; salsa20_simd_shuffle(tmp, dst); } @@ -908,7 +903,7 @@ static void smix1(uint8_t *B, size_t r, uint32_t N, salsa20_blk_t *dst = (salsa20_blk_t *)&B[i * 64]; size_t k; for (k = 0; k < 16; k++) - le32enc(&tmp->w[k], src->w[k]); + tmp->w[k] = src->w[k]; salsa20_simd_unshuffle(tmp, dst); } } @@ -934,7 +929,7 @@ static void smix2(uint8_t *B, size_t r, uint32_t N, uint32_t Nloop, salsa20_blk_t *dst = &X[i]; size_t k; for (k = 0; k < 16; k++) - tmp->w[k] = le32dec(&src->w[k]); + tmp->w[k] = src->w[k]; salsa20_simd_shuffle(tmp, dst); } @@ -966,7 +961,7 @@ static void smix2(uint8_t *B, size_t r, uint32_t N, uint32_t Nloop, salsa20_blk_t *dst = (salsa20_blk_t *)&B[i * 64]; size_t k; for (k = 0; k < 16; k++) - le32enc(&tmp->w[k], src->w[k]); + tmp->w[k] = src->w[k]; salsa20_simd_unshuffle(tmp, dst); } } diff --git a/algo/yespower/yespower-ref.c b/algo/yespower/yespower-ref.c index 2f50a09..b04cfbb 100644 --- a/algo/yespower/yespower-ref.c +++ b/algo/yespower/yespower-ref.c @@ -51,8 +51,8 @@ #include #include -#include "sha256_p.h" -#include "sysendian.h" +#include "algo/sha/hmac-sha256-hash.h" +//#include "sysendian.h" #include "yespower.h" @@ -346,7 +346,7 @@ static void smix1(uint32_t *B, size_t r, uint32_t N, /* 1: X <-- B */ for (k = 0; k < 2 * r; k++) for (i = 0; i < 16; i++) - X[k * 16 + i] = le32dec(&B[k * 16 + (i * 5 % 16)]); + X[k * 16 + i] = B[k * 16 + (i * 5 % 16)]; if (ctx->version != YESPOWER_0_5) { for (k = 1; k < r; k++) { @@ -378,7 +378,7 @@ static void 
smix1(uint32_t *B, size_t r, uint32_t N, /* B' <-- X */ for (k = 0; k < 2 * r; k++) for (i = 0; i < 16; i++) - le32enc(&B[k * 16 + (i * 5 % 16)], X[k * 16 + i]); + B[k * 16 + (i * 5 % 16)] = X[k * 16 + i]; } /** @@ -398,7 +398,7 @@ static void smix2(uint32_t *B, size_t r, uint32_t N, uint32_t Nloop, /* X <-- B */ for (k = 0; k < 2 * r; k++) for (i = 0; i < 16; i++) - X[k * 16 + i] = le32dec(&B[k * 16 + (i * 5 % 16)]); + X[k * 16 + i] = B[k * 16 + (i * 5 % 16)]; /* 6: for i = 0 to N - 1 do */ for (i = 0; i < Nloop; i++) { @@ -418,7 +418,7 @@ static void smix2(uint32_t *B, size_t r, uint32_t N, uint32_t Nloop, /* 10: B' <-- X */ for (k = 0; k < 2 * r; k++) for (i = 0; i < 16; i++) - le32enc(&B[k * 16 + (i * 5 % 16)], X[k * 16 + i]); + B[k * 16 + (i * 5 % 16)] = X[k * 16 + i]; } /** diff --git a/algo/yespower/yespower.h b/algo/yespower/yespower.h index 88e2762..e278db4 100644 --- a/algo/yespower/yespower.h +++ b/algo/yespower/yespower.h @@ -71,7 +71,7 @@ typedef struct { */ typedef struct { unsigned char uc[32]; -} yespower_binary_t; +} yespower_binary_t __attribute__ ((aligned (64))); /** * yespower_init_local(local): diff --git a/configure b/configure index b83b81d..3dce50f 100755 --- a/configure +++ b/configure @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.11.6. +# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.11.7. # # # Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc. @@ -577,8 +577,8 @@ MAKEFLAGS= # Identity of this package. PACKAGE_NAME='cpuminer-opt' PACKAGE_TARNAME='cpuminer-opt' -PACKAGE_VERSION='3.11.6' -PACKAGE_STRING='cpuminer-opt 3.11.6' +PACKAGE_VERSION='3.11.7' +PACKAGE_STRING='cpuminer-opt 3.11.7' PACKAGE_BUGREPORT='' PACKAGE_URL='' @@ -1332,7 +1332,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. 
cat <<_ACEOF -\`configure' configures cpuminer-opt 3.11.6 to adapt to many kinds of systems. +\`configure' configures cpuminer-opt 3.11.7 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1404,7 +1404,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of cpuminer-opt 3.11.6:";; + short | recursive ) echo "Configuration of cpuminer-opt 3.11.7:";; esac cat <<\_ACEOF @@ -1509,7 +1509,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -cpuminer-opt configure 3.11.6 +cpuminer-opt configure 3.11.7 generated by GNU Autoconf 2.69 Copyright (C) 2012 Free Software Foundation, Inc. @@ -2012,7 +2012,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by cpuminer-opt $as_me 3.11.6, which was +It was created by cpuminer-opt $as_me 3.11.7, which was generated by GNU Autoconf 2.69. Invocation command line was $ $0 $@ @@ -2993,7 +2993,7 @@ fi # Define the identity of the package. PACKAGE='cpuminer-opt' - VERSION='3.11.6' + VERSION='3.11.7' cat >>confdefs.h <<_ACEOF @@ -6690,7 +6690,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by cpuminer-opt $as_me 3.11.6, which was +This file was extended by cpuminer-opt $as_me 3.11.7, which was generated by GNU Autoconf 2.69. 
Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -6756,7 +6756,7 @@ _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" ac_cs_version="\\ -cpuminer-opt config.status 3.11.6 +cpuminer-opt config.status 3.11.7 configured by $0, generated by GNU Autoconf 2.69, with options \\"\$ac_cs_config\\" diff --git a/configure.ac b/configure.ac index ebdfdaf..2bf3d8e 100644 --- a/configure.ac +++ b/configure.ac @@ -1,4 +1,4 @@ -AC_INIT([cpuminer-opt], [3.11.6]) +AC_INIT([cpuminer-opt], [3.11.7]) AC_PREREQ([2.59c]) AC_CANONICAL_SYSTEM diff --git a/cpu-miner.c b/cpu-miner.c index 79e720f..b6edca2 100644 --- a/cpu-miner.c +++ b/cpu-miner.c @@ -506,6 +506,7 @@ static bool gbt_work_decode( const json_t *val, struct work *work ) uint32_t version, curtime, bits; uint32_t prevhash[8]; uint32_t target[8]; + unsigned char final_sapling_hash[32]; int cbtx_size; uchar *cbtx = NULL; int tx_count, tx_size; @@ -529,8 +530,8 @@ static bool gbt_work_decode( const json_t *val, struct work *work ) continue; if ( !strcmp( s, "coinbase/append" ) ) coinbase_append = true; else if ( !strcmp( s, "submit/coinbase" ) ) submit_coinbase = true; - else if ( !strcmp( s, "version/force" ) ) version_force = true; - else if ( !strcmp( s, "version/reduce" ) ) version_reduce = true; + else if ( !strcmp( s, "version/force" ) ) version_force = true; + else if ( !strcmp( s, "version/reduce" ) ) version_reduce = true; } } @@ -550,12 +551,13 @@ static bool gbt_work_decode( const json_t *val, struct work *work ) goto out; } version = (uint32_t) json_integer_value( tmp ); - if ( (version & 0xffU) > BLOCK_VERSION_CURRENT ) + if ( version == 5 ) + work->sapling = true; + else if ( version > 4 ) +// if ( (version & 0xffU) > BLOCK_VERSION_CURRENT ) { if ( version_reduce ) - { version = ( version & ~0xffU ) | BLOCK_VERSION_CURRENT; - } else if ( have_gbt && allow_getwork && !version_force ) { applog( LOG_DEBUG, "Switching to 
getwork, gbt version %d", version ); @@ -590,6 +592,16 @@ static bool gbt_work_decode( const json_t *val, struct work *work ) goto out; } + if ( work->sapling ) + { + if ( unlikely( !jobj_binary( val, "finalsaplingroothash", + final_sapling_hash, sizeof(final_sapling_hash) ) ) ) + { + applog( LOG_ERR, "JSON invalid finalsaplingroothash" ); + goto out; + } + } + /* find count and size of transactions */ txa = json_object_get(val, "transactions" ); if ( !txa || !json_is_array( txa ) ) @@ -772,7 +784,8 @@ static bool gbt_work_decode( const json_t *val, struct work *work ) /* assemble block header */ algo_gate.build_block_header( work, swab32( version ), (uint32_t*) prevhash, (uint32_t*) merkle_tree, - swab32( curtime ), le32dec( &bits ) ); + swab32( curtime ), le32dec( &bits ), + final_sapling_hash ); if ( unlikely( !jobj_binary(val, "target", target, sizeof(target)) ) ) { @@ -1175,13 +1188,13 @@ static int share_result( int result, struct work *null_work, char str3[65]; // display share hash and target for troubleshooting - diff_to_target( str1, my_stats.share_diff ); + diff_to_target( (uint64_t*)str1, my_stats.share_diff ); for ( int i = 0; i < 8; i++ ) be32enc( str2 + i, str1[7 - i] ); bin2hex( str3, (unsigned char*)str2, 12 ); applog2( LOG_INFO, "Hash: %s...", str3 ); - diff_to_target( str1, my_stats.target_diff ); + diff_to_target( (uint64_t*)str1, my_stats.target_diff ); for ( int i = 0; i < 8; i++ ) be32enc( str2 + i, str1[7 - i] ); bin2hex( str3, (unsigned char*)str2, 12 ); @@ -1364,9 +1377,11 @@ char* std_malloc_txs_request( struct work *work ) char data_str[2 * sizeof(work->data) + 1]; int i; + int datasize = work->sapling ? 
112 : 80; + for ( i = 0; i < ARRAY_SIZE(work->data); i++ ) be32enc( work->data + i, work->data[i] ); - bin2hex( data_str, (unsigned char *)work->data, 80 ); + bin2hex( data_str, (unsigned char *)work->data, datasize ); if ( work->workid ) { char *params; @@ -1374,7 +1389,7 @@ char* std_malloc_txs_request( struct work *work ) json_object_set_new( val, "workid", json_string( work->workid ) ); params = json_dumps( val, 0 ); json_decref( val ); - req = (char*) malloc( 128 + 2 * 80 + strlen( work->txs ) + req = (char*) malloc( 128 + 2 * datasize + strlen( work->txs ) + strlen( params ) ); sprintf( req, "{\"method\": \"submitblock\", \"params\": [\"%s%s\", %s], \"id\":4}\r\n", @@ -1383,7 +1398,7 @@ char* std_malloc_txs_request( struct work *work ) } else { - req = (char*) malloc( 128 + 2 * 80 + strlen( work->txs ) ); + req = (char*) malloc( 128 + 2 * datasize + strlen( work->txs ) ); sprintf( req, "{\"method\": \"submitblock\", \"params\": [\"%s%s\"], \"id\":4}\r\n", data_str, work->txs); @@ -1777,7 +1792,7 @@ static bool get_work(struct thr_info *thr, struct work *work) return true; } -static bool submit_work( const struct thr_info *thr, +static bool submit_work( struct thr_info *thr, const struct work *work_in ) { struct workio_cmd *wc; @@ -1843,7 +1858,7 @@ void work_set_target_ratio( struct work* work, const void *hash ) } bool submit_solution( struct work *work, const void *hash, - const struct thr_info *thr ) + struct thr_info *thr ) { if ( likely( submit_work( thr, work ) ) ) { @@ -1861,7 +1876,7 @@ bool submit_solution( struct work *work, const void *hash, } bool submit_lane_solution( struct work *work, const void *hash, - const struct thr_info *thr, const int lane ) + struct thr_info *thr, const int lane ) { if ( likely( submit_work( thr, work ) ) ) { @@ -1992,7 +2007,7 @@ uint32_t *jr2_get_nonceptr( uint32_t *work_data ) } void std_get_new_work( struct work* work, struct work* g_work, int thr_id, - uint32_t *end_nonce_ptr, bool clean_job ) + uint32_t 
*end_nonce_ptr ) { uint32_t *nonceptr = algo_gate.get_nonceptr( work->data ); @@ -2000,9 +2015,7 @@ void std_get_new_work( struct work* work, struct work* g_work, int thr_id, strtoul( g_work->job_id, NULL, 16 ) : true; - if ( force_new_work || *nonceptr >= *end_nonce_ptr - || ( memcmp( work->data, g_work->data, algo_gate.work_cmp_size ) - && clean_job ) ) + if ( force_new_work || *nonceptr >= *end_nonce_ptr ) { work_free( work ); work_copy( work, g_work ); @@ -2165,8 +2178,7 @@ static void *miner_thread( void *userdata ) pthread_mutex_lock( &g_work_lock ); if ( *algo_gate.get_nonceptr( work.data ) >= end_nonce ) algo_gate.stratum_gen_work( &stratum, &g_work ); - algo_gate.get_new_work( &work, &g_work, thr_id, &end_nonce, - stratum.job.clean ); + algo_gate.get_new_work( &work, &g_work, thr_id, &end_nonce ); pthread_mutex_unlock( &g_work_lock ); } else @@ -2186,7 +2198,7 @@ static void *miner_thread( void *userdata ) } g_work_time = time(NULL); } - algo_gate.get_new_work( &work, &g_work, thr_id, &end_nonce, true ); + algo_gate.get_new_work( &work, &g_work, thr_id, &end_nonce ); pthread_mutex_unlock( &g_work_lock ); } @@ -2579,13 +2591,14 @@ out: // used by stratum and gbt void std_build_block_header( struct work* g_work, uint32_t version, - uint32_t *prevhash, uint32_t *merkle_tree, - uint32_t ntime, uint32_t nbits ) + uint32_t *prevhash, uint32_t *merkle_tree, uint32_t ntime, + uint32_t nbits, unsigned char *final_sapling_hash ) { int i; memset( g_work->data, 0, sizeof(g_work->data) ); g_work->data[0] = version; + g_work->sapling = be32dec( &version ) == 5 ? 
true : false; if ( have_stratum ) for ( i = 0; i < 8; i++ ) @@ -2599,8 +2612,27 @@ void std_build_block_header( struct work* g_work, uint32_t version, g_work->data[ algo_gate.ntime_index ] = ntime; g_work->data[ algo_gate.nbits_index ] = nbits; - g_work->data[20] = 0x80000000; - g_work->data[31] = 0x00000280; + if ( g_work->sapling ) + { + if ( have_stratum ) + for ( i = 0; i < 8; i++ ) + g_work->data[20 + i] = le32dec( (uint32_t*)final_sapling_hash + i ); + else + { + for ( i = 0; i < 8; i++ ) + g_work->data[27 - i] = le32dec( (uint32_t*)final_sapling_hash + i ); + g_work->data[19] = 0; + } + g_work->data[28] = 0x80000000; + g_work->data[29] = 0x00000000; + g_work->data[30] = 0x00000000; + g_work->data[31] = 0x00000380; + } + else + { + g_work->data[20] = 0x80000000; + g_work->data[31] = 0x00000280; + } } void std_build_extraheader( struct work* g_work, struct stratum_ctx* sctx ) @@ -2614,7 +2646,8 @@ void std_build_extraheader( struct work* g_work, struct stratum_ctx* sctx ) // Assemble block header algo_gate.build_block_header( g_work, le32dec( sctx->job.version ), (uint32_t*) sctx->job.prevhash, (uint32_t*) merkle_tree, - le32dec( sctx->job.ntime ), le32dec(sctx->job.nbits) ); + le32dec( sctx->job.ntime ), le32dec(sctx->job.nbits), + sctx->job.final_sapling_hash ); } void std_stratum_gen_work( struct stratum_ctx *sctx, struct work *g_work ) @@ -3766,6 +3799,9 @@ int main(int argc, char *argv[]) */ } + applog( LOG_INFO, "Extranonce subscribe: %s", + opt_extranonce ? 
"YES" : "NO" ); + #ifdef HAVE_SYSLOG_H if (use_syslog) openlog("cpuminer", LOG_PID, LOG_USER); diff --git a/miner.h b/miner.h index f16c2ae..dbb3006 100644 --- a/miner.h +++ b/miner.h @@ -317,7 +317,7 @@ bool valid_hash( const void*, const void* ); void work_set_target( struct work* work, double diff ); double target_to_diff( uint32_t* target ); -extern void diff_to_target(uint32_t *target, double diff); +extern void diff_to_target( uint64_t *target, double diff ); double hash_target_ratio( uint32_t* hash, uint32_t* target ); void work_set_target_ratio( struct work* work, const void *hash ); @@ -333,9 +333,9 @@ struct thr_info { //struct thr_info *thr_info; bool submit_solution( struct work *work, const void *hash, - const struct thr_info *thr ); + struct thr_info *thr ); bool submit_lane_solution( struct work *work, const void *hash, - const struct thr_info *thr, const int lane ); + struct thr_info *thr, const int lane ); //bool submit_work( struct thr_info *thr, const struct work *work_in ); @@ -363,7 +363,7 @@ float cpu_temp( int core ); struct work { uint32_t data[48] __attribute__ ((aligned (64))); - uint32_t target[8]; + uint32_t target[8] __attribute__ ((aligned (64))); double targetdiff; // double shareratio; @@ -376,6 +376,8 @@ struct work { char *job_id; size_t xnonce2_len; unsigned char *xnonce2; + bool sapling; + // x16rt uint32_t merkleroothash[8]; uint32_t witmerkleroothash[8]; @@ -387,8 +389,9 @@ struct work { } __attribute__ ((aligned (64))); struct stratum_job { - char *job_id; unsigned char prevhash[32]; + unsigned char final_sapling_hash[32]; + char *job_id; size_t coinbase_size; unsigned char *coinbase; unsigned char *xnonce2; @@ -571,6 +574,7 @@ enum algos { ALGO_SHA256D, ALGO_SHA256Q, ALGO_SHA256T, + ALGO_SHA3D, ALGO_SHAVITE3, ALGO_SKEIN, ALGO_SKEIN2, @@ -604,6 +608,7 @@ enum algos { ALGO_XEVAN, ALGO_YESCRYPT, ALGO_YESCRYPTR8, + ALGO_YESCRYPTR8G, ALGO_YESCRYPTR16, ALGO_YESCRYPTR32, ALGO_YESPOWER, @@ -669,6 +674,7 @@ static const char* const 
algo_names[] = { "sha256d", "sha256q", "sha256t", + "sha3d", "shavite3", "skein", "skein2", @@ -702,6 +708,7 @@ static const char* const algo_names[] = { "xevan", "yescrypt", "yescryptr8", + "yescryptr8g", "yescryptr16", "yescryptr32", "yespower", @@ -834,7 +841,8 @@ Options:\n\ sha256d Double SHA-256\n\ sha256q Quad SHA-256, Pyrite (PYE)\n\ sha256t Triple SHA-256, Onecoin (OC)\n\ - shavite3 Shavite3\n\ + sha3d Double Keccak256 (BSHA3)\n\ + shavite3 Shavite3\n\ skein Skein+Sha (Skeincoin)\n\ skein2 Double Skein (Woodcoin)\n\ skunk Signatum (SIGT)\n\ @@ -867,6 +875,7 @@ Options:\n\ xevan Bitsend (BSD)\n\ yescrypt Globalboost-Y (BSTY)\n\ yescryptr8 BitZeny (ZNY)\n\ + yescryptr8g Koto (KOTO)\n\ yescryptr16 Eli\n\ yescryptr32 WAVI\n\ yespower Cryply\n\ diff --git a/util.c b/util.c index 629b17b..d701389 100644 --- a/util.c +++ b/util.c @@ -923,7 +923,7 @@ bool jobj_binary(const json_t *obj, const char *key, void *buf, size_t buflen) size_t address_to_script(unsigned char *out, size_t outsz, const char *addr) { - unsigned char addrbin[25]; + unsigned char addrbin[26]; int addrver; size_t rv; @@ -1038,27 +1038,33 @@ bool fulltest( const uint32_t *hash, const uint32_t *target ) return rc; } -void diff_to_target(uint32_t *target, double diff) +void diff_to_target(uint64_t *target, double diff) { uint64_t m; int k; - for (k = 6; k > 0 && diff > 1.0; k--) - diff /= 4294967296.0; - m = (uint64_t)(4294901760.0 / diff); - if (m == 0 && k == 6) - memset(target, 0xff, 32); - else { - memset(target, 0, 32); - target[k] = (uint32_t)m; - target[k + 1] = (uint32_t)(m >> 32); + const double exp64 = (double)0xffffffffffffffff + 1.; + for ( k = 3; k > 0 && diff > 1.0; k-- ) + diff /= exp64; + +// for (k = 6; k > 0 && diff > 1.0; k--) +// diff /= 4294967296.0; + m = (uint64_t)( 0xffff0000 / diff ); + if unlikely( m == 0 && k == 3 ) + memset( target, 0xff, 32 ); + else + { + memset( target, 0, 32 ); + target[k] = m; +// target[k] = (uint32_t)m; +// target[k + 1] = (uint32_t)(m >> 32); } } 
// Only used by stratum pools void work_set_target(struct work* work, double diff) { - diff_to_target(work->target, diff); + diff_to_target( (uint64_t*)work->target, diff ); work->targetdiff = diff; } @@ -1830,6 +1836,7 @@ static uint32_t getblocheight(struct stratum_ctx *sctx) static bool stratum_notify(struct stratum_ctx *sctx, json_t *params) { const char *job_id, *prevhash, *coinb1, *coinb2, *version, *nbits, *stime; + const char *finalsaplinghash = NULL; const char *denom10 = NULL, *denom100 = NULL, *denom1000 = NULL, *denom10000 = NULL, *prooffullnode = NULL; const char *extradata = NULL; @@ -1890,6 +1897,18 @@ static bool stratum_notify(struct stratum_ctx *sctx, json_t *params) goto out; } + hex2bin( sctx->job.version, version, 4 ); + int ver = be32dec( sctx->job.version ); + if ( ver == 5 ) + { + finalsaplinghash = json_string_value( json_array_get( params, 9 ) ); + if ( !finalsaplinghash || strlen(finalsaplinghash) != 64 ) + { + applog( LOG_ERR, "Stratum notify: invalid version 5 parameters" ); + goto out; + } + } + if ( is_veil ) { if ( !denom10 || !denom100 || !denom1000 || !denom10000 @@ -1903,66 +1922,69 @@ static bool stratum_notify(struct stratum_ctx *sctx, json_t *params) } if ( merkle_count ) - merkle = (uchar**) malloc(merkle_count * sizeof(char *)); + merkle = (uchar**) malloc( merkle_count * sizeof(char *) ); for ( i = 0; i < merkle_count; i++ ) { - const char *s = json_string_value(json_array_get(merkle_arr, i)); - if (!s || strlen(s) != 64) { - while (i--) - free(merkle[i]); - free(merkle); - applog(LOG_ERR, "Stratum notify: invalid Merkle branch"); + const char *s = json_string_value( json_array_get( merkle_arr, i ) ); + if ( !s || strlen(s) != 64 ) + { + while ( i-- ) free( merkle[i] ); + free( merkle ); + applog( LOG_ERR, "Stratum notify: invalid Merkle branch" ); goto out; } - merkle[i] = (uchar*) malloc(32); - hex2bin(merkle[i], s, 32); + merkle[i] = (uchar*) malloc( 32 ); + hex2bin( merkle[i], s, 32 ); } - 
pthread_mutex_lock(&sctx->work_lock); + pthread_mutex_lock( &sctx->work_lock ); - coinb1_size = strlen(coinb1) / 2; - coinb2_size = strlen(coinb2) / 2; + coinb1_size = strlen( coinb1 ) / 2; + coinb2_size = strlen( coinb2 ) / 2; sctx->job.coinbase_size = coinb1_size + sctx->xnonce1_size + sctx->xnonce2_size + coinb2_size; - sctx->job.coinbase = (uchar*) realloc(sctx->job.coinbase, sctx->job.coinbase_size); + sctx->job.coinbase = (uchar*) realloc( sctx->job.coinbase, + sctx->job.coinbase_size ); sctx->job.xnonce2 = sctx->job.coinbase + coinb1_size + sctx->xnonce1_size; - hex2bin(sctx->job.coinbase, coinb1, coinb1_size); - memcpy(sctx->job.coinbase + coinb1_size, sctx->xnonce1, sctx->xnonce1_size); - if (!sctx->job.job_id || strcmp(sctx->job.job_id, job_id)) + hex2bin( sctx->job.coinbase, coinb1, coinb1_size ); + memcpy( sctx->job.coinbase + coinb1_size, + sctx->xnonce1, sctx->xnonce1_size ); + if ( !sctx->job.job_id || strcmp( sctx->job.job_id, job_id ) ) memset(sctx->job.xnonce2, 0, sctx->xnonce2_size); - hex2bin(sctx->job.xnonce2 + sctx->xnonce2_size, coinb2, coinb2_size); - free(sctx->job.job_id); - sctx->job.job_id = strdup(job_id); - hex2bin(sctx->job.prevhash, prevhash, 32); - if (has_claim) hex2bin(sctx->job.extra, extradata, 32); - if (has_roots) hex2bin(sctx->job.extra, extradata, 64); + hex2bin( sctx->job.xnonce2 + sctx->xnonce2_size, coinb2, coinb2_size ); + free( sctx->job.job_id ); + sctx->job.job_id = strdup( job_id ); + hex2bin( sctx->job.prevhash, prevhash, 32 ); + if ( has_claim ) hex2bin( sctx->job.extra, extradata, 32 ); + if ( has_roots ) hex2bin( sctx->job.extra, extradata, 64 ); + if ( ver == 5 ) + hex2bin( sctx->job.final_sapling_hash, finalsaplinghash, 32 ); if ( is_veil ) { - hex2bin(sctx->job.denom10, denom10, 32); - hex2bin(sctx->job.denom100, denom100, 32); - hex2bin(sctx->job.denom1000, denom1000, 32); - hex2bin(sctx->job.denom10000, denom10000, 32); - hex2bin(sctx->job.proofoffullnode, prooffullnode, 32); + hex2bin( sctx->job.denom10, 
denom10, 32 ); + hex2bin( sctx->job.denom100, denom100, 32 ); + hex2bin( sctx->job.denom1000, denom1000, 32 ); + hex2bin( sctx->job.denom10000, denom10000, 32 ); + hex2bin( sctx->job.proofoffullnode, prooffullnode, 32 ); } - sctx->block_height = getblocheight(sctx); + sctx->block_height = getblocheight( sctx ); - for (i = 0; i < sctx->job.merkle_count; i++) - free(sctx->job.merkle[i]); + for ( i = 0; i < sctx->job.merkle_count; i++ ) + free( sctx->job.merkle[i] ); - free(sctx->job.merkle); + free( sctx->job.merkle ); sctx->job.merkle = merkle; sctx->job.merkle_count = merkle_count; - hex2bin(sctx->job.version, version, 4); - hex2bin(sctx->job.nbits, nbits, 4); - hex2bin(sctx->job.ntime, stime, 4); + hex2bin( sctx->job.nbits, nbits, 4 ); + hex2bin( sctx->job.ntime, stime, 4 ); sctx->job.clean = clean; sctx->job.diff = sctx->next_diff; - pthread_mutex_unlock(&sctx->work_lock); + pthread_mutex_unlock( &sctx->work_lock ); ret = true;