diff --git a/RELEASE_NOTES b/RELEASE_NOTES index 8bb7939..1fd50fa 100644 --- a/RELEASE_NOTES +++ b/RELEASE_NOTES @@ -65,6 +65,13 @@ If not what makes it happen or not happen? Change Log ---------- +v3.14.3 + +#265: more mutex changes to reduce blocking with high thread count. + +#267: fixed hodl algo potential memory alignment issue, + add warning when thread count is not valid for mining hodl algo. + v3.14.2 The second line of the Share Accepted log is no longer displayed, diff --git a/algo-gate-api.c b/algo-gate-api.c index 410b664..38da868 100644 --- a/algo-gate-api.c +++ b/algo-gate-api.c @@ -128,6 +128,119 @@ int scanhash_generic( struct work *work, uint32_t max_nonce, return 0; } +#if defined(__AVX2__) + +//int scanhash_4way_64_64( struct work *work, uint32_t max_nonce, +// uint64_t *hashes_done, struct thr_info *mythr ) + +//int scanhash_4way_64_640( struct work *work, uint32_t max_nonce, +// uint64_t *hashes_done, struct thr_info *mythr ) + +int scanhash_4way_64in_32out( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ) +{ + uint32_t hash32[8*4] __attribute__ ((aligned (64))); + uint32_t vdata[20*4] __attribute__ ((aligned (64))); + uint32_t lane_hash[8] __attribute__ ((aligned (64))); + uint32_t *hash32_d7 = &(hash32[ 7*4 ]); + uint32_t *pdata = work->data; + const uint32_t *ptarget = work->target; + const uint32_t first_nonce = pdata[19]; + const uint32_t last_nonce = max_nonce - 4; + __m256i *noncev = (__m256i*)vdata + 9; + uint32_t n = first_nonce; + const int thr_id = mythr->id; + const uint32_t targ32_d7 = ptarget[7]; + const bool bench = opt_benchmark; + + mm256_bswap32_intrlv80_4x64( vdata, pdata ); + *noncev = mm256_intrlv_blend_32( + _mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev ); + do + { + if ( likely( algo_gate.hash( hash32, vdata, thr_id ) ) ) + for ( int lane = 0; lane < 4; lane++ ) + if ( unlikely( hash32_d7[ lane ] <= targ32_d7 && !bench ) ) + { + extr_lane_4x32( lane_hash, hash32, lane, 
256 ); + if ( valid_hash( lane_hash, ptarget ) ) + { + pdata[19] = bswap_32( n + lane ); + submit_solution( work, lane_hash, mythr ); + } + } + *noncev = _mm256_add_epi32( *noncev, + m256_const1_64( 0x0000000400000000 ) ); + n += 4; + } while ( likely( ( n <= last_nonce ) && !work_restart[thr_id].restart ) ); + pdata[19] = n; + *hashes_done = n - first_nonce; + return 0; +} + +//int scanhash_8way_32_32( struct work *work, uint32_t max_nonce, +// uint64_t *hashes_done, struct thr_info *mythr ) + +#endif + +#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__) + +//int scanhash_8way_64_64( struct work *work, uint32_t max_nonce, +// uint64_t *hashes_done, struct thr_info *mythr ) + +//int scanhash_8way_64_640( struct work *work, uint32_t max_nonce, +// uint64_t *hashes_done, struct thr_info *mythr ) + +int scanhash_8way_64in_32out( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ) +{ + uint32_t hash32[8*8] __attribute__ ((aligned (128))); + uint32_t vdata[20*8] __attribute__ ((aligned (64))); + uint32_t lane_hash[8] __attribute__ ((aligned (64))); + uint32_t *hash32_d7 = &(hash32[7*8]); + uint32_t *pdata = work->data; + const uint32_t *ptarget = work->target; + const uint32_t first_nonce = pdata[19]; + const uint32_t last_nonce = max_nonce - 8; + __m512i *noncev = (__m512i*)vdata + 9; + uint32_t n = first_nonce; + const int thr_id = mythr->id; + const uint32_t targ32_d7 = ptarget[7]; + const bool bench = opt_benchmark; + + mm512_bswap32_intrlv80_8x64( vdata, pdata ); + *noncev = mm512_intrlv_blend_32( + _mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0, + n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev ); + do + { + if ( likely( algo_gate.hash( hash32, vdata, thr_id ) ) ) + for ( int lane = 0; lane < 8; lane++ ) + if ( unlikely( ( hash32_d7[ lane ] <= targ32_d7 ) && !bench ) ) + { + extr_lane_8x32( lane_hash, hash32, lane, 256 ); + if ( likely( valid_hash( lane_hash, ptarget ) ) ) + { + 
pdata[19] = bswap_32( n + lane ); + submit_solution( work, lane_hash, mythr ); + } + } + *noncev = _mm512_add_epi32( *noncev, + m512_const1_64( 0x0000000800000000 ) ); + n += 8; + } while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) ); + pdata[19] = n; + *hashes_done = n - first_nonce; + return 0; +} + +//int scanhash_16way_32_32( struct work *work, uint32_t max_nonce, +// uint64_t *hashes_done, struct thr_info *mythr ) + +#endif + + + int null_hash() { applog(LOG_WARNING,"SWERR: null_hash unsafe null function"); diff --git a/algo-gate-api.h b/algo-gate-api.h index 0110ace..80aa3b4 100644 --- a/algo-gate-api.h +++ b/algo-gate-api.h @@ -110,12 +110,12 @@ inline bool set_excl ( set_t a, set_t b ) { return (a & b) == 0; } typedef struct { -// Mandatory functions, one of these is mandatory. If the default scanhash +// Mandatory functions, one of these is mandatory. If a generic scanhash // is used a custom hash function must be registered, with a custom scanhash -// the hash function is not necessary. +// the custom hash function can be called directly and doesn't need to be +// registered in the gate. int ( *scanhash ) ( struct work*, uint32_t, uint64_t*, struct thr_info* ); -//int ( *hash ) ( void*, const void*, uint32_t ) ; int ( *hash ) ( void*, const void*, int ); //optional, safe to use default in most cases @@ -203,19 +203,61 @@ void four_way_not_tested(); #define STD_WORK_DATA_SIZE 128 #define STD_WORK_CMP_SIZE 76 -#define JR2_NONCE_INDEX 39 // 8 bit offset +//#define JR2_NONCE_INDEX 39 // 8 bit offset // These indexes are only used with JSON RPC2 and are not gated. -#define JR2_WORK_CMP_INDEX_2 43 -#define JR2_WORK_CMP_SIZE_2 33 +//#define JR2_WORK_CMP_INDEX_2 43 +//#define JR2_WORK_CMP_SIZE_2 33 // deprecated, use generic instead int null_scanhash(); // Default generic, may be used in many cases. +// N-way is more complicated, requires many different implementations +// depending on architecture, input format, and output format. 
+// Naming convention is scanhash_[N]way_[input format]in_[output format]out +// N = number of lanes +// input/output format: +// 32: 32 bit interleaved parallel lanes +// 64: 64 bit interleaved parallel lanes +// 640: input only, not interleaved, contiguous serial 640 bit lanes. +// 256: output only, not interleaved, contiguous serial 256 bit lanes. + int scanhash_generic( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ); +#if defined(__AVX2__) + +//int scanhash_4way_64in_64out( struct work *work, uint32_t max_nonce, +// uint64_t *hashes_done, struct thr_info *mythr ); + +//int scanhash_4way_64in_256out( struct work *work, uint32_t max_nonce, +// uint64_t *hashes_done, struct thr_info *mythr ); + +int scanhash_4way_64in_32out( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ); + +//int scanhash_8way_32in_32out( struct work *work, uint32_t max_nonce, +// uint64_t *hashes_done, struct thr_info *mythr ); + +#endif + +#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__) + +//int scanhash_8way_64in_64out( struct work *work, uint32_t max_nonce, +// uint64_t *hashes_done, struct thr_info *mythr ); + +//int scanhash_8way_64in_256out( struct work *work, uint32_t max_nonce, +// uint64_t *hashes_done, struct thr_info *mythr ); + +int scanhash_8way_64in_32out( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ); + +//int scanhash_16way_32in_32out( struct work *work, uint32_t max_nonce, +// uint64_t *hashes_done, struct thr_info *mythr ); + +#endif + // displays warning int null_hash (); @@ -263,7 +305,7 @@ int std_get_work_data_size(); // by calling the algo's register function. 
bool register_algo_gate( int algo, algo_gate_t *gate ); -// Called by algos toverride any default gate functions that are applicable +// Called by algos to override any default gate functions that are applicable // and do any other algo-specific initialization. // The register functions for all the algos can be declared here to reduce // compiler warnings but that's just more work for devs adding new algos. diff --git a/algo/blake/decred-gate.c b/algo/blake/decred-gate.c index 80b2900..0a90de7 100644 --- a/algo/blake/decred-gate.c +++ b/algo/blake/decred-gate.c @@ -78,7 +78,6 @@ void decred_build_extraheader( struct work* g_work, struct stratum_ctx* sctx ) uint32_t extraheader[32] = { 0 }; int headersize = 0; uint32_t* extradata = (uint32_t*) sctx->xnonce1; - size_t t; int i; // getwork over stratum, getwork merkle + header passed in coinb1 @@ -87,9 +86,6 @@ void decred_build_extraheader( struct work* g_work, struct stratum_ctx* sctx ) sizeof(extraheader) ); memcpy( extraheader, &sctx->job.coinbase[32], headersize ); - // Increment extranonce2 - for ( t = 0; t < sctx->xnonce2_size && !( ++sctx->job.xnonce2[t] ); t++ ); - // Assemble block header memset( g_work->data, 0, sizeof(g_work->data) ); g_work->data[0] = le32dec( sctx->job.version ); diff --git a/algo/hodl/hodl-gate.c b/algo/hodl/hodl-gate.c index 930b026..2618309 100644 --- a/algo/hodl/hodl-gate.c +++ b/algo/hodl/hodl-gate.c @@ -99,13 +99,13 @@ void hodl_build_block_header( struct work* g_work, uint32_t version, // called only by thread 0, saves a backup of g_work void hodl_get_new_work( struct work* work, struct work* g_work) { - pthread_mutex_lock( &g_work_lock ); + pthread_rwlock_rdlock( &g_work_lock ); work_free( &hodl_work ); work_copy( &hodl_work, g_work ); hodl_work.data[ algo_gate.nonce_index ] = ( clock() + rand() ) % 9999; - pthread_mutex_unlock( &g_work_lock ); + pthread_rwlock_unlock( &g_work_lock ); } json_t *hodl_longpoll_rpc_call( CURL *curl, int *err, char* lp_url ) @@ -159,11 +159,10 @@ bool
register_hodl_algo( algo_gate_t* gate ) applog( LOG_ERR, "Only CPUs with AES are supported, use legacy version."); return false; #endif -// if ( TOTAL_CHUNKS % opt_n_threads ) -// { -// applog(LOG_ERR,"Thread count must be power of 2."); -// return false; -// } + + if ( GARBAGE_SIZE % opt_n_threads ) + applog( LOG_WARNING,"WARNING: Thread count must be power of 2. Miner may crash or produce invalid hash!" ); + pthread_barrier_init( &hodl_barrier, NULL, opt_n_threads ); gate->optimizations = SSE42_OPT | AES_OPT | AVX2_OPT; gate->scanhash = (void*)&hodl_scanhash; @@ -175,7 +174,7 @@ bool register_hodl_algo( algo_gate_t* gate ) gate->resync_threads = (void*)&hodl_resync_threads; gate->do_this_thread = (void*)&hodl_do_this_thread; gate->work_cmp_size = 76; - hodl_scratchbuf = (unsigned char*)malloc( 1 << 30 ); + hodl_scratchbuf = (unsigned char*)_mm_malloc( 1 << 30, 64 ); allow_getwork = false; opt_target_factor = 8388608.0; return ( hodl_scratchbuf != NULL ); diff --git a/algo/hodl/hodl-wolf.c b/algo/hodl/hodl-wolf.c index ef580b2..6ff6175 100644 --- a/algo/hodl/hodl-wolf.c +++ b/algo/hodl/hodl-wolf.c @@ -70,7 +70,7 @@ int scanhash_hodl_wolf( struct work* work, uint32_t max_nonce, uint32_t *ptarget = work->target; int threadNumber = mythr->id; CacheEntry *Garbage = (CacheEntry*)hodl_scratchbuf; - CacheEntry Cache[AES_PARALLEL_N]; + CacheEntry Cache[AES_PARALLEL_N] __attribute__ ((aligned (64))); __m128i* data[AES_PARALLEL_N]; const __m128i* next[AES_PARALLEL_N]; uint32_t CollisionCount = 0; diff --git a/algo/lyra2/lyra2-gate.c b/algo/lyra2/lyra2-gate.c index 6e42778..ad62d05 100644 --- a/algo/lyra2/lyra2-gate.c +++ b/algo/lyra2/lyra2-gate.c @@ -215,9 +215,6 @@ void phi2_build_extraheader( struct work* g_work, struct stratum_ctx* sctx ) size_t t; algo_gate.gen_merkle_root( merkle_tree, sctx ); - // Increment extranonce2 - for ( t = 0; t < sctx->xnonce2_size && !( ++sctx->job.xnonce2[t] ); t++ ); - // Assemble block header algo_gate.build_block_header( g_work, le32dec( 
sctx->job.version ), (uint32_t*) sctx->job.prevhash, (uint32_t*) merkle_tree, le32dec( sctx->job.ntime ), le32dec(sctx->job.nbits), NULL ); @@ -225,7 +222,6 @@ void phi2_build_extraheader( struct work* g_work, struct stratum_ctx* sctx ) g_work->data[ 20+t ] = ((uint32_t*)sctx->job.extra)[t]; } - bool register_phi2_algo( algo_gate_t* gate ) { gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT; diff --git a/algo/nist5/zr5.c b/algo/nist5/zr5.c index fa731bb..6152091 100644 --- a/algo/nist5/zr5.c +++ b/algo/nist5/zr5.c @@ -156,7 +156,7 @@ int scanhash_zr5( struct work *work, uint32_t max_nonce, void zr5_get_new_work( struct work* work, struct work* g_work, int thr_id, uint32_t* end_nonce_ptr ) { - pthread_mutex_lock( &g_work_lock ); + pthread_rwlock_rdlock( &g_work_lock ); // ignore POK in first word const int wkcmp_sz = 72; // (19-1) * sizeof(uint32_t) @@ -174,7 +174,7 @@ void zr5_get_new_work( struct work* work, struct work* g_work, int thr_id, else ++(*nonceptr); - pthread_mutex_unlock( &g_work_lock ); + pthread_rwlock_unlock( &g_work_lock ); } void zr5_display_pok( struct work* work ) diff --git a/algo/ripemd/lbry-gate.c b/algo/ripemd/lbry-gate.c index f4080a8..ba38c65 100644 --- a/algo/ripemd/lbry-gate.c +++ b/algo/ripemd/lbry-gate.c @@ -69,13 +69,9 @@ void lbry_build_block_header( struct work* g_work, uint32_t version, void lbry_build_extraheader( struct work* g_work, struct stratum_ctx* sctx ) { unsigned char merkle_root[64] = { 0 }; - size_t t; int i; algo_gate.gen_merkle_root( merkle_root, sctx ); - // Increment extranonce2 - for ( t = 0; t < sctx->xnonce2_size && !( ++sctx->job.xnonce2[t] ); t++ ); - // Assemble block header memset( g_work->data, 0, sizeof(g_work->data) ); g_work->data[0] = le32dec( sctx->job.version ); diff --git a/algo/x16/minotaur.c b/algo/x16/minotaur.c index fda02ae..069bf97 100644 --- a/algo/x16/minotaur.c +++ b/algo/x16/minotaur.c @@ -227,7 +227,7 @@ bool initialize_torture_garden() } // Produce a 32-byte hash 
from 80-byte input data -int minotaur_hash( void *output, const void *input ) +int minotaur_hash( void *output, const void *input, int thr_id ) { unsigned char hash[64] __attribute__ ((aligned (64))); diff --git a/algo/x16/x16r-gate.c b/algo/x16/x16r-gate.c index 28ce579..462e264 100644 --- a/algo/x16/x16r-gate.c +++ b/algo/x16/x16r-gate.c @@ -135,18 +135,16 @@ void x16rt_getAlgoString( const uint32_t *timeHash, char *output) void veil_build_extraheader( struct work* g_work, struct stratum_ctx* sctx ) { + uint32_t merkleroothash[8]; + uint32_t witmerkleroothash[8]; + uint32_t denom10[8]; + uint32_t denom100[8]; + uint32_t denom1000[8]; + uint32_t denom10000[8]; + int i; uchar merkle_tree[64] = { 0 }; - size_t t; algo_gate.gen_merkle_root( merkle_tree, sctx ); - // Increment extranonce2 - for ( t = 0; t < sctx->xnonce2_size && !( ++sctx->job.xnonce2[t] ); t++ ); - - // Assemble block header -// algo_gate.build_block_header( g_work, le32dec( sctx->job.version ), -// (uint32_t*) sctx->job.prevhash, (uint32_t*) merkle_tree, -// le32dec( sctx->job.ntime ), le32dec(sctx->job.nbits) ); - int i; memset( g_work->data, 0, sizeof(g_work->data) ); g_work->data[0] = le32dec( sctx->job.version ); @@ -164,35 +162,35 @@ void veil_build_extraheader( struct work* g_work, struct stratum_ctx* sctx ) g_work->data[31] = 0x00000280; for ( i = 0; i < 8; i++ ) - g_work->merkleroothash[7 - i] = be32dec((uint32_t *)merkle_tree + i); + merkleroothash[7 - i] = be32dec((uint32_t *)merkle_tree + i); for ( i = 0; i < 8; i++ ) - g_work->witmerkleroothash[7 - i] = be32dec((uint32_t *)merkle_tree + i); + witmerkleroothash[7 - i] = be32dec((uint32_t *)merkle_tree + i); for ( i = 0; i < 8; i++ ) - g_work->denom10[i] = le32dec((uint32_t *)sctx->job.denom10 + i); + denom10[i] = le32dec((uint32_t *)sctx->job.denom10 + i); for ( i = 0; i < 8; i++ ) - g_work->denom100[i] = le32dec((uint32_t *)sctx->job.denom100 + i); + denom100[i] = le32dec((uint32_t *)sctx->job.denom100 + i); for ( i = 0; i < 8; i++ ) - 
g_work->denom1000[i] = le32dec((uint32_t *)sctx->job.denom1000 + i); + denom1000[i] = le32dec((uint32_t *)sctx->job.denom1000 + i); for ( i = 0; i < 8; i++ ) - g_work->denom10000[i] = le32dec((uint32_t *)sctx->job.denom10000 + i); + denom10000[i] = le32dec((uint32_t *)sctx->job.denom10000 + i); uint32_t pofnhash[8]; memset(pofnhash, 0x00, 32); - char denom10_str [ 2 * sizeof( g_work->denom10 ) + 1 ]; - char denom100_str [ 2 * sizeof( g_work->denom100 ) + 1 ]; - char denom1000_str [ 2 * sizeof( g_work->denom1000 ) + 1 ]; - char denom10000_str [ 2 * sizeof( g_work->denom10000 ) + 1 ]; - char merkleroot_str [ 2 * sizeof( g_work->merkleroothash ) + 1 ]; - char witmerkleroot_str[ 2 * sizeof( g_work->witmerkleroothash ) + 1 ]; + char denom10_str [ 2 * sizeof( denom10 ) + 1 ]; + char denom100_str [ 2 * sizeof( denom100 ) + 1 ]; + char denom1000_str [ 2 * sizeof( denom1000 ) + 1 ]; + char denom10000_str [ 2 * sizeof( denom10000 ) + 1 ]; + char merkleroot_str [ 2 * sizeof( merkleroothash ) + 1 ]; + char witmerkleroot_str[ 2 * sizeof( witmerkleroothash ) + 1 ]; char pofn_str [ 2 * sizeof( pofnhash ) + 1 ]; - cbin2hex( denom10_str, (char*) g_work->denom10, 32 ); - cbin2hex( denom100_str, (char*) g_work->denom100, 32 ); - cbin2hex( denom1000_str, (char*) g_work->denom1000, 32 ); - cbin2hex( denom10000_str, (char*) g_work->denom10000, 32 ); - cbin2hex( merkleroot_str, (char*) g_work->merkleroothash, 32 ); - cbin2hex( witmerkleroot_str, (char*) g_work->witmerkleroothash, 32 ); + cbin2hex( denom10_str, (char*) denom10, 32 ); + cbin2hex( denom100_str, (char*) denom100, 32 ); + cbin2hex( denom1000_str, (char*) denom1000, 32 ); + cbin2hex( denom10000_str, (char*) denom10000, 32 ); + cbin2hex( merkleroot_str, (char*) merkleroothash, 32 ); + cbin2hex( witmerkleroot_str, (char*) witmerkleroothash, 32 ); cbin2hex( pofn_str, (char*) pofnhash, 32 ); if ( true ) diff --git a/algo/x17/sonoa-4way.c b/algo/x17/sonoa-4way.c index 28ddd7e..a9e6b63 100644 --- a/algo/x17/sonoa-4way.c +++ 
b/algo/x17/sonoa-4way.c @@ -58,7 +58,7 @@ union _sonoa_8way_context_overlay typedef union _sonoa_8way_context_overlay sonoa_8way_context_overlay; -int sonoa_8way_hash( void *state, const void *input, int thrid ) +int sonoa_8way_hash( void *state, const void *input, int thr_id ) { uint64_t vhash[8*8] __attribute__ ((aligned (128))); uint64_t vhashA[8*8] __attribute__ ((aligned (64))); @@ -186,7 +186,7 @@ int sonoa_8way_hash( void *state, const void *input, int thrid ) #endif - if ( work_restart[thrid].restart ) return 0; + if ( work_restart[thr_id].restart ) return 0; // 2 bmw512_8way_full( &ctx.bmw, vhash, vhash, 64 ); @@ -302,7 +302,7 @@ int sonoa_8way_hash( void *state, const void *input, int thrid ) hamsi512_8way_update( &ctx.hamsi, vhash, 64 ); hamsi512_8way_close( &ctx.hamsi, vhash ); - if ( work_restart[thrid].restart ) return 0; + if ( work_restart[thr_id].restart ) return 0; // 3 bmw512_8way_full( &ctx.bmw, vhash, vhash, 64 ); @@ -432,7 +432,7 @@ int sonoa_8way_hash( void *state, const void *input, int thrid ) sph_fugue512_full( &ctx.fugue, hash6, hash6, 64 ); sph_fugue512_full( &ctx.fugue, hash7, hash7, 64 ); - if ( work_restart[thrid].restart ) return 0; + if ( work_restart[thr_id].restart ) return 0; // 4 intrlv_8x64_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6, @@ -630,7 +630,7 @@ int sonoa_8way_hash( void *state, const void *input, int thrid ) #endif - if ( work_restart[thrid].restart ) return 0; + if ( work_restart[thr_id].restart ) return 0; // 5 bmw512_8way_full( &ctx.bmw, vhash, vhash, 64 ); @@ -783,7 +783,7 @@ int sonoa_8way_hash( void *state, const void *input, int thrid ) sph_whirlpool512_full( &ctx.whirlpool, hash6, hash6, 64 ); sph_whirlpool512_full( &ctx.whirlpool, hash7, hash7, 64 ); - if ( work_restart[thrid].restart ) return 0; + if ( work_restart[thr_id].restart ) return 0; // 6 intrlv_8x64_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6, @@ -952,7 +952,7 @@ int sonoa_8way_hash( void *state, const void *input, 
int thrid ) sph_whirlpool512_full( &ctx.whirlpool, hash6, hash6, 64 ); sph_whirlpool512_full( &ctx.whirlpool, hash7, hash7, 64 ); - if ( work_restart[thrid].restart ) return 0; + if ( work_restart[thr_id].restart ) return 0; // 7 intrlv_8x64_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6, @@ -1117,49 +1117,6 @@ int sonoa_8way_hash( void *state, const void *input, int thrid ) return 1; } - -int scanhash_sonoa_8way( struct work *work, uint32_t max_nonce, - uint64_t *hashes_done, struct thr_info *mythr ) -{ - uint32_t hash[8*16] __attribute__ ((aligned (128))); - uint32_t vdata[20*8] __attribute__ ((aligned (64))); - uint32_t lane_hash[8] __attribute__ ((aligned (64))); - uint32_t *hashd7 = &(hash[7<<3]); - uint32_t *pdata = work->data; - const uint32_t *ptarget = work->target; - const uint32_t first_nonce = pdata[19]; - const uint32_t last_nonce = max_nonce - 8; - __m512i *noncev = (__m512i*)vdata + 9; // aligned - uint32_t n = first_nonce; - const int thr_id = mythr->id; - const uint32_t targ32 = ptarget[7]; - - mm512_bswap32_intrlv80_8x64( vdata, pdata ); - *noncev = mm512_intrlv_blend_32( - _mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0, - n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev ); - - do - { - if ( sonoa_8way_hash( hash, vdata, thr_id ) ) - for ( int lane = 0; lane < 8; lane++ ) - if unlikely( ( hashd7[ lane ] <= targ32 ) ) - { - extr_lane_8x32( lane_hash, hash, lane, 256 ); - if ( likely( valid_hash( lane_hash, ptarget ) && !opt_benchmark ) ) - { - pdata[19] = bswap_32( n + lane ); - submit_solution( work, lane_hash, mythr ); - } - } - *noncev = _mm512_add_epi32( *noncev, - m512_const1_64( 0x0000000800000000 ) ); - n += 8; - } while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) ); - pdata[19] = n; - *hashes_done = n - first_nonce; - return 0; -} #elif defined(SONOA_4WAY) @@ -1186,7 +1143,7 @@ union _sonoa_4way_context_overlay typedef union _sonoa_4way_context_overlay sonoa_4way_context_overlay; -int sonoa_4way_hash( void *state, 
const void *input, int thrid ) +int sonoa_4way_hash( void *state, const void *input, int thr_id ) { uint64_t hash0[8] __attribute__ ((aligned (64))); uint64_t hash1[8] __attribute__ ((aligned (64))); @@ -1250,7 +1207,7 @@ int sonoa_4way_hash( void *state, const void *input, int thrid ) echo_full( &ctx.echo, (BitSequence *)hash3, 512, (const BitSequence *)hash3, 64 ); - if ( work_restart[thrid].restart ) return 0; + if ( work_restart[thr_id].restart ) return 0; // 2 intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 ); @@ -1310,7 +1267,7 @@ int sonoa_4way_hash( void *state, const void *input, int thrid ) hamsi512_4way_update( &ctx.hamsi, vhash, 64 ); hamsi512_4way_close( &ctx.hamsi, vhash ); - if ( work_restart[thrid].restart ) return 0; + if ( work_restart[thr_id].restart ) return 0; // 3 bmw512_4way_init( &ctx.bmw ); @@ -1375,7 +1332,7 @@ int sonoa_4way_hash( void *state, const void *input, int thrid ) sph_fugue512_full( &ctx.fugue, hash2, hash2, 64 ); sph_fugue512_full( &ctx.fugue, hash3, hash3, 64 ); - if ( work_restart[thrid].restart ) return 0; + if ( work_restart[thr_id].restart ) return 0; // 4 intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 ); @@ -1472,7 +1429,7 @@ int sonoa_4way_hash( void *state, const void *input, int thrid ) shavite512_2way_init( &ctx.shavite ); shavite512_2way_update_close( &ctx.shavite, vhashB, vhashB, 64 ); - if ( work_restart[thrid].restart ) return 0; + if ( work_restart[thr_id].restart ) return 0; // 5 rintrlv_2x128_4x64( vhash, vhashA, vhashB, 512 ); @@ -1557,7 +1514,7 @@ int sonoa_4way_hash( void *state, const void *input, int thrid ) sph_whirlpool512_full( &ctx.whirlpool, hash2, hash2, 64 ); sph_whirlpool512_full( &ctx.whirlpool, hash3, hash3, 64 ); - if ( work_restart[thrid].restart ) return 0; + if ( work_restart[thr_id].restart ) return 0; // 6 intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 ); @@ -1650,7 +1607,7 @@ int sonoa_4way_hash( void *state, const void *input, int thrid ) sph_whirlpool512_full( &ctx.whirlpool, 
hash2, hash2, 64 ); sph_whirlpool512_full( &ctx.whirlpool, hash3, hash3, 64 ); - if ( work_restart[thrid].restart ) return 0; + if ( work_restart[thr_id].restart ) return 0; // 7 intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 ); @@ -1745,46 +1702,4 @@ int sonoa_4way_hash( void *state, const void *input, int thrid ) return 1; } -int scanhash_sonoa_4way( struct work *work, const uint32_t max_nonce, - uint64_t *hashes_done, struct thr_info *mythr ) -{ - uint32_t hash[4*16] __attribute__ ((aligned (64))); - uint32_t vdata[24*4] __attribute__ ((aligned (64))); - uint32_t lane_hash[8] __attribute__ ((aligned (32))); - uint32_t *hashd7 = &( hash[7<<2] ); - uint32_t *pdata = work->data; - const uint32_t *ptarget = work->target; - const uint32_t first_nonce = pdata[19]; - const uint32_t last_nonce = max_nonce - 4; - const uint32_t targ32 = ptarget[7]; - uint32_t n = first_nonce; - __m256i *noncev = (__m256i*)vdata + 9; - const int thr_id = mythr->id; - - mm256_bswap32_intrlv80_4x64( vdata, pdata ); - *noncev = mm256_intrlv_blend_32( - _mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev ); - - do - { - if ( sonoa_4way_hash( hash, vdata, thr_id ) ) - for ( int lane = 0; lane < 4; lane++ ) - if ( unlikely( hashd7[ lane ] <= targ32 ) ) - { - extr_lane_4x32( lane_hash, hash, lane, 256 ); - if ( likely( valid_hash( lane_hash, ptarget ) && !opt_benchmark ) ) - { - pdata[19] = bswap_32( n + lane ); - submit_solution( work, lane_hash, mythr ); - } - } - *noncev = _mm256_add_epi32( *noncev, - m256_const1_64( 0x0000000400000000 ) ); - n += 4; - } while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) ); - pdata[19] = n; - *hashes_done = n - first_nonce; - return 0; -} - #endif diff --git a/algo/x17/sonoa-gate.c b/algo/x17/sonoa-gate.c index a187913..926beb4 100644 --- a/algo/x17/sonoa-gate.c +++ b/algo/x17/sonoa-gate.c @@ -3,14 +3,13 @@ bool register_sonoa_algo( algo_gate_t* gate ) { #if defined (SONOA_8WAY) - gate->scanhash = (void*)&scanhash_sonoa_8way; + 
gate->scanhash = (void*)&scanhash_8way_64in_32out; gate->hash = (void*)&sonoa_8way_hash; #elif defined (SONOA_4WAY) - gate->scanhash = (void*)&scanhash_sonoa_4way; + gate->scanhash = (void*)&scanhash_4way_64in_32out; gate->hash = (void*)&sonoa_4way_hash; #else init_sonoa_ctx(); -// gate->scanhash = (void*)&scanhash_sonoa; gate->hash = (void*)&sonoa_hash; #endif gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT; diff --git a/algo/x17/sonoa-gate.h b/algo/x17/sonoa-gate.h index 895bcd2..997bff1 100644 --- a/algo/x17/sonoa-gate.h +++ b/algo/x17/sonoa-gate.h @@ -14,21 +14,15 @@ bool register_sonoa_algo( algo_gate_t* gate ); #if defined(SONOA_8WAY) -int sonoa_8way_hash( void *state, const void *input, int thrid ); -int scanhash_sonoa_8way( struct work *work, uint32_t max_nonce, - uint64_t *hashes_done, struct thr_info *mythr ); +int sonoa_8way_hash( void *state, const void *input, int thr_id ); #elif defined(SONOA_4WAY) -int sonoa_4way_hash( void *state, const void *input, int thrid ); -int scanhash_sonoa_4way( struct work *work, uint32_t max_nonce, - uint64_t *hashes_done, struct thr_info *mythr ); +int sonoa_4way_hash( void *state, const void *input, int thr_id ); #else -int sonoa_hash( void *state, const void *input, int thrid ); -int scanhash_sonoa( struct work *work, uint32_t max_nonce, - uint64_t *hashes_done, struct thr_info *mythr ); +int sonoa_hash( void *state, const void *input, int thr_id ); void init_sonoa_ctx(); #endif diff --git a/algo/x17/sonoa.c b/algo/x17/sonoa.c index 502283c..19dbcb7 100644 --- a/algo/x17/sonoa.c +++ b/algo/x17/sonoa.c @@ -83,7 +83,7 @@ void init_sonoa_ctx() sph_haval256_5_init(&sonoa_ctx.haval); }; -int sonoa_hash( void *state, const void *input, int thrid ) +int sonoa_hash( void *state, const void *input, int thr_id ) { uint8_t hash[128] __attribute__ ((aligned (64))); sonoa_ctx_holder ctx __attribute__ ((aligned (64))); @@ -132,7 +132,7 @@ int sonoa_hash( void *state, const void *input, int thrid ) 
sph_echo512_close(&ctx.echo, hash); #endif - if ( work_restart[thrid].restart ) return 0; + if ( work_restart[thr_id].restart ) return 0; // sph_bmw512_init( &ctx.bmw); @@ -190,7 +190,7 @@ int sonoa_hash( void *state, const void *input, int thrid ) sph_hamsi512(&ctx.hamsi, hash, 64); sph_hamsi512_close(&ctx.hamsi, hash); - if ( work_restart[thrid].restart ) return 0; + if ( work_restart[thr_id].restart ) return 0; // sph_bmw512_init( &ctx.bmw); @@ -252,7 +252,7 @@ int sonoa_hash( void *state, const void *input, int thrid ) sph_fugue512(&ctx.fugue, hash, 64); sph_fugue512_close(&ctx.fugue, hash); - if ( work_restart[thrid].restart ) return 0; + if ( work_restart[thr_id].restart ) return 0; // sph_bmw512_init( &ctx.bmw); @@ -336,7 +336,7 @@ int sonoa_hash( void *state, const void *input, int thrid ) sph_shavite512(&ctx.shavite, hash, 64); sph_shavite512_close(&ctx.shavite, hash); - if ( work_restart[thrid].restart ) return 0; + if ( work_restart[thr_id].restart ) return 0; // sph_bmw512_init( &ctx.bmw); @@ -410,7 +410,7 @@ int sonoa_hash( void *state, const void *input, int thrid ) sph_whirlpool(&ctx.whirlpool, hash, 64); sph_whirlpool_close(&ctx.whirlpool, hash); - if ( work_restart[thrid].restart ) return 0; + if ( work_restart[thr_id].restart ) return 0; // sph_bmw512_init( &ctx.bmw); sph_bmw512(&ctx.bmw, hash, 64); @@ -487,7 +487,7 @@ int sonoa_hash( void *state, const void *input, int thrid ) sph_whirlpool(&ctx.whirlpool, hash, 64); sph_whirlpool_close(&ctx.whirlpool, hash); - if ( work_restart[thrid].restart ) return 0; + if ( work_restart[thr_id].restart ) return 0; // sph_bmw512_init( &ctx.bmw); diff --git a/algo/x17/x17-4way.c b/algo/x17/x17-4way.c index 30e4659..4fe98bc 100644 --- a/algo/x17/x17-4way.c +++ b/algo/x17/x17-4way.c @@ -57,7 +57,7 @@ union _x17_8way_context_overlay } __attribute__ ((aligned (64))); typedef union _x17_8way_context_overlay x17_8way_context_overlay; -int x17_8way_hash( void *state, const void *input ) +int x17_8way_hash( void 
*state, const void *input, int thr_id ) { uint64_t vhash[8*8] __attribute__ ((aligned (128))); uint64_t vhashA[8*8] __attribute__ ((aligned (64))); @@ -234,50 +234,6 @@ int x17_8way_hash( void *state, const void *input ) return 1; } -int scanhash_x17_8way( struct work *work, uint32_t max_nonce, - uint64_t *hashes_done, struct thr_info *mythr ) -{ - uint32_t hash32[8*8] __attribute__ ((aligned (128))); - uint32_t vdata[20*8] __attribute__ ((aligned (64))); - uint32_t lane_hash[8] __attribute__ ((aligned (64))); - uint32_t *hash32_d7 = &(hash32[7*8]); - uint32_t *pdata = work->data; - const uint32_t *ptarget = work->target; - const uint32_t first_nonce = pdata[19]; - const uint32_t last_nonce = max_nonce - 8; - __m512i *noncev = (__m512i*)vdata + 9; - uint32_t n = first_nonce; - const int thr_id = mythr->id; - const uint32_t targ32_d7 = ptarget[7]; - const bool bench = opt_benchmark; - - mm512_bswap32_intrlv80_8x64( vdata, pdata ); - *noncev = mm512_intrlv_blend_32( - _mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0, - n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev ); - do - { - x17_8way_hash( hash32, vdata ); - - for ( int lane = 0; lane < 8; lane++ ) - if ( unlikely( ( hash32_d7[ lane ] <= targ32_d7 ) && !bench ) ) - { - extr_lane_8x32( lane_hash, hash32, lane, 256 ); - if ( likely( valid_hash( lane_hash, ptarget ) ) ) - { - pdata[19] = bswap_32( n + lane ); - submit_solution( work, lane_hash, mythr ); - } - } - *noncev = _mm512_add_epi32( *noncev, - m512_const1_64( 0x0000000800000000 ) ); - n += 8; - } while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) ); - pdata[19] = n; - *hashes_done = n - first_nonce; - return 0; -} - #elif defined(X17_4WAY) union _x17_4way_context_overlay @@ -302,7 +258,7 @@ union _x17_4way_context_overlay }; typedef union _x17_4way_context_overlay x17_4way_context_overlay; -int x17_4way_hash( void *state, const void *input ) +int x17_4way_hash( void *state, const void *input, int thr_id ) { uint64_t vhash[8*4] __attribute__ 
((aligned (64))); uint64_t vhashA[8*4] __attribute__ ((aligned (64))); @@ -405,47 +361,4 @@ int x17_4way_hash( void *state, const void *input ) return 1; } -int scanhash_x17_4way( struct work *work, uint32_t max_nonce, - uint64_t *hashes_done, struct thr_info *mythr ) -{ - uint32_t hash32[8*4] __attribute__ ((aligned (64))); - uint32_t vdata[20*4] __attribute__ ((aligned (64))); - uint32_t lane_hash[8] __attribute__ ((aligned (64))); - uint32_t *hash32_d7 = &(hash32[ 7*4 ]); - uint32_t *pdata = work->data; - const uint32_t *ptarget = work->target; - const uint32_t first_nonce = pdata[19]; - const uint32_t last_nonce = max_nonce - 4; - __m256i *noncev = (__m256i*)vdata + 9; - uint32_t n = first_nonce; - const int thr_id = mythr->id; - const uint32_t targ32_d7 = ptarget[7]; - const bool bench = opt_benchmark; - - mm256_bswap32_intrlv80_4x64( vdata, pdata ); - *noncev = mm256_intrlv_blend_32( - _mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev ); - do - { - x17_4way_hash( hash32, vdata ); - - for ( int lane = 0; lane < 4; lane++ ) - if ( unlikely( hash32_d7[ lane ] <= targ32_d7 && !bench ) ) - { - extr_lane_4x32( lane_hash, hash32, lane, 256 ); - if ( valid_hash( lane_hash, ptarget ) ) - { - pdata[19] = bswap_32( n + lane ); - submit_solution( work, lane_hash, mythr ); - } - } - *noncev = _mm256_add_epi32( *noncev, - m256_const1_64( 0x0000000400000000 ) ); - n += 4; - } while ( likely( ( n <= last_nonce ) && !work_restart[thr_id].restart ) ); - pdata[19] = n; - *hashes_done = n - first_nonce; - return 0; -} - #endif diff --git a/algo/x17/x17-gate.c b/algo/x17/x17-gate.c index 4ebfa0b..eee3d60 100644 --- a/algo/x17/x17-gate.c +++ b/algo/x17/x17-gate.c @@ -3,10 +3,10 @@ bool register_x17_algo( algo_gate_t* gate ) { #if defined (X17_8WAY) - gate->scanhash = (void*)&scanhash_x17_8way; + gate->scanhash = (void*)&scanhash_8way_64in_32out; gate->hash = (void*)&x17_8way_hash; #elif defined (X17_4WAY) - gate->scanhash = (void*)&scanhash_x17_4way; + gate->scanhash = 
(void*)&scanhash_4way_64in_32out; gate->hash = (void*)&x17_4way_hash; #else gate->hash = (void*)&x17_hash; diff --git a/algo/x17/x17-gate.h b/algo/x17/x17-gate.h index 1b8ada4..003d77f 100644 --- a/algo/x17/x17-gate.h +++ b/algo/x17/x17-gate.h @@ -14,14 +14,11 @@ bool register_x17_algo( algo_gate_t* gate ); #if defined(X17_8WAY) -int x17_8way_hash( void *state, const void *input ); -int scanhash_x17_8way( struct work *work, uint32_t max_nonce, - uint64_t *hashes_done, struct thr_info *mythr ); +int x17_8way_hash( void *state, const void *input, int thr_id ); + #elif defined(X17_4WAY) -int x17_4way_hash( void *state, const void *input ); -int scanhash_x17_4way( struct work *work, uint32_t max_nonce, - uint64_t *hashes_done, struct thr_info *mythr ); +int x17_4way_hash( void *state, const void *input, int thr_id ); #endif diff --git a/algo/x17/xevan-4way.c b/algo/x17/xevan-4way.c index 3f1ff14..fbf5d26 100644 --- a/algo/x17/xevan-4way.c +++ b/algo/x17/xevan-4way.c @@ -57,7 +57,7 @@ union _xevan_8way_context_overlay } __attribute__ ((aligned (64))); typedef union _xevan_8way_context_overlay xevan_8way_context_overlay; -int xevan_8way_hash( void *output, const void *input ) +int xevan_8way_hash( void *output, const void *input, int thr_id ) { uint64_t vhash[16<<3] __attribute__ ((aligned (128))); uint64_t vhashA[16<<3] __attribute__ ((aligned (64))); @@ -399,50 +399,6 @@ int xevan_8way_hash( void *output, const void *input ) return 1; } -int scanhash_xevan_8way( struct work *work, uint32_t max_nonce, - uint64_t *hashes_done, struct thr_info *mythr ) -{ - uint32_t hash[8*8] __attribute__ ((aligned (128))); - uint32_t vdata[20*8] __attribute__ ((aligned (64))); - uint32_t lane_hash[8] __attribute__ ((aligned (64))); - uint32_t *hashd7 = &(hash[7*8]); - uint32_t *pdata = work->data; - const uint32_t *ptarget = work->target; - const uint32_t first_nonce = pdata[19]; - const uint32_t last_nonce = max_nonce - 8; - __m512i *noncev = (__m512i*)vdata + 9; - uint32_t n = 
first_nonce; - const int thr_id = mythr->id; - const uint32_t targ32 = ptarget[7]; - const bool bench = opt_benchmark; - - mm512_bswap32_intrlv80_8x64( vdata, pdata ); - *noncev = mm512_intrlv_blend_32( - _mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0, - n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev ); - do - { - xevan_8way_hash( hash, vdata ); - - for ( int lane = 0; lane < 8; lane++ ) - if ( unlikely( ( hashd7[ lane ] <= targ32 ) && !bench ) ) - { - extr_lane_8x32( lane_hash, hash, lane, 256 ); - if ( likely( valid_hash( lane_hash, ptarget ) ) ) - { - pdata[19] = bswap_32( n + lane ); - submit_solution( work, lane_hash, mythr ); - } - } - *noncev = _mm512_add_epi32( *noncev, - m512_const1_64( 0x0000000800000000 ) ); - n += 8; - } while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) ); - pdata[19] = n; - *hashes_done = n - first_nonce; - return 0; -} - #elif defined(XEVAN_4WAY) union _xevan_4way_context_overlay @@ -467,7 +423,7 @@ union _xevan_4way_context_overlay }; typedef union _xevan_4way_context_overlay xevan_4way_context_overlay; -int xevan_4way_hash( void *output, const void *input ) +int xevan_4way_hash( void *output, const void *input, int thr_id ) { uint64_t hash0[16] __attribute__ ((aligned (64))); uint64_t hash1[16] __attribute__ ((aligned (64))); @@ -672,47 +628,4 @@ int xevan_4way_hash( void *output, const void *input ) return 1; } -int scanhash_xevan_4way( struct work *work, uint32_t max_nonce, - uint64_t *hashes_done, struct thr_info *mythr ) -{ - uint32_t hash[16*4] __attribute__ ((aligned (128))); - uint32_t vdata[20*4] __attribute__ ((aligned (64))); - uint32_t lane_hash[8] __attribute__ ((aligned (64))); - uint32_t *hashd7 = &(hash[7<<2]); - uint32_t *pdata = work->data; - uint32_t *ptarget = work->target; - int thr_id = mythr->id; - __m256i *noncev = (__m256i*)vdata + 9; - const uint32_t targ32 = ptarget[7]; - const uint32_t first_nonce = pdata[19]; - const uint32_t last_nonce = max_nonce - 4; - uint32_t n = first_nonce; - const 
bool bench = opt_benchmark; - - if ( bench ) ptarget[7] = 0x0cff; - - mm256_bswap32_intrlv80_4x64( vdata, pdata ); - *noncev = mm256_intrlv_blend_32( - _mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev ); - do { - xevan_4way_hash( hash, vdata ); - for ( int lane = 0; lane < 4; lane++ ) - if ( unlikely( hashd7[ lane ] <= targ32 ) && ! bench ) - { - extr_lane_4x32( lane_hash, hash, lane, 256 ); - if ( valid_hash( lane_hash, ptarget ) ) - { - pdata[19] = bswap_32( n + lane ); - submit_solution( work, lane_hash, mythr ); - } - } - *noncev = _mm256_add_epi32( *noncev, - m256_const1_64( 0x0000000400000000 ) ); - n += 4; - } while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) ); - pdata[19] = n; - *hashes_done = n - first_nonce; - return 0; -} - #endif diff --git a/algo/x17/xevan-gate.c b/algo/x17/xevan-gate.c index b129330..184ed2d 100644 --- a/algo/x17/xevan-gate.c +++ b/algo/x17/xevan-gate.c @@ -3,10 +3,10 @@ bool register_xevan_algo( algo_gate_t* gate ) { #if defined (XEVAN_8WAY) - gate->scanhash = (void*)&scanhash_xevan_8way; + gate->scanhash = (void*)&scanhash_8way_64in_32out; gate->hash = (void*)&xevan_8way_hash; #elif defined (XEVAN_4WAY) - gate->scanhash = (void*)&scanhash_xevan_4way; + gate->scanhash = (void*)&scanhash_4way_64in_32out; gate->hash = (void*)&xevan_4way_hash; #else init_xevan_ctx(); diff --git a/algo/x17/xevan-gate.h b/algo/x17/xevan-gate.h index 488d1c0..8ef9a2e 100644 --- a/algo/x17/xevan-gate.h +++ b/algo/x17/xevan-gate.h @@ -14,16 +14,11 @@ bool register_xevan_algo( algo_gate_t* gate ); #if defined(XEVAN_8WAY) -int xevan_8way_hash( void *state, const void *input ); -int scanhash_xevan_8way( struct work *work, uint32_t max_nonce, - uint64_t *hashes_done, struct thr_info *mythr ); +int xevan_8way_hash( void *state, const void *input, int thr_id ); + #elif defined(XEVAN_4WAY) -int xevan_4way_hash( void *state, const void *input ); -int scanhash_xevan_4way( struct work *work, uint32_t max_nonce, - uint64_t *hashes_done, 
struct thr_info *mythr ); - -//void init_xevan_4way_ctx(); +int xevan_4way_hash( void *state, const void *input, int thr_id ); #else diff --git a/configure b/configure index 19d2efa..d427b66 100755 --- a/configure +++ b/configure @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.14.2. +# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.14.3. # # # Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc. @@ -577,8 +577,8 @@ MAKEFLAGS= # Identity of this package. PACKAGE_NAME='cpuminer-opt' PACKAGE_TARNAME='cpuminer-opt' -PACKAGE_VERSION='3.14.2' -PACKAGE_STRING='cpuminer-opt 3.14.2' +PACKAGE_VERSION='3.14.3' +PACKAGE_STRING='cpuminer-opt 3.14.3' PACKAGE_BUGREPORT='' PACKAGE_URL='' @@ -1332,7 +1332,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures cpuminer-opt 3.14.2 to adapt to many kinds of systems. +\`configure' configures cpuminer-opt 3.14.3 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1404,7 +1404,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of cpuminer-opt 3.14.2:";; + short | recursive ) echo "Configuration of cpuminer-opt 3.14.3:";; esac cat <<\_ACEOF @@ -1509,7 +1509,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -cpuminer-opt configure 3.14.2 +cpuminer-opt configure 3.14.3 generated by GNU Autoconf 2.69 Copyright (C) 2012 Free Software Foundation, Inc. @@ -2012,7 +2012,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. 
-It was created by cpuminer-opt $as_me 3.14.2, which was +It was created by cpuminer-opt $as_me 3.14.3, which was generated by GNU Autoconf 2.69. Invocation command line was $ $0 $@ @@ -2993,7 +2993,7 @@ fi # Define the identity of the package. PACKAGE='cpuminer-opt' - VERSION='3.14.2' + VERSION='3.14.3' cat >>confdefs.h <<_ACEOF @@ -6690,7 +6690,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by cpuminer-opt $as_me 3.14.2, which was +This file was extended by cpuminer-opt $as_me 3.14.3, which was generated by GNU Autoconf 2.69. Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -6756,7 +6756,7 @@ _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" ac_cs_version="\\ -cpuminer-opt config.status 3.14.2 +cpuminer-opt config.status 3.14.3 configured by $0, generated by GNU Autoconf 2.69, with options \\"\$ac_cs_config\\" diff --git a/configure.ac b/configure.ac index 28addf0..9654d17 100644 --- a/configure.ac +++ b/configure.ac @@ -1,4 +1,4 @@ -AC_INIT([cpuminer-opt], [3.14.2]) +AC_INIT([cpuminer-opt], [3.14.3]) AC_PREREQ([2.59c]) AC_CANONICAL_SYSTEM diff --git a/cpu-miner.c b/cpu-miner.c index 6fdc6bd..0cad187 100644 --- a/cpu-miner.c +++ b/cpu-miner.c @@ -92,7 +92,7 @@ bool want_longpoll = false; bool have_longpoll = false; bool have_gbt = true; bool allow_getwork = true; -bool want_stratum = true; +bool want_stratum = true; // pretty useless bool have_stratum = false; bool allow_mininginfo = true; bool use_syslog = false; @@ -215,7 +215,7 @@ static char const short_options[] = static struct work g_work __attribute__ ((aligned (64))) = {{ 0 }}; time_t g_work_time = 0; -pthread_mutex_t g_work_lock; +pthread_rwlock_t g_work_lock; static bool submit_old = false; char* lp_id; @@ -1232,7 +1232,7 @@ static int share_result( int result, 
struct work *work, if ( use_colors ) { - bcol = acol = scol = rcol = CL_N; + bcol = acol = scol = rcol = CL_WHT; if ( likely( result ) ) { acol = CL_WHT CL_GRN; @@ -1242,27 +1242,22 @@ static int share_result( int result, struct work *work, else rcol = CL_WHT CL_RED; } - applog( LOG_NOTICE, "%d %s%s %s%s %s%s %s%s" CL_N ", %.3f sec (%dms)", + applog( LOG_NOTICE, "%d %s%s %s%s %s%s %s%s" CL_WHT ", %.3f sec (%dms)", my_stats.share_count, acol, ares, scol, sres, rcol, rres, bcol, bres, share_time, latency ); -/* - if ( !opt_quiet ) + if ( unlikely( opt_debug || !result || solved ) ) { if ( have_stratum ) - applog2( LOG_INFO, "Diff %.5g (%.3g), %sBlock %d" CL_N ", %sJob %s", - my_stats.share_diff, share_ratio, bcol, stratum.block_height, - scol, my_stats.job_id ); + applog2( LOG_INFO, "Diff %.5g, Block %d, Job %s", + my_stats.share_diff, stratum.block_height, + my_stats.job_id ); else - { - uint64_t height = work ? work->height : last_block_height; - applog2( LOG_INFO, "Diff %.5g (%.3g), %sBlock %d", - my_stats.share_diff, share_ratio, bcol, height ); - } + applog2( LOG_INFO, "Diff %.5g, Block %d", + my_stats.share_diff, work ? work->height : last_block_height ); } -*/ - if ( unlikely( opt_debug || !( opt_quiet || result || stale ) ) ) + if ( unlikely( !( opt_quiet || result || stale ) ) ) { uint32_t str[8]; @@ -1835,9 +1830,9 @@ bool submit_solution( struct work *work, const void *hash, if unlikely( !have_stratum && !have_longpoll ) { // block solved, force getwork - pthread_mutex_lock( &g_work_lock ); + pthread_rwlock_wrlock( &g_work_lock ); g_work_time = 0; - pthread_mutex_unlock( &g_work_lock ); + pthread_rwlock_unlock( &g_work_lock ); } if ( !opt_quiet ) @@ -1960,7 +1955,7 @@ void std_get_new_work( struct work* work, struct work* g_work, int thr_id, uint32_t *nonceptr = work->data + algo_gate.nonce_index; bool force_new_work = false; - pthread_mutex_lock( &g_work_lock ); + pthread_rwlock_rdlock( &g_work_lock ); if ( have_stratum ) force_new_work = work->job_id ? 
strtoul( work->job_id, NULL, 16 ) @@ -1978,7 +1973,7 @@ void std_get_new_work( struct work* work, struct work* g_work, int thr_id, else ++(*nonceptr); - pthread_mutex_unlock( &g_work_lock ); + pthread_rwlock_unlock( &g_work_lock ); } bool std_ready_to_mine( struct work* work, struct stratum_ctx* stratum, @@ -1998,7 +1993,7 @@ static void stratum_gen_work( struct stratum_ctx *sctx, struct work *g_work ) bool new_job = *get_stratum_job_ntime() != g_work->data[ algo_gate.ntime_index ]; - pthread_mutex_lock( &g_work_lock ); + pthread_rwlock_wrlock( &g_work_lock ); pthread_mutex_lock( &sctx->work_lock ); free( g_work->job_id ); @@ -2013,11 +2008,13 @@ static void stratum_gen_work( struct stratum_ctx *sctx, struct work *g_work ) g_work->targetdiff = sctx->job.diff / ( opt_target_factor * opt_diff_factor ); diff_to_hash( g_work->target, g_work->targetdiff ); + // Increment extranonce2 + for ( int t = 0; t < sctx->xnonce2_size && !( ++sctx->job.xnonce2[t] ); t++ ); g_work_time = time(NULL); restart_threads(); pthread_mutex_unlock( &sctx->work_lock ); - pthread_mutex_unlock( &g_work_lock ); + pthread_rwlock_unlock( &g_work_lock ); pthread_mutex_lock( &stats_lock ); @@ -2037,11 +2034,11 @@ static void stratum_gen_work( struct stratum_ctx *sctx, struct work *g_work ) else if ( new_job && g_work->job_id ) applog( LOG_BLUE, "New Work: Block %d, Net diff %.5g, Job %s", sctx->block_height, net_diff, g_work->job_id ); - else if ( opt_debug ) + else if ( !opt_quiet ) { unsigned char *xnonce2str = abin2hex( g_work->xnonce2, g_work->xnonce2_len ); - applog( LOG_INFO, "Extranonce2 %s, Block %d, Net Diff %.5g", + applog( LOG_INFO, "Extranonce %s, Block %d, Net Diff %.5g", xnonce2str, sctx->block_height, net_diff ); free( xnonce2str ); } @@ -2222,24 +2219,24 @@ static void *miner_thread( void *userdata ) } else { - int scantime = have_longpoll ? 
LP_SCANTIME : opt_scantime; - pthread_mutex_lock( &g_work_lock ); + pthread_rwlock_wrlock( &g_work_lock ); - if ( ( ( time(NULL) - g_work_time ) >= scantime ) + if ( ( ( time(NULL) - g_work_time ) + >= ( have_longpoll ? LP_SCANTIME : opt_scantime ) ) || ( *nonceptr >= end_nonce ) ) { if ( unlikely( !get_work( mythr, &g_work ) ) ) { - pthread_mutex_unlock( &g_work_lock ); + pthread_rwlock_unlock( &g_work_lock ); applog( LOG_ERR, "work retrieval failed, exiting " - "mining thread %d", thr_id ); + "mining thread %d", thr_id ); goto out; } g_work_time = time(NULL); restart_threads(); } - pthread_mutex_unlock( &g_work_lock ); + pthread_rwlock_unlock( &g_work_lock ); } algo_gate.get_new_work( &work, &g_work, thr_id, &end_nonce ); @@ -2349,10 +2346,10 @@ static void *miner_thread( void *userdata ) // we can't submit twice a block! if unlikely( !have_stratum && !have_longpoll ) { - pthread_mutex_lock( &g_work_lock ); + pthread_rwlock_wrlock( &g_work_lock ); // will force getwork g_work_time = 0; - pthread_mutex_unlock( &g_work_lock ); + pthread_rwlock_unlock( &g_work_lock ); } } @@ -2384,11 +2381,9 @@ static void *miner_thread( void *userdata ) if ( use_colors && ( curr_temp >= 70 ) ) { if ( curr_temp >= 80 ) - sprintf( tempstr, "%s%d C%s", - CL_WHT CL_RED, curr_temp, CL_N ); + sprintf( tempstr, "%s%d C%s", CL_RED, curr_temp, CL_WHT ); else - sprintf( tempstr, "%s%d C%s", - CL_WHT CL_YLW, curr_temp, CL_N ); + sprintf( tempstr, "%s%d C%s", CL_YLW, curr_temp, CL_WHT ); } else sprintf( tempstr, "%d C", curr_temp ); @@ -2539,7 +2534,8 @@ start: res = json_object_get(val, "result"); soval = json_object_get(res, "submitold"); submit_old = soval ? json_is_true(soval) : false; - pthread_mutex_lock(&g_work_lock); + + pthread_rwlock_wrlock( &g_work_lock ); // This code has been here for a long time even though job_id isn't used. 
// This needs to be changed eventually to test the block height properly @@ -2573,14 +2569,16 @@ start: } } free(start_job_id); - pthread_mutex_unlock(&g_work_lock); + + pthread_rwlock_unlock( &g_work_lock ); + json_decref(val); } else // !val { - pthread_mutex_lock(&g_work_lock); - g_work_time -= LP_SCANTIME; - pthread_mutex_unlock(&g_work_lock); + pthread_rwlock_wrlock( &g_work_lock ); + g_work_time -= LP_SCANTIME; + pthread_rwlock_unlock( &g_work_lock ); if (err == CURLE_OPERATION_TIMEDOUT) { restart_threads(); @@ -2689,12 +2687,8 @@ void std_build_block_header( struct work* g_work, uint32_t version, void std_build_extraheader( struct work* g_work, struct stratum_ctx* sctx ) { uchar merkle_tree[64] = { 0 }; - size_t t; algo_gate.gen_merkle_root( merkle_tree, sctx ); - // Increment extranonce2 - for ( t = 0; t < sctx->xnonce2_size && !( ++sctx->job.xnonce2[t] ); t++ ); - // Assemble block header algo_gate.build_block_header( g_work, le32dec( sctx->job.version ), (uint32_t*) sctx->job.prevhash, (uint32_t*) merkle_tree, le32dec( sctx->job.ntime ), le32dec(sctx->job.nbits), @@ -2733,10 +2727,10 @@ static void *stratum_thread(void *userdata ) while ( !stratum.curl ) { - pthread_mutex_lock( &g_work_lock ); + pthread_rwlock_wrlock( &g_work_lock ); g_work_time = 0; - pthread_mutex_unlock( &g_work_lock ); - restart_threads(); + pthread_rwlock_unlock( &g_work_lock ); +// restart_threads(); if ( !stratum_connect( &stratum, stratum.url ) || !stratum_subscribe( &stratum ) || !stratum_authorize( &stratum, rpc_user, rpc_pass ) ) @@ -2872,167 +2866,180 @@ void parse_arg(int key, char *arg ) uint64_t ul; double d; - switch(key) - { - case 'a': - get_algo_alias( &arg ); - for (i = 1; i < ALGO_COUNT; i++) - { - v = (int) strlen(algo_names[i]); - if (v && !strncasecmp(arg, algo_names[i], v)) - { - if (arg[v] == '\0') - { - opt_algo = (enum algos) i; - break; - } - if (arg[v] == ':') - { - char *ep; - v = strtol(arg+v+1, &ep, 10); - if (*ep || v < 2) - continue; - opt_algo = (enum 
algos) i; - opt_param_n = v; - break; - } - } + switch( key ) + { + case 'a': // algo + get_algo_alias( &arg ); + for (i = 1; i < ALGO_COUNT; i++) + { + v = (int) strlen( algo_names[i] ); + if ( v && !strncasecmp( arg, algo_names[i], v ) ) + { + if ( arg[v] == '\0' ) + { + opt_algo = (enum algos) i; + break; + } + if ( arg[v] == ':' ) + { + char *ep; + v = strtol( arg+v+1, &ep, 10 ); + if ( *ep || v < 2 ) + continue; + opt_algo = (enum algos) i; + opt_param_n = v; + break; + } + } } - if (i == ALGO_COUNT) - { - applog(LOG_ERR,"Unknown algo: %s",arg); - show_usage_and_exit(1); - } - break; + if ( i == ALGO_COUNT ) + { + applog( LOG_ERR,"Unknown algo: %s",arg ); + show_usage_and_exit( 1 ); + } + break; - case 'b': + case 'b': // api-bind opt_api_enabled = true; p = strstr(arg, ":"); - if (p) { + if ( p ) + { /* ip:port */ - if (p - arg > 0) { + if ( p - arg > 0 ) + { opt_api_allow = strdup(arg); opt_api_allow[p - arg] = '\0'; } opt_api_listen = atoi(p + 1); } - else if (arg && strstr(arg, ".")) { + else if ( arg && strstr( arg, "." 
) ) + { /* ip only */ free(opt_api_allow); opt_api_allow = strdup(arg); opt_api_listen = default_api_listen; } - else if (arg) { + else if ( arg ) + { /* port or 0 to disable */ opt_api_allow = default_api_allow; opt_api_listen = atoi(arg); } break; - case 1030: /* --api-remote */ + case 1030: // api-remote opt_api_remote = 1; break; - case 'B': + case 'B': // background opt_background = true; use_colors = false; break; - case 'c': { + case 'c': { // config json_error_t err; json_t *config; if (arg && strstr(arg, "://")) config = json_load_url(arg, &err); - else + else config = JSON_LOADF(arg, &err); if (!json_is_object(config)) - { + { if (err.line < 0) fprintf(stderr, "%s\n", err.text); else - fprintf(stderr, "%s:%d: %s\n", - arg, err.line, err.text); + fprintf(stderr, "%s:%d: %s\n", arg, err.line, err.text); } - else - { + else + { parse_config(config, arg); json_decref(config); } break; } - case 'q': - opt_quiet = true; + + // debug overrides quiet + case 'q': // quiet + if ( !( opt_debug || opt_protocol ) ) opt_quiet = true; break; - case 'D': + case 'D': // debug opt_debug = true; - break; - case 'p': + opt_quiet = false; + break; + case 'p': // pass free(rpc_pass); rpc_pass = strdup(arg); strhide(arg); break; - case 'P': + case 'P': // protocol opt_protocol = true; + opt_quiet = false; break; - case 'r': + case 'r': // retries v = atoi(arg); if (v < -1 || v > 9999) /* sanity check */ show_usage_and_exit(1); opt_retries = v; break; - case 1025: + case 1025: // retry-pause v = atoi(arg); if (v < 1 || v > 9999) /* sanity check */ show_usage_and_exit(1); opt_fail_pause = v; break; - case 's': + case 's': // scantime v = atoi(arg); if (v < 1 || v > 9999) /* sanity check */ show_usage_and_exit(1); opt_scantime = v; break; - case 'T': + case 'T': // timeout v = atoi(arg); if (v < 1 || v > 99999) /* sanity check */ show_usage_and_exit(1); opt_timeout = v; break; - case 't': + case 't': // threads v = atoi(arg); if (v < 0 || v > 9999) /* sanity check */ 
show_usage_and_exit(1); opt_n_threads = v; break; - case 'u': + case 'u': // user free(rpc_user); rpc_user = strdup(arg); break; - case 'o': { /* --url */ + case 'o': // url + { char *ap, *hp; - ap = strstr(arg, "://"); + ap = strstr( arg, "://" ); ap = ap ? ap + 3 : arg; - hp = strrchr(arg, '@'); - if (hp) { + hp = strrchr( arg, '@' ); + if ( hp ) + { *hp = '\0'; - p = strchr(ap, ':'); - if (p) { - free(rpc_userpass); - rpc_userpass = strdup(ap); - free(rpc_user); - rpc_user = (char*) calloc(p - ap + 1, 1); - strncpy(rpc_user, ap, p - ap); - free(rpc_pass); - rpc_pass = strdup(++p); - if (*p) *p++ = 'x'; - v = (int) strlen(hp + 1) + 1; - memmove(p + 1, hp + 1, v); - memset(p + v, 0, hp - p); + p = strchr( ap, ':' ); + if ( p ) + { + free( rpc_userpass ); + rpc_userpass = strdup( ap ); + free( rpc_user ); + rpc_user = (char*)calloc( p - ap + 1, 1 ); + strncpy( rpc_user, ap, p - ap ); + free( rpc_pass ); + rpc_pass = strdup( ++p ); + if ( *p ) *p++ = 'x'; + v = (int)strlen( hp + 1 ) + 1; + memmove( p + 1, hp + 1, v ); + memset( p + v, 0, hp - p ); hp = p; - } else { - free(rpc_user); - rpc_user = strdup(ap); + } + else + { + free( rpc_user ); + rpc_user = strdup( ap ); } *hp++ = '@'; - } else + } + else hp = ap; if ( ap != arg ) { @@ -3048,23 +3055,26 @@ void parse_arg(int key, char *arg ) rpc_url = strdup(arg); strcpy(rpc_url + (ap - arg), hp); short_url = &rpc_url[ap - arg]; - } else { - if (*hp == '\0' || *hp == '/') { - fprintf(stderr, "invalid URL -- '%s'\n", - arg); - show_usage_and_exit(1); + } + else + { + if ( *hp == '\0' || *hp == '/' ) + { + fprintf( stderr, "invalid URL -- '%s'\n", arg ); + show_usage_and_exit( 1 ); } - free(rpc_url); + free( rpc_url ); rpc_url = (char*) malloc( strlen(hp) + 15 ); sprintf( rpc_url, "stratum+tcp://%s", hp ); short_url = &rpc_url[ sizeof("stratum+tcp://") - 1 ]; } - have_stratum = !opt_benchmark && !strncasecmp(rpc_url, "stratum", 7); + have_stratum = !opt_benchmark && !strncasecmp( rpc_url, "stratum", 7 ); break; } - case 
'O': /* --userpass */ + case 'O': // userpass p = strchr(arg, ':'); - if (!p) { + if (!p) + { fprintf(stderr, "invalid username:password pair -- '%s'\n", arg); show_usage_and_exit(1); } @@ -3077,15 +3087,15 @@ void parse_arg(int key, char *arg ) rpc_pass = strdup(++p); strhide(p); break; - case 'x': /* --proxy */ - if (!strncasecmp(arg, "socks4://", 9)) + case 'x': // proxy + if ( !strncasecmp( arg, "socks4://", 9 ) ) opt_proxy_type = CURLPROXY_SOCKS4; - else if (!strncasecmp(arg, "socks5://", 9)) + else if ( !strncasecmp( arg, "socks5://", 9 ) ) opt_proxy_type = CURLPROXY_SOCKS5; #if LIBCURL_VERSION_NUM >= 0x071200 - else if (!strncasecmp(arg, "socks4a://", 10)) + else if ( !strncasecmp( arg, "socks4a://", 10 ) ) opt_proxy_type = CURLPROXY_SOCKS4A; - else if (!strncasecmp(arg, "socks5h://", 10)) + else if ( !strncasecmp( arg, "socks5h://", 10 ) ) opt_proxy_type = CURLPROXY_SOCKS5_HOSTNAME; #endif else @@ -3093,42 +3103,42 @@ void parse_arg(int key, char *arg ) free(opt_proxy); opt_proxy = strdup(arg); break; - case 1001: + case 1001: // cert free(opt_cert); opt_cert = strdup(arg); break; - case 1002: + case 1002: // no-color use_colors = false; break; - case 1003: + case 1003: // no-longpoll want_longpoll = false; break; - case 1005: + case 1005: // benchmark opt_benchmark = true; want_longpoll = false; want_stratum = false; have_stratum = false; break; - case 1006: + case 1006: // cputest // print_hash_tests(); exit(0); - case 1007: + case 1007: // no-stratum want_stratum = false; opt_extranonce = false; break; - case 1008: + case 1008: // time-limit opt_time_limit = atoi(arg); break; - case 1009: + case 1009: // no-redirect opt_redirect = false; break; - case 1010: + case 1010: // no-getwork allow_getwork = false; break; - case 1011: + case 1011: // no-gbt have_gbt = false; break; - case 1012: + case 1012: // no-extranonce opt_extranonce = false; break; case 1014: // hash-meter @@ -3138,11 +3148,12 @@ void parse_arg(int key, char *arg ) if ( arg ) 
coinbase_address = strdup( arg ); break; case 1015: /* --coinbase-sig */ - if (strlen(arg) + 1 > sizeof(coinbase_sig)) { - fprintf(stderr, "coinbase signature too long\n"); - show_usage_and_exit(1); + if ( strlen( arg ) + 1 > sizeof(coinbase_sig) ) + { + fprintf( stderr, "coinbase signature too long\n" ); + show_usage_and_exit( 1 ); } - strcpy(coinbase_sig, arg); + strcpy( coinbase_sig, arg ); break; case 'f': d = atof(arg); @@ -3156,11 +3167,13 @@ void parse_arg(int key, char *arg ) show_usage_and_exit(1); opt_diff_factor = 1.0/d; break; - case 'S': +#ifdef HAVE_SYSLOG_H + case 'S': // syslog use_syslog = true; use_colors = false; break; - case 1020: +#endif + case 1020: // cpu-affinity p = strstr(arg, "0x"); if ( p ) ul = strtoull( p, NULL, 16 ); @@ -3171,14 +3184,14 @@ void parse_arg(int key, char *arg ) #if AFFINITY_USES_UINT128 // replicate the low 64 bits to make a full 128 bit mask if there are more // than 64 CPUs, otherwise zero extend the upper half. - opt_affinity = (uint128_t)ul; - if ( num_cpus > 64 ) - opt_affinity = (opt_affinity << 64 ) | opt_affinity; + opt_affinity = (uint128_t)ul; + if ( num_cpus > 64 ) + opt_affinity = (opt_affinity << 64 ) | opt_affinity; #else - opt_affinity = ul; + opt_affinity = ul; #endif break; - case 1021: + case 1021: // cpu-priority v = atoi(arg); if (v < 0 || v > 5) /* sanity check */ show_usage_and_exit(1); @@ -3637,7 +3650,7 @@ int main(int argc, char *argv[]) if ( !check_cpu_capability() ) exit(1); pthread_mutex_init( &stats_lock, NULL ); - pthread_mutex_init( &g_work_lock, NULL ); + pthread_rwlock_init( &g_work_lock, NULL ); pthread_mutex_init( &stratum.sock_lock, NULL ); pthread_mutex_init( &stratum.work_lock, NULL ); @@ -3797,7 +3810,7 @@ int main(int argc, char *argv[]) return 1; } } - if (want_stratum) + if ( have_stratum ) { if ( opt_debug ) applog(LOG_INFO,"Creating stratum thread"); diff --git a/miner.h b/miner.h index 52a3430..626ab09 100644 --- a/miner.h +++ b/miner.h @@ -83,6 +83,8 @@ enum { }; #endif 
+extern bool is_power_of_2( int n ); + static inline bool is_windows(void) { #ifdef WIN32 @@ -378,36 +380,25 @@ void cpu_brand_string( char* s ); float cpu_temp( int core ); */ -struct work { +struct work +{ + uint32_t target[8] __attribute__ ((aligned (64))); uint32_t data[48] __attribute__ ((aligned (64))); - uint32_t target[8] __attribute__ ((aligned (64))); - double targetdiff; -// double shareratio; double sharediff; double stratum_diff; - int height; char *txs; char *workid; - char *job_id; size_t xnonce2_len; unsigned char *xnonce2; bool sapling; bool stale; - - // x16rt - uint32_t merkleroothash[8]; - uint32_t witmerkleroothash[8]; - uint32_t denom10[8]; - uint32_t denom100[8]; - uint32_t denom1000[8]; - uint32_t denom10000[8]; - } __attribute__ ((aligned (64))); -struct stratum_job { +struct stratum_job +{ unsigned char prevhash[32]; unsigned char final_sapling_hash[32]; char *job_id; @@ -421,7 +412,7 @@ struct stratum_job { unsigned char ntime[4]; double diff; bool clean; - // for x16rt + // for x16rt-veil unsigned char extra[64]; unsigned char denom10[32]; unsigned char denom100[32]; @@ -756,7 +747,7 @@ extern double opt_diff_factor; extern double opt_target_factor; extern bool opt_randomize; extern bool allow_mininginfo; -extern pthread_mutex_t g_work_lock; +extern pthread_rwlock_t g_work_lock; extern time_t g_work_time; extern bool opt_stratum_stats; extern int num_cpus; diff --git a/simd-utils/simd-512.h b/simd-utils/simd-512.h index cc1e38c..571c36b 100644 --- a/simd-utils/simd-512.h +++ b/simd-utils/simd-512.h @@ -375,10 +375,10 @@ static inline void memcpy_512( __m512i *dst, const __m512i *src, const int n ) // Generic for odd rotations #define mm512_ror_x64( v, n ) _mm512_alignr_epi64( v, v, n ) -#define mm512_rol_x64( v, n ) _mm512_alignr_epi64( v, v, 8-n ) +#define mm512_rol_x64( v, n ) _mm512_alignr_epi64( v, v, 8-(n) ) #define mm512_ror_x32( v, n ) _mm512_alignr_epi32( v, v, n ) -#define mm512_rol_x32( v, n ) _mm512_alignr_epi32( v, v, 16-n ) 
+#define mm512_rol_x32( v, n ) _mm512_alignr_epi32( v, v, 16-(n) ) #define mm512_ror_1x16( v ) \ _mm512_permutexvar_epi16( m512_const_64( \ diff --git a/util.c b/util.c index 9a7d7cb..14c7286 100644 --- a/util.c +++ b/util.c @@ -81,6 +81,15 @@ struct thread_q { pthread_cond_t cond; }; +bool is_power_of_2( int n ) +{ + while ( n > 1 ) + { + if ( n % 2 != 0 ) return false; + n = n / 2; + } + return true; +} void applog2( int prio, const char *fmt, ... ) { @@ -609,6 +618,8 @@ json_t *json_rpc_call(CURL *curl, const char *url, goto err_out; } +// want_stratum is useless, and so is this code it seems. Nothing in +// hi appears to be set. /* If X-Stratum was found, activate Stratum */ if (want_stratum && hi.stratum_url && !strncasecmp(hi.stratum_url, "stratum+tcp://", 14)) {