diff --git a/README.md b/README.md index 99d127b..dd5af57 100644 --- a/README.md +++ b/README.md @@ -122,10 +122,10 @@ Supported Algorithms x13sm3 hsr (Hshare) x14 X14 x15 X15 - x16r Ravencoin (RVN) (original algo) - x16rv2 Ravencoin (RVN) (new algo) + x16r + x16rv2 Ravencoin (RVN) x16rt Gincoin (GIN) - x16rt_veil Veil (VEIL) + x16rt-veil Veil (VEIL) x16s Pigeoncoin (PGN) x17 x21s @@ -136,7 +136,7 @@ Supported Algorithms yescryptr32 WAVI yespower Cryply yespowerr16 Yenten (YTN) - yespoer-b2b generic yespower + blake2b + yespower-b2b generic yespower + blake2b zr5 Ziftr Errata @@ -160,10 +160,12 @@ Bugs ---- Users are encouraged to post their bug reports using git issues or on the -Bitcoin Talk forum at: +Bitcoin Talk forum, at the links below: https://bitcointalk.org/index.php?topic=1326803.0 +https://github.com/JayDDee/cpuminer-opt/issues + All problem reports must be accompanied by a proper problem definition. This should include how the problem occurred, the command line and output from the miner showing the startup messages and any errors. @@ -175,10 +177,6 @@ Donations cpuminer-opt has no fees of any kind but donations are accepted. BTC: 12tdvfF7KmAsihBXQXynT6E6th2c2pByTT - ETH: 0x72122edabcae9d3f57eab0729305a425f6fef6d0 - LTC: LdUwoHJnux9r9EKqFWNvAi45kQompHk6e8 - BCH: 1QKYkB6atn4P7RFozyziAXLEnurwnUM1cQ - BTG: GVUyECtRHeC5D58z9F3nGGfVQndwnsPnHQ Happy mining! diff --git a/RELEASE_NOTES b/RELEASE_NOTES index 935c65f..8e1a504 100644 --- a/RELEASE_NOTES +++ b/RELEASE_NOTES @@ -1,11 +1,6 @@ cpuminer-opt is a console program run from the command line using the keyboard, not the mouse. -cpuminer-opt now supports HW SHA acceleration available on AMD Ryzen CPUs. -This feature requires recent SW including GCC version 5 or higher and -openssl version 1.1 or higher. It may also require using "-march=znver1" -compile flag. - Security warning ---------------- @@ -36,17 +31,26 @@ FreeBSD YMMV. 
Change Log ---------- +v3.9.9.1 + +Fixed a day1 bug that could cause the miner to idle for up to 2 minutes +under certain circumstances. + +Redesigned summary stats report now includes session statistics. + +More robust handling of statistics to reduce corruption. + +Removed --hide-diff option. + +Better handling of cpu-affinity with more than 64 CPUs. + v3.9.9 Added power2b algo for MicroBitcoin. - Added generic yespower-b2b (yespower + blake2b) algo to be used with the parameters introduced in v3.9.7 for yespower & yescrypt. - Display additional info when a share is rejected. - Some low level enhancements and minor tweaking of log output. - RELEASE_NOTES (this file) and README.md added to Windows release package. v3.9.8.1 diff --git a/algo-gate-api.c b/algo-gate-api.c index e8c0885..bcce910 100644 --- a/algo-gate-api.c +++ b/algo-gate-api.c @@ -116,8 +116,6 @@ void init_algo_gate( algo_gate_t* gate ) gate->get_nonceptr = (void*)&std_get_nonceptr; gate->work_decode = (void*)&std_le_work_decode; gate->decode_extra_data = (void*)&do_nothing; - gate->wait_for_diff = (void*)&std_wait_for_diff; - gate->get_max64 = (void*)&get_max64_0x1fffffLL; gate->gen_merkle_root = (void*)&sha256d_gen_merkle_root; gate->stratum_gen_work = (void*)&std_stratum_gen_work; gate->build_stratum_request = (void*)&std_le_build_stratum_request; @@ -278,7 +276,7 @@ bool register_json_rpc2( algo_gate_t *gate ) applog(LOG_WARNING,"supported by cpuminer-opt. Shares submitted will"); applog(LOG_WARNING,"likely be rejected. Proceed at your own risk.\n"); - gate->wait_for_diff = (void*)&do_nothing; +// gate->wait_for_diff = (void*)&do_nothing; gate->get_new_work = (void*)&jr2_get_new_work; gate->get_nonceptr = (void*)&jr2_get_nonceptr; gate->stratum_gen_work = (void*)&jr2_stratum_gen_work; diff --git a/algo-gate-api.h b/algo-gate-api.h index 8792e53..7d88ec9 100644 --- a/algo-gate-api.h +++ b/algo-gate-api.h @@ -35,7 +35,7 @@ // 6. Determine if other non existant functions are required. 
// That is determined by the need to add code in cpu-miner.c // that applies only to the new algo. That is forbidden. All -// algo specific code must be in theh algo's file. +// algo specific code must be in the algo's file. // // 7. If new functions need to be added to the gate add the type // to the structure, declare a null instance in this file and define @@ -48,10 +48,10 @@ // instances as they are defined by default, or unsafe functions that // are not needed by the algo. // -// 9. Add an case entry to the switch/case in function register_gate +// 9. Add a case entry to the switch/case in function register_gate // in file algo-gate-api.c for the new algo. // -// 10 If a new function type was defined add an entry to ini talgo_gate +// 10 If a new function type was defined add an entry to init algo_gate // to initialize the new function to its null instance described in step 7. // // 11. If the new algo has aliases add them to the alias array in @@ -110,14 +110,7 @@ inline bool set_excl ( set_t a, set_t b ) { return (a & b) == 0; } typedef struct { -// special case, only one target, provides a callback for scanhash to -// submit work with less overhead. -// bool (*submit_work ) ( struct thr_info*, const struct work* ); - // mandatory functions, must be overwritten -// Added a 5th arg for the thread_info structure to replace the int thr id -// in the first arg. Both will co-exist during the trasition. 
-//int ( *scanhash ) ( int, struct work*, uint32_t, uint64_t* ); int ( *scanhash ) ( struct work*, uint32_t, uint64_t*, struct thr_info* ); // optional unsafe, must be overwritten if algo uses function @@ -131,14 +124,12 @@ void ( *get_new_work ) ( struct work*, struct work*, int, uint32_t*, bool ); uint32_t *( *get_nonceptr ) ( uint32_t* ); void ( *decode_extra_data ) ( struct work*, uint64_t* ); -void ( *wait_for_diff ) ( struct stratum_ctx* ); -int64_t ( *get_max64 ) (); bool ( *work_decode ) ( const json_t*, struct work* ); bool ( *submit_getwork_result ) ( CURL*, struct work* ); void ( *gen_merkle_root ) ( char*, struct stratum_ctx* ); void ( *build_extraheader ) ( struct work*, struct stratum_ctx* ); void ( *build_block_header ) ( struct work*, uint32_t, uint32_t*, - uint32_t*, uint32_t, uint32_t ); + uint32_t*, uint32_t, uint32_t ); void ( *build_stratum_request ) ( char*, struct work*, struct stratum_ctx* ); char* ( *malloc_txs_request ) ( struct work* ); void ( *set_work_data_endian ) ( struct work* ); @@ -200,8 +191,6 @@ void null_hash_suw(); // optional safe targets, default listed first unless noted. 
-void std_wait_for_diff(); - uint32_t *std_get_nonceptr( uint32_t *work_data ); uint32_t *jr2_get_nonceptr( uint32_t *work_data ); @@ -216,21 +205,13 @@ void jr2_stratum_gen_work( struct stratum_ctx *sctx, struct work *work ); void sha256d_gen_merkle_root( char *merkle_root, struct stratum_ctx *sctx ); void SHA256_gen_merkle_root ( char *merkle_root, struct stratum_ctx *sctx ); -// pick your favorite or define your own -int64_t get_max64_0x1fffffLL(); // default -int64_t get_max64_0x40LL(); -int64_t get_max64_0x3ffff(); -int64_t get_max64_0x3fffffLL(); -int64_t get_max64_0x1ffff(); -int64_t get_max64_0xffffLL(); - bool std_le_work_decode( const json_t *val, struct work *work ); bool std_be_work_decode( const json_t *val, struct work *work ); -bool jr2_work_decode( const json_t *val, struct work *work ); +bool jr2_work_decode( const json_t *val, struct work *work ); bool std_le_submit_getwork_result( CURL *curl, struct work *work ); bool std_be_submit_getwork_result( CURL *curl, struct work *work ); -bool jr2_submit_getwork_result( CURL *curl, struct work *work ); +bool jr2_submit_getwork_result( CURL *curl, struct work *work ); void std_le_build_stratum_request( char *req, struct work *work ); void std_be_build_stratum_request( char *req, struct work *work ); @@ -244,8 +225,8 @@ void set_work_data_big_endian( struct work *work ); double std_calc_network_diff( struct work *work ); void std_build_block_header( struct work* g_work, uint32_t version, - uint32_t *prevhash, uint32_t *merkle_root, - uint32_t ntime, uint32_t nbits ); + uint32_t *prevhash, uint32_t *merkle_root, + uint32_t ntime, uint32_t nbits ); void std_build_extraheader( struct work *work, struct stratum_ctx *sctx ); @@ -266,8 +247,8 @@ int std_get_work_data_size(); // by calling the algo's register function. bool register_algo_gate( int algo, algo_gate_t *gate ); -// Override any default gate functions that are applicable and do any other -// algo-specific initialization. 
+// Called by algos to override any default gate functions that are applicable +// and do any other algo-specific initialization. // The register functions for all the algos can be declared here to reduce // compiler warnings but that's just more work for devs adding new algos. bool register_algo( algo_gate_t *gate ); @@ -280,5 +261,7 @@ bool register_json_rpc2( algo_gate_t *gate ); // use this to call the hash function of an algo directly, ie util.c test. void exec_hash_function( int algo, void *output, const void *pdata ); -void get_algo_alias( char** algo_or_alias ); +// Validate a string as a known algo or alias, updates arg to proper +// algo name if valid alias, NULL if invalid alias or algo. +void get_algo_alias( char **algo_or_alias ); diff --git a/algo/argon2/argon2a/argon2a.c b/algo/argon2/argon2a/argon2a.c index 94f6f2c..699e1fa 100644 --- a/algo/argon2/argon2a/argon2a.c +++ b/algo/argon2/argon2a/argon2a.c @@ -74,18 +74,12 @@ int scanhash_argon2( struct work* work, uint32_t max_nonce, return 0; } -int64_t argon2_get_max64 () -{ - return 0x1ffLL; -} - bool register_argon2_algo( algo_gate_t* gate ) { gate->optimizations = SSE2_OPT | AVX_OPT | AVX2_OPT; gate->scanhash = (void*)&scanhash_argon2; gate->hash = (void*)&argon2hash; gate->gen_merkle_root = (void*)&SHA256_gen_merkle_root; - gate->get_max64 = (void*)&argon2_get_max64; opt_target_factor = 65536.0; return true; diff --git a/algo/argon2/argon2d/argon2d-gate.c b/algo/argon2/argon2d/argon2d-gate.c index fa6a206..300bf57 100644 --- a/algo/argon2/argon2d/argon2d-gate.c +++ b/algo/argon2/argon2d/argon2d-gate.c @@ -179,12 +179,9 @@ int scanhash_argon2d4096( struct work *work, uint32_t max_nonce, return 0; } -int64_t get_max64_0x1ff() { return 0x1ff; } - bool register_argon2d4096_algo( algo_gate_t* gate ) { gate->scanhash = (void*)&scanhash_argon2d4096; - gate->get_max64 = (void*)&get_max64_0x1ff; gate->optimizations = SSE2_OPT | AVX2_OPT | AVX512_OPT; opt_target_factor = 65536.0; return true; diff --git 
a/algo/blake/blake-gate.c b/algo/blake/blake-gate.c index 7fc6480..7dd8d94 100644 --- a/algo/blake/blake-gate.c +++ b/algo/blake/blake-gate.c @@ -1,18 +1,8 @@ #include "blake-gate.h" -int64_t blake_get_max64 () -{ - return 0x7ffffLL; -} - bool register_blake_algo( algo_gate_t* gate ) { gate->optimizations = AVX2_OPT; - gate->get_max64 = (void*)&blake_get_max64; -//#if defined (__AVX2__) && defined (FOUR_WAY) -// gate->optimizations = SSE2_OPT | AVX2_OPT; -// gate->scanhash = (void*)&scanhash_blake_8way; -// gate->hash = (void*)&blakehash_8way; #if defined(BLAKE_4WAY) four_way_not_tested(); gate->scanhash = (void*)&scanhash_blake_4way; diff --git a/algo/blake/blake2b-gate.c b/algo/blake/blake2b-gate.c index e875e04..da8851c 100644 --- a/algo/blake/blake2b-gate.c +++ b/algo/blake/blake2b-gate.c @@ -1,13 +1,5 @@ #include "blake2b-gate.h" -/* -// changed to get_max64_0x3fffffLL in cpuminer-multi-decred -int64_t blake2s_get_max64 () -{ - return 0x7ffffLL; -} -*/ - bool register_blake2b_algo( algo_gate_t* gate ) { #if defined(BLAKE2B_4WAY) @@ -17,7 +9,6 @@ bool register_blake2b_algo( algo_gate_t* gate ) gate->scanhash = (void*)&scanhash_blake2b; gate->hash = (void*)&blake2b_hash; #endif -// gate->get_max64 = (void*)&blake2s_get_max64; gate->optimizations = AVX2_OPT; return true; }; diff --git a/algo/blake/blake2s-gate.c b/algo/blake/blake2s-gate.c index 68ace1a..a35047f 100644 --- a/algo/blake/blake2s-gate.c +++ b/algo/blake/blake2s-gate.c @@ -1,12 +1,5 @@ #include "blake2s-gate.h" - -// changed to get_max64_0x3fffffLL in cpuminer-multi-decred -int64_t blake2s_get_max64 () -{ - return 0x7ffffLL; -} - bool register_blake2s_algo( algo_gate_t* gate ) { #if defined(BLAKE2S_8WAY) @@ -19,7 +12,6 @@ bool register_blake2s_algo( algo_gate_t* gate ) gate->scanhash = (void*)&scanhash_blake2s; gate->hash = (void*)&blake2s_hash; #endif - gate->get_max64 = (void*)&blake2s_get_max64; gate->optimizations = SSE2_OPT | AVX2_OPT; return true; }; diff --git a/algo/blake/blake2s.c 
b/algo/blake/blake2s.c index aee4ce5..ec5b46f 100644 --- a/algo/blake/blake2s.c +++ b/algo/blake/blake2s.c @@ -70,18 +70,3 @@ int scanhash_blake2s( struct work *work, return 0; } -/* -// changed to get_max64_0x3fffffLL in cpuminer-multi-decred -int64_t blake2s_get_max64 () -{ - return 0x7ffffLL; -} - -bool register_blake2s_algo( algo_gate_t* gate ) -{ - gate->scanhash = (void*)&scanhash_blake2s; - gate->hash = (void*)&blake2s_hash; - gate->get_max64 = (void*)&blake2s_get_max64; - return true; -}; -*/ diff --git a/algo/blake/blakecoin-gate.c b/algo/blake/blakecoin-gate.c index 0429063..b4322b0 100644 --- a/algo/blake/blakecoin-gate.c +++ b/algo/blake/blakecoin-gate.c @@ -1,13 +1,6 @@ #include "blakecoin-gate.h" #include -// changed to get_max64_0x3fffffLL in cpuminer-multi-decred -int64_t blakecoin_get_max64 () -{ - return 0x7ffffLL; -// return 0x3fffffLL; -} - // vanilla uses default gen merkle root, otherwise identical to blakecoin bool register_vanilla_algo( algo_gate_t* gate ) { @@ -23,7 +16,6 @@ bool register_vanilla_algo( algo_gate_t* gate ) gate->hash = (void*)&blakecoinhash; #endif gate->optimizations = SSE42_OPT | AVX2_OPT; - gate->get_max64 = (void*)&blakecoin_get_max64; return true; } diff --git a/algo/blake/blakecoin.c b/algo/blake/blakecoin.c index f733c2c..3cea5d9 100644 --- a/algo/blake/blakecoin.c +++ b/algo/blake/blakecoin.c @@ -93,33 +93,3 @@ int scanhash_blakecoin( struct work *work, uint32_t max_nonce, return 0; } -/* -void blakecoin_gen_merkle_root ( char* merkle_root, struct stratum_ctx* sctx ) -{ - SHA256( sctx->job.coinbase, (int)sctx->job.coinbase_size, merkle_root ); -} -*/ -/* -// changed to get_max64_0x3fffffLL in cpuminer-multi-decred -int64_t blakecoin_get_max64 () -{ - return 0x7ffffLL; -} - -// vanilla uses default gen merkle root, otherwise identical to blakecoin -bool register_vanilla_algo( algo_gate_t* gate ) -{ - gate->scanhash = (void*)&scanhash_blakecoin; - gate->hash = (void*)&blakecoinhash; - gate->get_max64 = 
(void*)&blakecoin_get_max64; - blakecoin_init( &blake_init_ctx ); - return true; -} - -bool register_blakecoin_algo( algo_gate_t* gate ) -{ - register_vanilla_algo( gate ); - gate->gen_merkle_root = (void*)&SHA256_gen_merkle_root; - return true; -} -*/ diff --git a/algo/blake/decred-gate.c b/algo/blake/decred-gate.c index 0e06c5e..ef58a81 100644 --- a/algo/blake/decred-gate.c +++ b/algo/blake/decred-gate.c @@ -38,7 +38,7 @@ void decred_decode_extradata( struct work* work, uint64_t* net_blocks ) if (!have_longpoll && work->height > *net_blocks + 1) { char netinfo[64] = { 0 }; - if (opt_showdiff && net_diff > 0.) + if ( net_diff > 0. ) { if (net_diff != work->targetdiff) sprintf(netinfo, ", diff %.3f, target %.1f", net_diff, @@ -154,7 +154,6 @@ bool register_decred_algo( algo_gate_t* gate ) #endif gate->optimizations = AVX2_OPT; gate->get_nonceptr = (void*)&decred_get_nonceptr; - gate->get_max64 = (void*)&get_max64_0x3fffffLL; gate->decode_extra_data = (void*)&decred_decode_extradata; gate->build_stratum_request = (void*)&decred_be_build_stratum_request; gate->work_decode = (void*)&std_be_work_decode; diff --git a/algo/blake/decred.c b/algo/blake/decred.c index 8645d2a..37ad1f3 100644 --- a/algo/blake/decred.c +++ b/algo/blake/decred.c @@ -143,7 +143,7 @@ void decred_decode_extradata( struct work* work, uint64_t* net_blocks ) if (!have_longpoll && work->height > *net_blocks + 1) { char netinfo[64] = { 0 }; - if (opt_showdiff && net_diff > 0.) + if (net_diff > 0.) 
{ if (net_diff != work->targetdiff) sprintf(netinfo, ", diff %.3f, target %.1f", net_diff, @@ -269,7 +269,6 @@ bool register_decred_algo( algo_gate_t* gate ) gate->scanhash = (void*)&scanhash_decred; gate->hash = (void*)&decred_hash; gate->get_nonceptr = (void*)&decred_get_nonceptr; - gate->get_max64 = (void*)&get_max64_0x3fffffLL; gate->decode_extra_data = (void*)&decred_decode_extradata; gate->build_stratum_request = (void*)&decred_be_build_stratum_request; gate->work_decode = (void*)&std_be_work_decode; diff --git a/algo/blake/pentablake-gate.c b/algo/blake/pentablake-gate.c index b194206..9d84486 100644 --- a/algo/blake/pentablake-gate.c +++ b/algo/blake/pentablake-gate.c @@ -10,7 +10,6 @@ bool register_pentablake_algo( algo_gate_t* gate ) gate->hash = (void*)&pentablakehash; #endif gate->optimizations = AVX2_OPT; - gate->get_max64 = (void*)&get_max64_0x3ffff; return true; }; diff --git a/algo/bmw/bmw512-gate.c b/algo/bmw/bmw512-gate.c index e52b04a..48277ed 100644 --- a/algo/bmw/bmw512-gate.c +++ b/algo/bmw/bmw512-gate.c @@ -1,11 +1,8 @@ #include "bmw512-gate.h" -int64_t bmw512_get_max64() { return 0x7ffffLL; } - bool register_bmw512_algo( algo_gate_t* gate ) { gate->optimizations = AVX2_OPT; - gate->get_max64 = (void*)&bmw512_get_max64; opt_target_factor = 256.0; #if defined (BMW512_4WAY) gate->scanhash = (void*)&scanhash_bmw512_4way; diff --git a/algo/cryptonight/cryptolight.c b/algo/cryptonight/cryptolight.c index 6295be5..ca2923d 100644 --- a/algo/cryptonight/cryptolight.c +++ b/algo/cryptonight/cryptolight.c @@ -363,7 +363,6 @@ bool register_cryptolight_algo( algo_gate_t* gate ) gate->scanhash = (void*)&scanhash_cryptolight; gate->hash = (void*)&cryptolight_hash; gate->hash_suw = (void*)&cryptolight_hash; - gate->get_max64 = (void*)&get_max64_0x40LL; return true; }; diff --git a/algo/cryptonight/cryptonight-common.c b/algo/cryptonight/cryptonight-common.c index e6c346b..2a5146f 100644 --- a/algo/cryptonight/cryptonight-common.c +++ 
b/algo/cryptonight/cryptonight-common.c @@ -111,7 +111,6 @@ bool register_cryptonight_algo( algo_gate_t* gate ) gate->scanhash = (void*)&scanhash_cryptonight; gate->hash = (void*)&cryptonight_hash; gate->hash_suw = (void*)&cryptonight_hash_suw; - gate->get_max64 = (void*)&get_max64_0x40LL; return true; }; @@ -123,7 +122,6 @@ bool register_cryptonightv7_algo( algo_gate_t* gate ) gate->scanhash = (void*)&scanhash_cryptonight; gate->hash = (void*)&cryptonight_hash; gate->hash_suw = (void*)&cryptonight_hash_suw; - gate->get_max64 = (void*)&get_max64_0x40LL; return true; }; diff --git a/algo/groestl/groestl.c b/algo/groestl/groestl.c index 30f2599..df1c2c3 100644 --- a/algo/groestl/groestl.c +++ b/algo/groestl/groestl.c @@ -100,7 +100,6 @@ bool register_dmd_gr_algo( algo_gate_t* gate ) gate->optimizations = SSE2_OPT | AES_OPT; gate->scanhash = (void*)&scanhash_groestl; gate->hash = (void*)&groestlhash; - gate->get_max64 = (void*)&get_max64_0x3ffff; opt_target_factor = 256.0; return true; }; diff --git a/algo/groestl/myr-groestl.c b/algo/groestl/myr-groestl.c index 5a3b897..bff0360 100644 --- a/algo/groestl/myr-groestl.c +++ b/algo/groestl/myr-groestl.c @@ -88,15 +88,3 @@ int scanhash_myriad( struct work *work, uint32_t max_nonce, *hashes_done = pdata[19] - first_nonce + 1; return 0; } -/* -bool register_myriad_algo( algo_gate_t* gate ) -{ - gate->optimizations = SSE2_OPT | AES_OPT; - init_myrgr_ctx(); - gate->scanhash = (void*)&scanhash_myriad; - gate->hash = (void*)&myriadhash; -// gate->hash_alt = (void*)&myriadhash; - gate->get_max64 = (void*)&get_max64_0x3ffff; - return true; -}; -*/ diff --git a/algo/groestl/myrgr-gate.c b/algo/groestl/myrgr-gate.c index aa8ebd8..7f8e185 100644 --- a/algo/groestl/myrgr-gate.c +++ b/algo/groestl/myrgr-gate.c @@ -12,7 +12,6 @@ bool register_myriad_algo( algo_gate_t* gate ) gate->hash = (void*)&myriad_hash; #endif gate->optimizations = AES_OPT | AVX2_OPT; - gate->get_max64 = (void*)&get_max64_0x3ffff; return true; }; diff --git 
a/algo/keccak/keccak-gate.c b/algo/keccak/keccak-gate.c index c963def..0ebc2d4 100644 --- a/algo/keccak/keccak-gate.c +++ b/algo/keccak/keccak-gate.c @@ -1,12 +1,10 @@ #include "keccak-gate.h" -int64_t keccak_get_max64() { return 0x7ffffLL; } bool register_keccak_algo( algo_gate_t* gate ) { gate->optimizations = AVX2_OPT; gate->gen_merkle_root = (void*)&SHA256_gen_merkle_root; - gate->get_max64 = (void*)&keccak_get_max64; opt_target_factor = 128.0; #if defined (KECCAK_4WAY) gate->scanhash = (void*)&scanhash_keccak_4way; @@ -22,7 +20,6 @@ bool register_keccakc_algo( algo_gate_t* gate ) { gate->optimizations = AVX2_OPT; gate->gen_merkle_root = (void*)&sha256d_gen_merkle_root; - gate->get_max64 = (void*)&keccak_get_max64; opt_target_factor = 256.0; #if defined (KECCAK_4WAY) gate->scanhash = (void*)&scanhash_keccak_4way; diff --git a/algo/lyra2/lyra2-gate.c b/algo/lyra2/lyra2-gate.c index 38db1d6..b608ba2 100644 --- a/algo/lyra2/lyra2-gate.c +++ b/algo/lyra2/lyra2-gate.c @@ -127,7 +127,6 @@ bool register_lyra2z_algo( algo_gate_t* gate ) gate->hash = (void*)&lyra2z_hash; #endif gate->optimizations = SSE42_OPT | AVX2_OPT; - gate->get_max64 = (void*)&get_max64_0xffffLL; opt_target_factor = 256.0; return true; }; @@ -147,15 +146,12 @@ bool register_lyra2h_algo( algo_gate_t* gate ) gate->hash = (void*)&lyra2h_hash; #endif gate->optimizations = SSE42_OPT | AVX2_OPT; - gate->get_max64 = (void*)&get_max64_0xffffLL; opt_target_factor = 256.0; return true; }; ///////////////////////////////// -int64_t allium_get_max64_0xFFFFLL() { return 0xFFFFLL; } - bool register_allium_algo( algo_gate_t* gate ) { #if defined (ALLIUM_4WAY) @@ -168,7 +164,6 @@ bool register_allium_algo( algo_gate_t* gate ) gate->hash = (void*)&allium_hash; #endif gate->optimizations = SSE2_OPT | AES_OPT | SSE42_OPT | AVX2_OPT; - gate->get_max64 = (void*)&allium_get_max64_0xFFFFLL; opt_target_factor = 256.0; return true; }; @@ -214,7 +209,6 @@ bool register_phi2_algo( algo_gate_t* gate ) gate->get_work_data_size 
= (void*)&phi2_get_work_data_size; gate->decode_extra_data = (void*)&phi2_decode_extra_data; gate->build_extraheader = (void*)&phi2_build_extraheader; - gate->get_max64 = (void*)&get_max64_0xffffLL; opt_target_factor = 256.0; #if defined(PHI2_4WAY) gate->scanhash = (void*)&scanhash_phi2_4way; diff --git a/algo/lyra2/lyra2re.c b/algo/lyra2/lyra2re.c index 62a164c..024016b 100644 --- a/algo/lyra2/lyra2re.c +++ b/algo/lyra2/lyra2re.c @@ -113,18 +113,12 @@ int scanhash_lyra2re( struct work *work, uint32_t max_nonce, return 0; } -int64_t lyra2re_get_max64 () -{ - return 0xffffLL; -} - bool register_lyra2re_algo( algo_gate_t* gate ) { init_lyra2re_ctx(); gate->optimizations = SSE2_OPT | AES_OPT | SSE42_OPT | AVX2_OPT; gate->scanhash = (void*)&scanhash_lyra2re; gate->hash = (void*)&lyra2re_hash; - gate->get_max64 = (void*)&lyra2re_get_max64; opt_target_factor = 128.0; return true; }; diff --git a/algo/lyra2/lyra2z330.c b/algo/lyra2/lyra2z330.c index 4eb8640..24af0dd 100644 --- a/algo/lyra2/lyra2z330.c +++ b/algo/lyra2/lyra2z330.c @@ -70,7 +70,6 @@ bool register_lyra2z330_algo( algo_gate_t* gate ) gate->miner_thread_init = (void*)&lyra2z330_thread_init; gate->scanhash = (void*)&scanhash_lyra2z330; gate->hash = (void*)&lyra2z330_hash; - gate->get_max64 = (void*)&get_max64_0xffffLL; opt_target_factor = 256.0; return true; }; diff --git a/algo/m7m.c b/algo/m7m.c index 7d847aa..c2e37ba 100644 --- a/algo/m7m.c +++ b/algo/m7m.c @@ -296,8 +296,6 @@ int scanhash_m7m_hash( struct work* work, uint64_t max_nonce, pdata[19] = n; -// can this be skipped after finding a share? Seems to work ok. 
-//out: mpf_set_prec_raw(magifpi, prec0); mpf_set_prec_raw(magifpi0, prec0); mpf_set_prec_raw(mptmp, prec0); @@ -323,7 +321,6 @@ bool register_m7m_algo( algo_gate_t *gate ) gate->build_stratum_request = (void*)&std_be_build_stratum_request; gate->work_decode = (void*)&std_be_work_decode; gate->submit_getwork_result = (void*)&std_be_submit_getwork_result; - gate->get_max64 = (void*)&get_max64_0x1ffff; gate->set_work_data_endian = (void*)&set_work_data_big_endian; opt_target_factor = 65536.0; return true; diff --git a/algo/nist5/zr5.c b/algo/nist5/zr5.c index 9ec6e19..7a39a1b 100644 --- a/algo/nist5/zr5.c +++ b/algo/nist5/zr5.c @@ -208,12 +208,6 @@ void zr5_get_new_work( struct work* work, struct work* g_work, int thr_id, ++(*nonceptr); } -int64_t zr5_get_max64 () -{ -// return 0x1ffffLL; - return 0x1fffffLL; -} - void zr5_display_pok( struct work* work ) { if ( work->data[0] & 0x00008000 ) @@ -229,7 +223,6 @@ bool register_zr5_algo( algo_gate_t* gate ) gate->get_new_work = (void*)&zr5_get_new_work; gate->scanhash = (void*)&scanhash_zr5; gate->hash = (void*)&zr5hash; - gate->get_max64 = (void*)&zr5_get_max64; gate->decode_extra_data = (void*)&zr5_display_pok; gate->build_stratum_request = (void*)&std_be_build_stratum_request; gate->work_decode = (void*)&std_be_work_decode; diff --git a/algo/ripemd/lbry-gate.c b/algo/ripemd/lbry-gate.c index ec9af26..8e55867 100644 --- a/algo/ripemd/lbry-gate.c +++ b/algo/ripemd/lbry-gate.c @@ -94,8 +94,6 @@ void lbry_build_extraheader( struct work* g_work, struct stratum_ctx* sctx ) g_work->data[28] = 0x80000000; } -int64_t lbry_get_max64() { return 0x1ffffLL; } - int lbry_get_work_data_size() { return LBRY_WORK_DATA_SIZE; } bool register_lbry_algo( algo_gate_t* gate ) @@ -112,7 +110,6 @@ bool register_lbry_algo( algo_gate_t* gate ) gate->hash = (void*)&lbry_hash; #endif gate->calc_network_diff = (void*)&lbry_calc_network_diff; - gate->get_max64 = (void*)&lbry_get_max64; gate->build_stratum_request = 
(void*)&lbry_le_build_stratum_request; // gate->build_block_header = (void*)&build_block_header; gate->build_extraheader = (void*)&lbry_build_extraheader; diff --git a/algo/scrypt/neoscrypt.c b/algo/scrypt/neoscrypt.c index c4d6f6d..9003e59 100644 --- a/algo/scrypt/neoscrypt.c +++ b/algo/scrypt/neoscrypt.c @@ -1070,17 +1070,6 @@ int scanhash_neoscrypt( struct work *work, return 0; } -int64_t get_neoscrypt_max64() { return 0x3ffff; } - -void neoscrypt_wait_for_diff( struct stratum_ctx *stratum ) -{ - while ( !stratum->job.diff ) - { -// applog(LOG_DEBUG, "Waiting for Stratum to set the job difficulty"); - sleep(1); - } -} - int neoscrypt_get_work_data_size () { return 80; } bool register_neoscrypt_algo( algo_gate_t* gate ) @@ -1088,8 +1077,6 @@ bool register_neoscrypt_algo( algo_gate_t* gate ) gate->optimizations = SSE2_OPT; gate->scanhash = (void*)&scanhash_neoscrypt; gate->hash = (void*)&neoscrypt; - gate->get_max64 = (void*)&get_neoscrypt_max64; - gate->wait_for_diff = (void*)&neoscrypt_wait_for_diff; gate->build_stratum_request = (void*)&std_be_build_stratum_request; gate->work_decode = (void*)&std_be_work_decode; gate->submit_getwork_result = (void*)&std_be_submit_getwork_result; diff --git a/algo/scrypt/pluck.c b/algo/scrypt/pluck.c index 44d49b8..a0e7275 100644 --- a/algo/scrypt/pluck.c +++ b/algo/scrypt/pluck.c @@ -483,11 +483,6 @@ int scanhash_pluck( struct work *work, uint32_t max_nonce, return 0; } -int64_t pluck_get_max64 () -{ - return 0x1ffLL; -} - bool pluck_miner_thread_init( int thr_id ) { scratchbuf = malloc( 128 * 1024 ); @@ -503,7 +498,6 @@ bool register_pluck_algo( algo_gate_t* gate ) gate->miner_thread_init = (void*)&pluck_miner_thread_init; gate->scanhash = (void*)&scanhash_pluck; gate->hash = (void*)&pluck_hash; - gate->get_max64 = (void*)&pluck_get_max64; opt_target_factor = 65536.0; return true; }; diff --git a/algo/scrypt/scrypt.c b/algo/scrypt/scrypt.c index 4143fb1..68bce4a 100644 --- a/algo/scrypt/scrypt.c +++ b/algo/scrypt/scrypt.c @@ 
-766,8 +766,6 @@ extern int scanhash_scrypt( struct work *work, uint32_t max_nonce, return 0; } -int64_t scrypt_get_max64() { return 0xfff; } - bool scrypt_miner_thread_init( int thr_id ) { scratchbuf = scrypt_buffer_alloc( scratchbuf_size ); @@ -783,10 +781,8 @@ bool register_scrypt_algo( algo_gate_t* gate ) gate->miner_thread_init =(void*)&scrypt_miner_thread_init; gate->scanhash = (void*)&scanhash_scrypt; // gate->hash = (void*)&scrypt_1024_1_1_256_24way; - gate->get_max64 = (void*)&scrypt_get_max64; opt_target_factor = 65536.0; - if ( !opt_param_n ) { opt_param_n = 1024; diff --git a/algo/scryptjane/scrypt-jane.c b/algo/scryptjane/scrypt-jane.c index 5329b4f..ea1b463 100644 --- a/algo/scryptjane/scrypt-jane.c +++ b/algo/scryptjane/scrypt-jane.c @@ -240,7 +240,6 @@ bool register_scryptjane_algo( algo_gate_t* gate ) { gate->scanhash = (void*)&scanhash_scryptjane; gate->hash = (void*)&scryptjanehash; - gate->get_max64 = (void*)&get_max64_0x40LL; opt_target_factor = 65536.0; // figure out if arg in N or Nfactor diff --git a/algo/sha/sha256t-gate.c b/algo/sha/sha256t-gate.c index 0271234..ba7f95d 100644 --- a/algo/sha/sha256t-gate.c +++ b/algo/sha/sha256t-gate.c @@ -15,7 +15,6 @@ bool register_sha256t_algo( algo_gate_t* gate ) gate->scanhash = (void*)&scanhash_sha256t; gate->hash = (void*)&sha256t_hash; #endif - gate->get_max64 = (void*)&get_max64_0x3ffff; return true; } @@ -34,7 +33,6 @@ bool register_sha256q_algo( algo_gate_t* gate ) gate->scanhash = (void*)&scanhash_sha256q; gate->hash = (void*)&sha256q_hash; #endif - gate->get_max64 = (void*)&get_max64_0x3ffff; return true; } diff --git a/algo/skein/skein-gate.c b/algo/skein/skein-gate.c index f41c874..6acdc19 100644 --- a/algo/skein/skein-gate.c +++ b/algo/skein/skein-gate.c @@ -2,8 +2,6 @@ #include "sph_skein.h" #include "skein-hash-4way.h" -int64_t skein_get_max64() { return 0x7ffffLL; } - bool register_skein_algo( algo_gate_t* gate ) { gate->optimizations = AVX2_OPT | SHA_OPT; @@ -14,7 +12,6 @@ bool 
register_skein_algo( algo_gate_t* gate ) gate->scanhash = (void*)&scanhash_skein; gate->hash = (void*)&skeinhash; #endif - gate->get_max64 = (void*)&skein_get_max64; return true; }; diff --git a/algo/skein/skein2-gate.c b/algo/skein/skein2-gate.c index 34483b2..d40e2c4 100644 --- a/algo/skein/skein2-gate.c +++ b/algo/skein/skein2-gate.c @@ -2,11 +2,6 @@ #include #include "sph_skein.h" -int64_t skein2_get_max64 () -{ - return 0x7ffffLL; -} - bool register_skein2_algo( algo_gate_t* gate ) { gate->optimizations = AVX2_OPT; @@ -17,7 +12,6 @@ bool register_skein2_algo( algo_gate_t* gate ) gate->scanhash = (void*)&scanhash_skein2; gate->hash = (void*)&skein2hash; #endif - gate->get_max64 = (void*)&skein2_get_max64; return true; }; diff --git a/algo/sm3/sm3-hash-4way.c b/algo/sm3/sm3-hash-4way.c index 501642f..f900aba 100644 --- a/algo/sm3/sm3-hash-4way.c +++ b/algo/sm3/sm3-hash-4way.c @@ -181,7 +181,7 @@ void sm3_4way_compress( __m128i *digest, __m128i *block ) for( j =0; j < 16; j++ ) { SS1 = mm128_rol_32( _mm_add_epi32( _mm_add_epi32( mm128_rol_32(A,12), E ), - mm128_rol_32( T, j ) ), 7 ); + mm128_rol_var_32( T, j ) ), 7 ); SS2 = _mm_xor_si128( SS1, mm128_rol_32( A, 12 ) ); TT1 = _mm_add_epi32( _mm_add_epi32( _mm_add_epi32( FF0( A, B, C ), D ), SS2 ), W1[j] ); @@ -200,9 +200,8 @@ void sm3_4way_compress( __m128i *digest, __m128i *block ) T = _mm_set1_epi32( 0x7A879D8AUL ); for( j =16; j < 64; j++ ) { - // AVX512 _mm_rol_epi32 doesn't like using a variable for the second arg. 
SS1 = mm128_rol_32( _mm_add_epi32( _mm_add_epi32( mm128_rol_32(A,12), E ), - mm128_rol_32( T, j&31 ) ), 7 ); + mm128_rol_var_32( T, j&31 ) ), 7 ); SS2 = _mm_xor_si128( SS1, mm128_rol_32( A, 12 ) ); TT1 = _mm_add_epi32( _mm_add_epi32( _mm_add_epi32( FF1( A, B, C ), D ), SS2 ), W1[j] ); diff --git a/algo/x11/c11-gate.c b/algo/x11/c11-gate.c index 30c719b..d087cc9 100644 --- a/algo/x11/c11-gate.c +++ b/algo/x11/c11-gate.c @@ -12,7 +12,6 @@ bool register_c11_algo( algo_gate_t* gate ) gate->hash = (void*)&c11_hash; #endif gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT; - gate->get_max64 = (void*)&get_max64_0x3ffff; return true; }; diff --git a/algo/x11/fresh.c b/algo/x11/fresh.c index d1994b0..d81cc2f 100644 --- a/algo/x11/fresh.c +++ b/algo/x11/fresh.c @@ -125,7 +125,6 @@ bool register_fresh_algo( algo_gate_t* gate ) algo_not_tested(); gate->scanhash = (void*)&scanhash_fresh; gate->hash = (void*)&freshhash; - gate->get_max64 = (void*)&get_max64_0x3ffff; opt_target_factor = 256.0; return true; }; diff --git a/algo/x11/timetravel-gate.c b/algo/x11/timetravel-gate.c index 311b3ba..370ef39 100644 --- a/algo/x11/timetravel-gate.c +++ b/algo/x11/timetravel-gate.c @@ -12,7 +12,6 @@ bool register_timetravel_algo( algo_gate_t* gate ) gate->hash = (void*)&timetravel_hash; #endif gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT; - gate->get_max64 = (void*)&get_max64_0xffffLL; opt_target_factor = 256.0; return true; }; diff --git a/algo/x11/timetravel10-gate.c b/algo/x11/timetravel10-gate.c index 3c60bbf..8c21229 100644 --- a/algo/x11/timetravel10-gate.c +++ b/algo/x11/timetravel10-gate.c @@ -12,7 +12,6 @@ bool register_timetravel10_algo( algo_gate_t* gate ) gate->hash = (void*)&timetravel10_hash; #endif gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT; - gate->get_max64 = (void*)&get_max64_0xffffLL; opt_target_factor = 256.0; return true; }; diff --git a/algo/x11/tribus-gate.c b/algo/x11/tribus-gate.c index f30d65e..9c9c1ae 100644 --- a/algo/x11/tribus-gate.c +++ 
b/algo/x11/tribus-gate.c @@ -3,7 +3,6 @@ bool register_tribus_algo( algo_gate_t* gate ) { gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT; - gate->get_max64 = (void*)&get_max64_0x1ffff; #if defined (TRIBUS_4WAY) // init_tribus_4way_ctx(); gate->scanhash = (void*)&scanhash_tribus_4way; diff --git a/algo/x11/x11-gate.c b/algo/x11/x11-gate.c index adad370..c3917a1 100644 --- a/algo/x11/x11-gate.c +++ b/algo/x11/x11-gate.c @@ -12,7 +12,6 @@ bool register_x11_algo( algo_gate_t* gate ) gate->hash = (void*)&x11_hash; #endif gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT; - gate->get_max64 = (void*)&get_max64_0x3ffff; return true; }; diff --git a/algo/x11/x11gost-gate.c b/algo/x11/x11gost-gate.c index 0d53551..3cb5bc2 100644 --- a/algo/x11/x11gost-gate.c +++ b/algo/x11/x11gost-gate.c @@ -12,7 +12,6 @@ bool register_x11gost_algo( algo_gate_t* gate ) gate->hash = (void*)&x11gost_hash; #endif gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT; - gate->get_max64 = (void*)&get_max64_0x3ffff; return true; }; diff --git a/algo/x12/x12-gate.c b/algo/x12/x12-gate.c index 5220c10..05f7173 100644 --- a/algo/x12/x12-gate.c +++ b/algo/x12/x12-gate.c @@ -12,7 +12,6 @@ bool register_x12_algo( algo_gate_t* gate ) gate->hash = (void*)&x12hash; #endif gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT; - gate->get_max64 = (void*)&get_max64_0x3ffff; return true; }; diff --git a/algo/x13/phi1612-gate.c b/algo/x13/phi1612-gate.c index 9a9d871..65861eb 100644 --- a/algo/x13/phi1612-gate.c +++ b/algo/x13/phi1612-gate.c @@ -12,7 +12,6 @@ bool register_phi1612_algo( algo_gate_t* gate ) gate->hash = (void*)&phi1612_hash; #endif gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT; - gate->get_max64 = (void*)&get_max64_0x3ffff; return true; }; diff --git a/algo/x13/x13-gate.c b/algo/x13/x13-gate.c index ce3e640..60973d3 100644 --- a/algo/x13/x13-gate.c +++ b/algo/x13/x13-gate.c @@ -12,7 +12,6 @@ bool register_x13_algo( algo_gate_t* gate ) gate->hash = (void*)&x13hash; #endif 
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT; - gate->get_max64 = (void*)&get_max64_0x3ffff; return true; }; diff --git a/algo/x13/x13sm3-gate.c b/algo/x13/x13sm3-gate.c index c4c348b..bc0fb92 100644 --- a/algo/x13/x13sm3-gate.c +++ b/algo/x13/x13sm3-gate.c @@ -12,7 +12,6 @@ bool register_x13sm3_algo( algo_gate_t* gate ) gate->hash = (void*)&x13sm3_hash; #endif gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT; - gate->get_max64 = (void*)&get_max64_0x3ffff; return true; }; @@ -28,7 +27,6 @@ bool register_x13bcd_algo( algo_gate_t* gate ) gate->hash = (void*)&x13bcd_hash; #endif gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT; - gate->get_max64 = (void*)&get_max64_0x3ffff; return true; }; diff --git a/algo/x14/axiom.c b/algo/x14/axiom.c index 7c0b70a..13a0b73 100644 --- a/algo/x14/axiom.c +++ b/algo/x14/axiom.c @@ -83,6 +83,5 @@ bool register_axiom_algo( algo_gate_t* gate ) { gate->scanhash = (void*)&scanhash_axiom; gate->hash = (void*)&axiomhash; - gate->get_max64 = (void*)&get_max64_0x40LL; return true; } diff --git a/algo/x14/polytimos-gate.c b/algo/x14/polytimos-gate.c index aa54060..41f609c 100644 --- a/algo/x14/polytimos-gate.c +++ b/algo/x14/polytimos-gate.c @@ -11,7 +11,6 @@ bool register_polytimos_algo( algo_gate_t* gate ) gate->scanhash = (void*)&scanhash_polytimos; gate->hash = (void*)&polytimos_hash; #endif - gate->get_max64 = (void*)&get_max64_0x3ffff; return true; }; diff --git a/algo/x14/veltor-gate.c b/algo/x14/veltor-gate.c index 5e7e74b..b14613f 100644 --- a/algo/x14/veltor-gate.c +++ b/algo/x14/veltor-gate.c @@ -12,7 +12,6 @@ bool register_veltor_algo( algo_gate_t* gate ) gate->hash = (void*)&veltor_hash; #endif gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT; - gate->get_max64 = (void*)&get_max64_0x3ffff; return true; }; diff --git a/algo/x14/x14-gate.c b/algo/x14/x14-gate.c index d02c305..013aa10 100644 --- a/algo/x14/x14-gate.c +++ b/algo/x14/x14-gate.c @@ -12,7 +12,6 @@ bool register_x14_algo( algo_gate_t* gate ) gate->hash = 
(void*)&x14hash; #endif gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT; - gate->get_max64 = (void*)&get_max64_0x3ffff; return true; }; diff --git a/algo/x17/sonoa-gate.c b/algo/x17/sonoa-gate.c index b420564..fea4d39 100644 --- a/algo/x17/sonoa-gate.c +++ b/algo/x17/sonoa-gate.c @@ -11,7 +11,6 @@ bool register_sonoa_algo( algo_gate_t* gate ) gate->scanhash = (void*)&scanhash_sonoa; gate->hash = (void*)&sonoa_hash; #endif - gate->get_max64 = (void*)&get_max64_0x1ffff; gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT; return true; }; diff --git a/algo/x17/xevan-gate.c b/algo/x17/xevan-gate.c index 52195a1..96b811c 100644 --- a/algo/x17/xevan-gate.c +++ b/algo/x17/xevan-gate.c @@ -12,7 +12,6 @@ bool register_xevan_algo( algo_gate_t* gate ) gate->hash = (void*)&xevan_hash; #endif gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT; - gate->get_max64 = (void*)&get_max64_0xffffLL; opt_target_factor = 256.0; return true; }; diff --git a/algo/yescrypt/yescrypt.c b/algo/yescrypt/yescrypt.c index 44fdabd..fb39d2e 100644 --- a/algo/yescrypt/yescrypt.c +++ b/algo/yescrypt/yescrypt.c @@ -416,16 +416,6 @@ int scanhash_yescrypt( struct work *work, uint32_t max_nonce, return 0; } -int64_t yescrypt_get_max64() -{ - return 0x1ffLL; -} - -int64_t yescryptr16_get_max64() -{ - return 0xfffLL; -} - void yescrypt_gate_base(algo_gate_t *gate ) { gate->optimizations = SSE2_OPT | SHA_OPT; @@ -437,7 +427,6 @@ void yescrypt_gate_base(algo_gate_t *gate ) bool register_yescrypt_algo( algo_gate_t* gate ) { yescrypt_gate_base( gate ); - gate->get_max64 = (void*)&yescrypt_get_max64; if ( opt_param_n ) YESCRYPT_N = opt_param_n; else YESCRYPT_N = 2048; @@ -469,7 +458,6 @@ bool register_yescrypt_algo( algo_gate_t* gate ) bool register_yescryptr8_algo( algo_gate_t* gate ) { yescrypt_gate_base( gate ); - gate->get_max64 = (void*)&yescrypt_get_max64; yescrypt_client_key = "Client Key"; yescrypt_client_key_len = 10; YESCRYPT_N = 2048; @@ -481,7 +469,6 @@ bool register_yescryptr8_algo( 
algo_gate_t* gate ) bool register_yescryptr16_algo( algo_gate_t* gate ) { yescrypt_gate_base( gate ); - gate->get_max64 = (void*)&yescryptr16_get_max64; yescrypt_client_key = "Client Key"; yescrypt_client_key_len = 10; YESCRYPT_N = 4096; @@ -493,7 +480,6 @@ bool register_yescryptr16_algo( algo_gate_t* gate ) bool register_yescryptr32_algo( algo_gate_t* gate ) { yescrypt_gate_base( gate ); - gate->get_max64 = (void*)&yescryptr16_get_max64; yescrypt_client_key = "WaviBanana"; yescrypt_client_key_len = 10; YESCRYPT_N = 4096; diff --git a/algo/yespower/yespower-gate.c b/algo/yespower/yespower-gate.c index 3d5845a..7384869 100644 --- a/algo/yespower/yespower-gate.c +++ b/algo/yespower/yespower-gate.c @@ -109,11 +109,6 @@ int scanhash_yespower_b2b( struct work *work, uint32_t max_nonce, return 0; } -int64_t yespower_get_max64() -{ - return 0xfffLL; -} - bool register_yespower_algo( algo_gate_t* gate ) { yespower_params.version = YESPOWER_1_0; @@ -141,7 +136,6 @@ bool register_yespower_algo( algo_gate_t* gate ) applog( LOG_NOTICE,"Key= \"%s\"\n", yespower_params.pers ); gate->optimizations = SSE2_OPT; - gate->get_max64 = (void*)&yespower_get_max64; gate->scanhash = (void*)&scanhash_yespower; gate->hash = (void*)&yespower_hash; opt_target_factor = 65536.0; @@ -156,7 +150,6 @@ bool register_yespowerr16_algo( algo_gate_t* gate ) yespower_params.pers = NULL; yespower_params.perslen = 0; gate->optimizations = SSE2_OPT; - gate->get_max64 = (void*)&yespower_get_max64; gate->scanhash = (void*)&scanhash_yespower; gate->hash = (void*)&yespower_hash; opt_target_factor = 65536.0; @@ -164,21 +157,10 @@ bool register_yespowerr16_algo( algo_gate_t* gate ) }; -int64_t yescrypt_05_get_max64() -{ - return 0x1ffLL; -} - -int64_t yescryptr16_05_get_max64() -{ - return 0xfffLL; -} - bool register_yescrypt_05_algo( algo_gate_t* gate ) { gate->optimizations = SSE2_OPT | SHA_OPT; gate->scanhash = (void*)&scanhash_yespower; - gate->get_max64 = (void*)&yescrypt_05_get_max64; 
yespower_params.version = YESPOWER_0_5; yespower_params.N = 2048; yespower_params.r = 8; @@ -192,7 +174,6 @@ bool register_yescryptr8_05_algo( algo_gate_t* gate ) { gate->optimizations = SSE2_OPT | SHA_OPT; gate->scanhash = (void*)&scanhash_yespower; - gate->get_max64 = (void*)&yescrypt_05_get_max64; yespower_params.version = YESPOWER_0_5; yespower_params.N = 2048; yespower_params.r = 8; @@ -206,7 +187,6 @@ bool register_yescryptr16_05_algo( algo_gate_t* gate ) { gate->optimizations = SSE2_OPT | SHA_OPT; gate->scanhash = (void*)&scanhash_yespower; - gate->get_max64 = (void*)&yescryptr16_05_get_max64; yespower_params.version = YESPOWER_0_5; yespower_params.N = 4096; yespower_params.r = 16; @@ -220,7 +200,6 @@ bool register_yescryptr32_05_algo( algo_gate_t* gate ) { gate->optimizations = SSE2_OPT | SHA_OPT; gate->scanhash = (void*)&scanhash_yespower; - gate->get_max64 = (void*)&yescryptr16_05_get_max64; yespower_params.version = YESPOWER_0_5; yespower_params.N = 4096; yespower_params.r = 32; @@ -245,7 +224,6 @@ bool register_power2b_algo( algo_gate_t* gate ) applog( LOG_NOTICE,"Key length= %d\n", yespower_params.perslen ); gate->optimizations = SSE2_OPT; - gate->get_max64 = (void*)&yespower_get_max64; gate->scanhash = (void*)&scanhash_yespower_b2b; gate->hash = (void*)&yespower_b2b_hash; opt_target_factor = 65536.0; @@ -286,7 +264,6 @@ bool register_yespower_b2b_algo( algo_gate_t* gate ) } gate->optimizations = SSE2_OPT; - gate->get_max64 = (void*)&yespower_get_max64; gate->scanhash = (void*)&scanhash_yespower_b2b; gate->hash = (void*)&yespower_b2b_hash; opt_target_factor = 65536.0; diff --git a/api.c b/api.c index 8999397..60855b4 100644 --- a/api.c +++ b/api.c @@ -32,7 +32,7 @@ #include #include "miner.h" - +#include "sysinfos.c" #ifndef WIN32 # include # include @@ -105,7 +105,7 @@ extern double global_hashrate; #define USE_MONITORING extern float cpu_temp(int); extern uint32_t cpu_clock(int); -extern int cpu_fanpercent(void); +//extern int cpu_fanpercent(void); 
/***************************************************************/ diff --git a/configure b/configure index 50f9a5e..84d13ec 100755 --- a/configure +++ b/configure @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.9.9. +# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.9.9.1. # # # Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc. @@ -577,8 +577,8 @@ MAKEFLAGS= # Identity of this package. PACKAGE_NAME='cpuminer-opt' PACKAGE_TARNAME='cpuminer-opt' -PACKAGE_VERSION='3.9.9' -PACKAGE_STRING='cpuminer-opt 3.9.9' +PACKAGE_VERSION='3.9.9.1' +PACKAGE_STRING='cpuminer-opt 3.9.9.1' PACKAGE_BUGREPORT='' PACKAGE_URL='' @@ -1332,7 +1332,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures cpuminer-opt 3.9.9 to adapt to many kinds of systems. +\`configure' configures cpuminer-opt 3.9.9.1 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1404,7 +1404,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of cpuminer-opt 3.9.9:";; + short | recursive ) echo "Configuration of cpuminer-opt 3.9.9.1:";; esac cat <<\_ACEOF @@ -1509,7 +1509,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -cpuminer-opt configure 3.9.9 +cpuminer-opt configure 3.9.9.1 generated by GNU Autoconf 2.69 Copyright (C) 2012 Free Software Foundation, Inc. @@ -2012,7 +2012,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by cpuminer-opt $as_me 3.9.9, which was +It was created by cpuminer-opt $as_me 3.9.9.1, which was generated by GNU Autoconf 2.69. 
Invocation command line was $ $0 $@ @@ -2993,7 +2993,7 @@ fi # Define the identity of the package. PACKAGE='cpuminer-opt' - VERSION='3.9.9' + VERSION='3.9.9.1' cat >>confdefs.h <<_ACEOF @@ -6690,7 +6690,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by cpuminer-opt $as_me 3.9.9, which was +This file was extended by cpuminer-opt $as_me 3.9.9.1, which was generated by GNU Autoconf 2.69. Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -6756,7 +6756,7 @@ _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" ac_cs_version="\\ -cpuminer-opt config.status 3.9.9 +cpuminer-opt config.status 3.9.9.1 configured by $0, generated by GNU Autoconf 2.69, with options \\"\$ac_cs_config\\" diff --git a/configure.ac b/configure.ac index f947f4f..4a049db 100644 --- a/configure.ac +++ b/configure.ac @@ -1,4 +1,4 @@ -AC_INIT([cpuminer-opt], [3.9.9]) +AC_INIT([cpuminer-opt], [3.9.9.1]) AC_PREREQ([2.59c]) AC_CANONICAL_SYSTEM diff --git a/cpu-miner.c b/cpu-miner.c index fe11840..5bad7af 100644 --- a/cpu-miner.c +++ b/cpu-miner.c @@ -37,6 +37,7 @@ #include #include #include +#include "sysinfos.c" #ifdef WIN32 #include @@ -81,7 +82,6 @@ bool opt_debug_diff = false; bool opt_protocol = false; bool opt_benchmark = false; bool opt_redirect = true; -bool opt_showdiff = true; bool opt_extranonce = true; bool want_longpoll = true; bool have_longpoll = false; @@ -110,12 +110,16 @@ int opt_n_threads = 0; bool opt_reset_on_stale = false; // Windows doesn't support 128 bit affinity mask. +// Need compile time and run time test. 
#if defined(__linux) && defined(GCC_INT128) #define AFFINITY_USES_UINT128 1 -uint128_t opt_affinity = -1LL; +uint128_t opt_affinity = -1; +static bool affinity_uses_uint128 = true; #else -uint64_t opt_affinity = -1LL; +uint64_t opt_affinity = -1; +static bool affinity_uses_uint128 = false; #endif + int opt_priority = 0; int num_cpus = 1; int num_cpugroups = 1; @@ -148,9 +152,10 @@ double opt_target_factor = 1.0; uint32_t zr5_pok = 0; bool opt_stratum_stats = false; bool opt_hash_meter = false; -uint32_t accepted_share_count = 0ULL; -uint32_t rejected_share_count = 0ULL; -uint32_t solved_block_count = 0ULL; +uint32_t submitted_share_count= 0; +uint32_t accepted_share_count = 0; +uint32_t rejected_share_count = 0; +uint32_t solved_block_count = 0; double *thr_hashrates; double *thr_hashcount; double global_hashcount = 0; @@ -212,9 +217,9 @@ static inline void drop_policy(void) // Linux affinity can use int128. #if AFFINITY_USES_UINT128 -static void affine_to_cpu_mask( int id, unsigned __int128 mask ) +static void affine_to_cpu_mask( int id, uint128_t mask ) #else -static void affine_to_cpu_mask( int id, unsigned long long mask ) +static void affine_to_cpu_mask( int id, uint64_t mask ) #endif { cpu_set_t set; @@ -225,9 +230,9 @@ static void affine_to_cpu_mask( int id, unsigned long long mask ) { // cpu mask #if AFFINITY_USES_UINT128 - if( ( mask & ( (unsigned __int128)1ULL << i ) ) ) CPU_SET( i, &set ); + if( ( mask & ( (uint128_t)1 << i ) ) ) CPU_SET( i, &set ); #else - if( (ncpus > 64) || ( mask & (1ULL << i) ) ) CPU_SET( i, &set ); + if( (ncpus > 64) || ( mask & (1 << i) ) ) CPU_SET( i, &set ); #endif } if ( id == -1 ) @@ -246,7 +251,7 @@ static void affine_to_cpu_mask( int id, unsigned long long mask ) static inline void drop_policy(void) { } // Windows CPU groups to manage more than 64 CPUs. 
-static void affine_to_cpu_mask( int id, unsigned long mask ) +static void affine_to_cpu_mask( int id, uint64_t mask ) { bool success; unsigned long last_error; @@ -254,45 +259,44 @@ static void affine_to_cpu_mask( int id, unsigned long mask ) // DWORD last_error; if ( id == -1 ) - success = SetProcessAffinityMask( GetCurrentProcess(), mask ); + success = SetProcessAffinityMask( GetCurrentProcess(), mask ); // Are Windows CPU Groups supported? #if _WIN32_WINNT==0x0601 else if ( num_cpugroups == 1 ) - success = SetThreadAffinityMask( GetCurrentThread(), mask ); + success = SetThreadAffinityMask( GetCurrentThread(), mask ); else { - // Find the correct cpu group - int cpu = id % num_cpus; - int group; - for( group = 0; group < num_cpugroups; group++ ) - { - int cpus = GetActiveProcessorCount( group ); - if ( cpu < cpus ) - break; + // Find the correct cpu group + int cpu = id % num_cpus; + int group; + for( group = 0; group < num_cpugroups; group++ ) + { + int cpus = GetActiveProcessorCount( group ); + if ( cpu < cpus ) break; + cpu -= cpus; + } - cpu -= cpus; - } + if (opt_debug) + applog(LOG_DEBUG, "Binding thread %d to cpu %d on cpu group %d (mask %x)", + id, cpu, group, (1ULL << cpu)); - if (opt_debug) - applog(LOG_DEBUG, "Binding thread %d to cpu %d on cpu group %d (mask %x)", id, cpu, group, (1ULL << cpu)); - - GROUP_AFFINITY affinity; - affinity.Group = group; - affinity.Mask = 1ULL << cpu; - success = SetThreadGroupAffinity( GetCurrentThread(), &affinity, NULL ); + GROUP_AFFINITY affinity; + affinity.Group = group; + affinity.Mask = 1ULL << cpu; + success = SetThreadGroupAffinity( GetCurrentThread(), &affinity, NULL ); } #else else - success = SetThreadAffinityMask( GetCurrentThread(), mask ); + success = SetThreadAffinityMask( GetCurrentThread(), mask ); #endif if (!success) { - last_error = GetLastError(); - applog(LOG_WARNING, "affine_to_cpu_mask for %u returned %x", id, last_error); + last_error = GetLastError(); + applog(LOG_WARNING, "affine_to_cpu_mask 
for %u returned %x", + id, last_error); } - } #else @@ -831,21 +835,22 @@ void scale_hash_for_display ( double* hashrate, char* prefix ) { *prefix = 'E'; *hashrate /= 1e18; } } -static inline void sprintf_et( char *str, uint64_t seconds ) +static inline void sprintf_et( char *str, int seconds ) { - uint64_t min = seconds / 60; - uint64_t sec = seconds % 60; - uint64_t hrs = min / 60; + // sprintf doesn't like uint64_t, Linux thinks it's long, Windows long long. + unsigned int min = seconds / 60; + unsigned int sec = seconds % 60; + unsigned int hrs = min / 60; if ( hrs ) { - uint64_t days = hrs / 24; + unsigned int days = hrs / 24; if ( days ) //0d00h - sprintf( str, "%llud%02lluh", days, hrs % 24 ); + sprintf( str, "%ud%02uh", days, hrs % 24 ); else // 0h00m - sprintf( str, "%lluh%02llum", hrs, min % 60 ); + sprintf( str, "%uh%02um", hrs, min % 60 ); } else // 0m00s - sprintf( str, "%llum%02llus", min, sec ); + sprintf( str, "%um%02us", min, sec ); } // Bitcoin formula for converting difficulty to an equivalent @@ -859,23 +864,33 @@ static inline void sprintf_et( char *str, uint64_t seconds ) const double diff_to_hash = 4294967296.; +static struct timeval session_start; static struct timeval five_min_start; -static double time_sum = 0.; static double latency_sum = 0.; static uint64_t submit_sum = 0; +static uint64_t accept_sum = 0; static uint64_t reject_sum = 0; +static double norm_diff_sum = 0.; static uint32_t last_block_height = 0; static double last_targetdiff = 0.; +static double ref_rate_hi = 0.; +static double ref_rate_lo = 1e100; +#if !(defined(__WINDOWS__) || defined(__WIN64)) +static uint32_t hi_temp = 0; +#endif +//static uint32_t stratum_errors = 0; struct share_stats_t { struct timeval submit_time; double net_diff; double share_diff; + double stratum_diff; + double target_diff; }; #define s_stats_size 8 -static struct share_stats_t share_stats[ s_stats_size ]; +static struct share_stats_t share_stats[ s_stats_size ] = {0}; static int s_get_ptr = 0, 
s_put_ptr = 0; static struct timeval last_submit_time = {0}; @@ -886,77 +901,102 @@ static inline int stats_ptr_incr( int p ) void report_summary_log( bool force ) { - struct timeval now, et; + struct timeval now, et, uptime, start_time; pthread_mutex_lock( &stats_lock ); gettimeofday( &now, NULL ); timeval_subtract( &et, &now, &five_min_start ); - if ( !force && et.tv_sec < 300 ) + if ( !( force && ( submit_sum || ( et.tv_sec > 5 ) ) ) + && ( et.tv_sec < 300 ) ) { pthread_mutex_unlock( &stats_lock ); return; } - // collect and reset global counters - double time = time_sum; time_sum = 0.; - uint64_t submits = submit_sum; submit_sum = 0; - uint64_t rejects = reject_sum; reject_sum = 0; - int latency = latency_sum; latency_sum = 0; + // collect and reset periodic counters + uint64_t submits = submit_sum; submit_sum = 0; + uint64_t accepts = accept_sum; accept_sum = 0; + uint64_t rejects = reject_sum; reject_sum = 0; +// int latency = latency_sum; latency_sum = 0; + memcpy( &start_time, &five_min_start, sizeof start_time ); memcpy( &five_min_start, &now, sizeof now ); pthread_mutex_unlock( &stats_lock ); - double ghrate = global_hashrate; - double scaled_ghrate = ghrate; - double shrate = time == 0. ? 0. : diff_to_hash * last_targetdiff - * (double)(submits - rejects) / time; - double scaled_shrate = shrate; - int avg_latency = 0; - double latency_pc = 0.; - double submit_rate = 0.; + timeval_subtract( &et, &now, &start_time ); + timeval_subtract( &uptime, &now, &session_start ); + + double share_time = (double)et.tv_sec + (double)et.tv_usec / 1e6; + double ghrate = global_hashrate; + double scaled_ghrate = ghrate; + double shrate = share_time == 0. ? 0. : diff_to_hash * last_targetdiff + * (double)(accepts) / share_time; + double sess_hrate = uptime.tv_sec == 0. ? 0. 
: diff_to_hash * norm_diff_sum + / (double)uptime.tv_sec; + double scaled_shrate = shrate; +// int avg_latency = 0; +// double latency_pc = 0.; + double submit_rate = 0.; char shr_units[4] = {0}; char ghr_units[4] = {0}; + char sess_hr_units[4] = {0}; char et_str[24]; + char upt_str[24]; - if ( submits ) - avg_latency = latency / submits; +// if ( submits ) avg_latency = latency / submits; - if ( time != 0. ) + if ( share_time != 0. ) { - submit_rate = (double)submits*60. / time; - latency_pc = (double)latency / (time * 10.); + submit_rate = (double)submits*60. / share_time; +// latency_pc = (double)latency / (share_time * 10.); } + if ( ghrate > ref_rate_hi ) ref_rate_hi = ghrate; + if ( ghrate < ref_rate_lo ) ref_rate_lo = ghrate; + scale_hash_for_display( &scaled_shrate, shr_units ); scale_hash_for_display( &scaled_ghrate, ghr_units ); + scale_hash_for_display( &sess_hrate, sess_hr_units ); + sprintf_et( et_str, et.tv_sec ); + sprintf_et( upt_str, uptime.tv_sec ); - applog( LOG_NOTICE, "Submitted %d shares in %s, %.2f /min, %ld rejected", - submits, et_str, submit_rate, rejects ); - applog2( LOG_INFO, "Share eqv: %.2f %sh/s, miner ref: %.2f %sh/s", - scaled_shrate, shr_units, scaled_ghrate, ghr_units ); + applog( LOG_NOTICE, "Periodic Report %s %s", et_str, upt_str ); + applog2( LOG_INFO, "Share rate %.2f/min %.2f/min", + submit_rate, (double)submitted_share_count*60. 
/ + ( (double)uptime.tv_sec + (double)uptime.tv_usec / 1e6 ) ); + applog2( LOG_INFO, "Hash rate %7.2f%sh/s %7.2f%sh/s (%.2f%sh/s)", + scaled_shrate, shr_units, sess_hrate, sess_hr_units, + scaled_ghrate, ghr_units ); + applog2( LOG_INFO,"Submitted %6d %6d", + submits, submitted_share_count ); + applog2( LOG_INFO,"Accepted %6d %6d", + accepts, accepted_share_count ); + applog2( LOG_INFO,"Rejected %6d %6d", + rejects, rejected_share_count ); +// applog2( LOG_INFO,"Blocks solved %6d", +// solved_block_count ); + +#if !(defined(__WINDOWS__) || defined(__WIN64)) -#if ((defined(_WIN64) || defined(__WINDOWS__))) - applog2( LOG_INFO, "Network latency %d ms (%.2f%%)", - avg_latency, latency_pc ); -#else int temp = cpu_temp(0); char tempstr[32]; + if ( temp > hi_temp ) hi_temp = temp; if ( use_colors && ( temp >= 70 ) ) { if ( temp >= 80 ) - sprintf( tempstr, "%sCPU temp %d C%s", CL_WHT CL_RED, temp, CL_N ); + sprintf( tempstr, "%s%dC%s", CL_WHT CL_RED, temp, CL_N ); else - sprintf( tempstr, "%sCPU temp %d C%s", CL_WHT CL_YLW, temp, CL_N ); + sprintf( tempstr, "%s%dC%s", CL_WHT CL_YLW, temp, CL_N ); } else - sprintf( tempstr, "CPU temp %d C", temp ); + sprintf( tempstr, "%dC", temp ); + + applog2(LOG_INFO,"CPU temp %s max %dC", tempstr, hi_temp ); - applog2( LOG_INFO, "Network latency %d ms (%.2f%%), %s", - avg_latency, latency_pc, tempstr ); #endif } @@ -983,6 +1023,7 @@ static int share_result( int result, struct work *null_work, } else { + // empty queue, it must have overflowed and stats were lost for a share. 
pthread_mutex_unlock( &stats_lock ); applog(LOG_WARNING,"Pending shares overflow, stats for share are lost."); } @@ -992,9 +1033,9 @@ static int share_result( int result, struct work *null_work, { gettimeofday( &ack_time, NULL ); timeval_subtract( &latency_tv, &ack_time, &my_stats.submit_time ); - latency = ( latency_tv.tv_sec * 1000 + latency_tv.tv_usec / 1000 ); + latency = ( latency_tv.tv_sec * 1e3 + latency_tv.tv_usec / 1e3 ); timeval_subtract( &et, &my_stats.submit_time, &last_submit_time ); - share_time = (double)et.tv_sec + ( (double)et.tv_usec / 1000000. ); + share_time = (double)et.tv_sec + ( (double)et.tv_usec / 1e6 ); memcpy( &last_submit_time, &my_stats.submit_time, sizeof last_submit_time ); } @@ -1003,11 +1044,23 @@ static int share_result( int result, struct work *null_work, my_stats.net_diff * 100.; // check result + if ( result ) + { + accepted_share_count++; + if ( ( my_stats.net_diff > 0. ) && ( my_stats.share_diff >= net_diff ) ) + { + solved = true; + solved_block_count++; + } + } + else + rejected_share_count++; +/* result ? accepted_share_count++ : rejected_share_count++; solved = result && (my_stats.net_diff > 0.0 ) && ( my_stats.share_diff >= net_diff ); solved_block_count += solved ? 
1 : 0 ; - +*/ // update global counters for summary report pthread_mutex_lock( &stats_lock ); @@ -1019,9 +1072,14 @@ static int share_result( int result, struct work *null_work, global_hashcount = hashcount; global_hashrate = hashrate; - time_sum += share_time; - submit_sum ++; - reject_sum += (uint64_t)!result; + if ( result ) + { + accept_sum++; + norm_diff_sum += my_stats.target_diff; + } + else + reject_sum++; + submit_sum++; latency_sum += latency; pthread_mutex_unlock( &stats_lock ); @@ -1057,7 +1115,7 @@ static int share_result( int result, struct work *null_work, bin2hex( str3, (unsigned char*)str2, 12 ); applog2( LOG_INFO, "Hash: %s...", str3 ); - diff_to_target( str1, last_targetdiff ); + diff_to_target( str1, my_stats.target_diff ); for ( int i = 0; i < 8; i++ ) be32enc( str2 + i, str1[7 - i] ); bin2hex( str3, (unsigned char*)str2, 12 ); @@ -1569,42 +1627,44 @@ static void *workio_thread(void *userdata) bool ok = true; curl = curl_easy_init(); - if (unlikely(!curl)) - { + if (unlikely( !curl ) ) + { applog(LOG_ERR, "CURL initialization failed"); return NULL; } - if(jsonrpc_2 && !have_stratum) - ok = rpc2_workio_login(curl); - while (ok) - { + if ( jsonrpc_2 && !have_stratum ) + ok = rpc2_workio_login( curl ); + + while (ok) + { struct workio_cmd *wc; /* wait for workio_cmd sent to us, on our queue */ wc = (struct workio_cmd *) tq_pop(mythr->q, NULL); if (!wc) - { + { ok = false; break; } /* process workio_cmd */ switch (wc->cmd) - { - case WC_GET_WORK: - ok = workio_get_work(wc, curl); - break; - case WC_SUBMIT_WORK: - ok = workio_submit_work(wc, curl); - break; + { + case WC_GET_WORK: + ok = workio_get_work(wc, curl); + break; + case WC_SUBMIT_WORK: + ok = workio_submit_work(wc, curl); + break; - default: /* should never happen */ - ok = false; - break; + default: /* should never happen */ + ok = false; + break; } workio_cmd_free(wc); } - tq_freeze(mythr->q); + + tq_freeze(mythr->q); curl_easy_cleanup(curl); return NULL; } @@ -1695,17 +1755,18 @@ void 
work_set_target_ratio( struct work* work, uint32_t* hash ) work->sharediff = 0.; // collect some share stats + // Frequent share submission combined with high latency can caused + // shares to be submitted faster than they are acked. If severe enough + // it can overflow the queue and overwrite stats for a share. pthread_mutex_lock( &stats_lock ); - // if buffer full discard the stats and don't increment pointer. - // We're on the clock so let share_result report it. - if ( share_stats[ s_put_ptr ].submit_time.tv_sec == 0 ) - { - gettimeofday( &share_stats[ s_put_ptr ].submit_time, NULL ); - share_stats[ s_put_ptr ].share_diff = work->sharediff; - share_stats[ s_put_ptr ].net_diff = net_diff; - s_put_ptr = stats_ptr_incr( s_put_ptr ); - } + gettimeofday( &share_stats[ s_put_ptr ].submit_time, NULL ); + share_stats[ s_put_ptr ].share_diff = work->sharediff; + share_stats[ s_put_ptr ].net_diff = net_diff; + share_stats[ s_put_ptr ].stratum_diff = stratum_diff; + share_stats[ s_put_ptr ].target_diff = work->targetdiff; + + s_put_ptr = stats_ptr_incr( s_put_ptr ); pthread_mutex_unlock( &stats_lock ); } @@ -1715,10 +1776,11 @@ bool submit_solution( struct work *work, void *hash, { if ( submit_work( thr, work ) ) { + submitted_share_count++; work_set_target_ratio( work, hash ); if ( !opt_quiet ) applog( LOG_BLUE, "Share %d submitted by thread %d", - accepted_share_count + rejected_share_count + 1, thr->id ); + submitted_share_count, thr->id ); return true; } else @@ -1731,10 +1793,11 @@ bool submit_lane_solution( struct work *work, void *hash, { if ( submit_work( thr, work ) ) { + submitted_share_count++; work_set_target_ratio( work, hash ); if ( !opt_quiet ) applog( LOG_BLUE, "Share %d submitted by thread %d, lane %d", - accepted_share_count + rejected_share_count + 1, thr->id, lane ); + submitted_share_count, thr->id, lane ); return true; } else @@ -1793,22 +1856,8 @@ static bool wanna_mine(int thr_id) return state; } -void std_wait_for_diff() -{ - while ( time(NULL) 
>= g_work_time + 120 ) - sleep(1); -} - // Common target functions, default usually listed first. -// pick your favorite or define your own -int64_t get_max64_0x1fffffLL() { return 0x1fffffLL; } // default -int64_t get_max64_0x40LL() { return 0x40LL; } -int64_t get_max64_0x3ffff() { return 0x3ffff; } -int64_t get_max64_0x3fffffLL() { return 0x3fffffLL; } -int64_t get_max64_0x1ffff() { return 0x1ffff; } -int64_t get_max64_0xffffLL() { return 0xffffLL; }; - // default void sha256d_gen_merkle_root( char* merkle_root, struct stratum_ctx* sctx ) { @@ -1943,9 +1992,11 @@ static void *miner_thread( void *userdata ) // what is an appropriate value that is completely neutral? // zero seems to work. No, it breaks benchmark. // uint32_t end_nonce = 0; - uint32_t end_nonce = opt_benchmark - ? ( 0xffffffffU / opt_n_threads ) * (thr_id + 1) - 0x20 - : 0; +// uint32_t end_nonce = opt_benchmark +// ? ( 0xffffffffU / opt_n_threads ) * (thr_id + 1) - 0x20 +// : 0; + uint32_t end_nonce = 0xffffffffU / opt_n_threads * (thr_id + 1) - 0x20; + time_t firstwork_time = 0; int i; memset( &work, 0, sizeof(work) ); @@ -1996,37 +2047,42 @@ static void *miner_thread( void *userdata ) // Default affinity if ( (opt_affinity == (uint128_t)(-1) ) && opt_n_threads > 1 ) { + affine_to_cpu_mask( thr_id, (uint128_t)1 << (thr_id % num_cpus) ); if ( opt_debug ) applog( LOG_DEBUG, "Binding thread %d to cpu %d.", thr_id, thr_id % num_cpus, u128_hi64( (uint128_t)1 << (thr_id % num_cpus) ), u128_lo64( (uint128_t)1 << (thr_id % num_cpus) ) ); - affine_to_cpu_mask( thr_id, (uint128_t)1 << (thr_id % num_cpus) ); } #else - if ( (opt_affinity == -1LL) && opt_n_threads > 1 ) + if ( ( opt_affinity == -1 ) && ( opt_n_threads > 1 ) ) { + affine_to_cpu_mask( thr_id, 1 << (thr_id % num_cpus) ); if (opt_debug) applog( LOG_DEBUG, "Binding thread %d to cpu %d.", - thr_id, thr_id % num_cpus, 1LL << (thr_id % num_cpus)) ; - affine_to_cpu_mask( thr_id, 1ULL << (thr_id % num_cpus) ); + thr_id, thr_id % num_cpus, 1 << (thr_id 
% num_cpus)) ; } #endif else // Custom affinity { + affine_to_cpu_mask( thr_id, opt_affinity ); + if ( opt_debug ) + { #if AFFINITY_USES_UINT128 - if (opt_debug) - applog( LOG_DEBUG, "Binding thread %d to mask %016llx %016llx", + if ( num_cpus > 64 ) + applog( LOG_DEBUG, "Binding thread %d to mask %016llx %016llx", thr_id, u128_hi64( opt_affinity ), u128_lo64( opt_affinity ) ); + else + applog( LOG_DEBUG, "Binding thread %d to mask %016llx", + thr_id, opt_affinity ); #else - if (opt_debug) - applog( LOG_DEBUG, "Binding thread %d to mask %016llx", + applog( LOG_DEBUG, "Binding thread %d to mask %016llx", thr_id, opt_affinity ); #endif - affine_to_cpu_mask( thr_id, opt_affinity ); + } } - } + } // num_cpus > 1 if ( !algo_gate.miner_thread_init( thr_id ) ) { @@ -2034,18 +2090,20 @@ static void *miner_thread( void *userdata ) exit (1); } + // wait for stratum to send first job + if ( have_stratum ) while ( !stratum.job.job_id ) sleep(1); + while (1) { uint64_t hashes_done; struct timeval tv_start, tv_end, diff; - int64_t max64; + int64_t max64 = 1000; int nonce_found = 0; if ( algo_gate.do_this_thread( thr_id ) ) { if ( have_stratum ) { - algo_gate.wait_for_diff( &stratum ); pthread_mutex_lock( &g_work_lock ); if ( *algo_gate.get_nonceptr( work.data ) >= end_nonce ) algo_gate.stratum_gen_work( &stratum, &g_work ); @@ -2117,11 +2175,13 @@ static void *miner_thread( void *userdata ) } if ( remain < max64 ) max64 = remain; } - // max64 + // Select nonce range for approx 1 min duration based + // on hashrate, initial value arbitrarilly set to 1000 just to get + // a sample hashrate for the next time. 
uint32_t work_nonce = *( algo_gate.get_nonceptr( work.data ) ); - max64 *= thr_hashrates[thr_id]; + max64 = 60 * thr_hashrates[thr_id]; if ( max64 <= 0) - max64 = (int64_t)algo_gate.get_max64(); + max64 = 1000; if ( work_nonce + max64 > end_nonce ) max_nonce = end_nonce; else @@ -2171,8 +2231,6 @@ static void *miner_thread( void *userdata ) pthread_mutex_unlock( &g_work_lock ); } } - // Check if time for summary report - report_summary_log( false ); // display hashrate if ( !opt_quiet ) { @@ -2199,28 +2257,6 @@ static void *miner_thread( void *userdata ) thr_id, hc, hc_units, hr, hr_units ); } } -/* - if ( thr_id == 0 && !opt_benchmark ) - { - hashcount = 0.; - hashrate = 0.; - for ( i = 0; i < opt_n_threads; i++ ) - { - hashrate += thr_hashrates[i]; - hashcount += thr_hashcount[i]; - } - if ( hashcount != 0. ) - { - scale_hash_for_display( &hashcount, hc_units ); - scale_hash_for_display( &hashrate, hr_units ); - if ( hc_units[0] ) - sprintf( hc, "%.2f", hashcount ); - else // no fractions of a hash - sprintf( hc, "%.0f", hashcount ); - sprintf( hr, "%.2f", hashrate ); - } - } -*/ } // Display benchmark total @@ -2260,8 +2296,8 @@ static void *miner_thread( void *userdata ) hc, hc_units, hr, hr_units, (uint32_t)cpu_temp(0) ); #endif } - } - } + } + } // benchmark } // miner_thread loop out: @@ -2582,11 +2618,7 @@ void std_stratum_gen_work( struct stratum_ctx *sctx, struct work *g_work ) char share_ttf[32]; if ( stratum_diff != sctx->job.diff ) - { - // If diff is changing report summary from old diff first. - report_summary_log( stratum_diff != 0. 
); applog( LOG_BLUE, "New stratum difficulty" ); - } if ( last_block_height != sctx->block_height ) applog( LOG_BLUE, "New block" ); @@ -2614,6 +2646,8 @@ void jr2_stratum_gen_work( struct stratum_ctx *sctx, struct work *g_work ) work_free( g_work ); work_copy( g_work, &sctx->work ); pthread_mutex_unlock( &sctx->work_lock ); + if ( last_block_height != stratum.block_height ) + last_block_height = stratum.block_height; } static void *stratum_thread(void *userdata ) @@ -2642,6 +2676,8 @@ static void *stratum_thread(void *userdata ) } else // if ( !opt_quiet ) applog(LOG_WARNING, "Stratum connection reset"); + // reset stats queue as well + s_get_ptr = s_put_ptr = 0; } while ( !stratum.curl ) @@ -2673,6 +2709,9 @@ static void *stratum_thread(void *userdata ) } } + report_summary_log( ( stratum_diff != stratum.job.diff ) + && ( stratum_diff != 0. ) ); + if ( stratum.job.job_id && ( !g_work_time || strcmp( stratum.job.job_id, g_work.job_id ) ) ) { @@ -2682,40 +2721,53 @@ static void *stratum_thread(void *userdata ) pthread_mutex_unlock(&g_work_lock); restart_threads(); +/* if ( stratum.job.clean || jsonrpc_2 ) { static uint32_t last_block_height; if ( last_block_height != stratum.block_height ) { last_block_height = stratum.block_height; -/* - if ( !opt_quiet ) - { - if ( net_diff > 0. 
) - applog( LOG_BLUE, - "%s block %d, job %s, network diff %.4f", - algo_names[opt_algo], stratum.bloc_height, - g_work.job_id, net_diff); - else - applog( LOG_BLUE, "%s %s block %d, job %s", - short_url, algo_names[opt_algo], - stratum.bloc_height, g_work.job_id ); - } -*/ } -// else if ( !opt_quiet ) -// applog( LOG_BLUE,"New job %s.", g_work.job_id ); } - else if (opt_debug && !opt_quiet) + else +*/ + if (opt_debug && !opt_quiet) { applog( LOG_BLUE, "%s asks job %d for block %d", short_url, strtoul( stratum.job.job_id, NULL, 16 ), stratum.block_height ); } } // stratum.job.job_id + if ( stratum_socket_full( &stratum, opt_timeout ) ) + { + s = stratum_recv_line(&stratum); + if ( !s ) + applog(LOG_WARNING, "Stratum connection interrupted"); + } + else + { + s = NULL; + applog(LOG_ERR, "Stratum connection timeout"); + } + + if ( s ) + { + if ( !stratum_handle_method( &stratum, s ) ) + stratum_handle_response( s ); + free( s ); + } + else + { + // stratum_errors++; + // check if this redundant + stratum_disconnect( &stratum ); + } +/* if ( !stratum_socket_full( &stratum, opt_timeout ) ) { + stratum_errors++; applog(LOG_ERR, "Stratum connection timeout"); s = NULL; } @@ -2724,13 +2776,14 @@ static void *stratum_thread(void *userdata ) if ( !s ) { stratum_disconnect(&stratum); -// applog(LOG_WARNING, "Stratum connection interrupted"); + applog(LOG_WARNING, "Stratum connection interrupted"); continue; } if (!stratum_handle_method(&stratum, s)) stratum_handle_response(s); free(s); - } // loop +*/ + } // loop out: return NULL; } @@ -3074,10 +3127,6 @@ void parse_arg(int key, char *arg ) case 1012: opt_extranonce = false; break; - case 1013: - applog( LOG_WARNING, "hide-diff option is deprecated and has no effect.\n It will be removed in a future release. Stop using it."); - opt_showdiff = false; - break; case 1014: // hash-meter opt_hash_meter = true; break; @@ -3124,7 +3173,7 @@ void parse_arg(int key, char *arg ) // than 64 CPUs, otherwise zero extend the upper half. 
opt_affinity = (uint128_t)ul; if ( num_cpus > 64 ) - opt_affinity = (opt_affinity << 64 ) | (uint128_t)ul; + opt_affinity = (opt_affinity << 64 ) | opt_affinity; #else opt_affinity = ul; #endif @@ -3303,10 +3352,10 @@ bool check_cpu_capability () bool cpu_has_sse2 = has_sse2(); bool cpu_has_aes = has_aes_ni(); bool cpu_has_sse42 = has_sse42(); - bool cpu_has_avx = has_avx1(); + bool cpu_has_avx = has_avx(); bool cpu_has_avx2 = has_avx2(); bool cpu_has_sha = has_sha(); - bool cpu_has_avx512 = has_avx512f(); + bool cpu_has_avx512 = has_avx512(); bool sw_has_aes = false; bool sw_has_sse42 = false; bool sw_has_avx = false; @@ -3340,16 +3389,16 @@ bool check_cpu_capability () #ifdef __AVX2__ sw_has_avx2 = true; #endif - #ifdef __AVX512F__ + #if (defined(__AVX512F__) && defined(__AVX51DQF__) && defined(__AVX51BW__) && defined(__AVX512VL__)) sw_has_avx512 = true; #endif #ifdef __SHA__ sw_has_sha = true; #endif - #if !((__AES__) || (__SSE2__)) - printf("Neither __AES__ nor __SSE2__ defined.\n"); - #endif +// #if !((__AES__) || (__SSE2__)) +// printf("Neither __AES__ nor __SSE2__ defined.\n"); +// #endif cpu_brand_string( cpu_brand ); printf( "CPU: %s.\n", cpu_brand ); @@ -3465,7 +3514,7 @@ int main(int argc, char *argv[]) rpc_pass = strdup(""); opt_api_allow = strdup("127.0.0.1"); /* 0.0.0.0 for all ips */ - parse_cmdline(argc, argv); + parse_cmdline(argc, argv); #if defined(WIN32) // SYSTEM_INFO sysinfo; @@ -3486,9 +3535,9 @@ int main(int argc, char *argv[]) applog(LOG_DEBUG, "Found %d cpus on cpu group %d", cpus, i); } #else - SYSTEM_INFO sysinfo; - GetSystemInfo(&sysinfo); - num_cpus = sysinfo.dwNumberOfProcessors; + SYSTEM_INFO sysinfo; + GetSystemInfo(&sysinfo); + num_cpus = sysinfo.dwNumberOfProcessors; #endif #elif defined(_SC_NPROCESSORS_CONF) @@ -3504,21 +3553,21 @@ int main(int argc, char *argv[]) num_cpus = 1; - if (!opt_n_threads) - opt_n_threads = num_cpus; + if (!opt_n_threads) + opt_n_threads = num_cpus; - if ( opt_algo == ALGO_NULL ) - { - fprintf(stderr, 
"%s: no algo supplied\n", argv[0]); - show_usage_and_exit(1); - } + if ( opt_algo == ALGO_NULL ) + { + fprintf(stderr, "%s: no algo supplied\n", argv[0]); + show_usage_and_exit(1); + } if ( !opt_benchmark ) - { - if ( !short_url ) - { - fprintf(stderr, "%s: no URL supplied\n", argv[0]); - show_usage_and_exit(1); - } + { + if ( !short_url ) + { + fprintf(stderr, "%s: no URL supplied\n", argv[0]); + show_usage_and_exit(1); + } /* if ( !rpc_url ) { @@ -3557,6 +3606,7 @@ int main(int argc, char *argv[]) memset( share_stats, 0, 2 * sizeof (struct share_stats_t) ); gettimeofday( &last_submit_time, NULL ); memcpy( &five_min_start, &last_submit_time, sizeof (struct timeval) ); + memcpy( &session_start, &last_submit_time, sizeof (struct timeval) ); if ( !check_cpu_capability() ) exit(1); @@ -3578,7 +3628,7 @@ int main(int argc, char *argv[]) #ifndef WIN32 if (opt_background) - { + { i = fork(); if (i < 0) exit(1); if (i > 0) exit(0); @@ -3596,7 +3646,7 @@ int main(int argc, char *argv[]) #else SetConsoleCtrlHandler((PHANDLER_ROUTINE)ConsoleHandler, TRUE); if (opt_background) - { + { HWND hcon = GetConsoleWindow(); if (hcon) { // this method also hide parent command line window @@ -3608,7 +3658,7 @@ int main(int argc, char *argv[]) } } if (opt_priority > 0) - { + { DWORD prio = NORMAL_PRIORITY_CLASS; switch (opt_priority) { case 1: @@ -3631,20 +3681,32 @@ int main(int argc, char *argv[]) applog( LOG_INFO,"%u CPU cores available, %u miner threads selected.", num_cpus, opt_n_threads ); -// To be reviewed +// To be confirmed with more than 64 cpus if ( opt_affinity != -1 ) { - if ( num_cpus > 64 ) + if ( !affinity_uses_uint128 && num_cpus > 64 ) { - applog(LOG_WARNING,"--cpu-affinity argument is not supported with more"); - applog(LOG_WARNING," than 64 CPUs, using default affinity."); + applog(LOG_WARNING,"Setting CPU affinity with more than 64 CPUs is only"); + applog(LOG_WARNING,"available on Linux. 
Using default affinity."); opt_affinity = -1; } else { - if (!opt_quiet) - applog(LOG_DEBUG, "Binding process to cpu mask %x", opt_affinity); - affine_to_cpu_mask( -1, (unsigned long)opt_affinity ); + affine_to_cpu_mask( -1, opt_affinity ); + if ( !opt_quiet ) + { +#if AFFINITY_USES_UINT128 + if ( num_cpus > 64 ) + applog(LOG_DEBUG, "Binding process to cpu mask %x", + u128_hi64( opt_affinity ), u128_lo64( opt_affinity ) ); + else + applog(LOG_DEBUG, "Binding process to cpu mask %x", + opt_affinity ); +#else + applog(LOG_DEBUG, "Binding process to cpu mask %x", + opt_affinity ); +#endif + } } } @@ -3687,7 +3749,7 @@ int main(int argc, char *argv[]) /* ESET-NOD32 Detects these 2 thread_create... */ if (want_longpoll && !have_stratum) - { + { /* init longpoll thread info */ longpoll_thr_id = opt_n_threads + 1; thr = &thr_info[longpoll_thr_id]; @@ -3703,7 +3765,7 @@ int main(int argc, char *argv[]) } } if (want_stratum) - { + { /* init stratum thread info */ stratum_thr_id = opt_n_threads + 2; thr = &thr_info[stratum_thr_id]; @@ -3723,7 +3785,7 @@ int main(int argc, char *argv[]) } if (opt_api_listen) - { + { /* api thread */ api_thr_id = opt_n_threads + 3; thr = &thr_info[api_thr_id]; @@ -3740,7 +3802,7 @@ int main(int argc, char *argv[]) /* start mining threads */ for (i = 0; i < opt_n_threads; i++) - { + { thr = &thr_info[i]; thr->id = i; thr->q = tq_new(); diff --git a/miner.h b/miner.h index 5a85dda..73face1 100644 --- a/miner.h +++ b/miner.h @@ -352,6 +352,7 @@ bool submit_lane_solution( struct work *work, void *hash, void get_currentalgo( char* buf, int sz ); +/* bool has_sha(); bool has_aes_ni(); bool has_avx1(); @@ -368,6 +369,7 @@ void cpu_getmodelid(char *outbuf, size_t maxsz); void cpu_brand_string( char* s ); float cpu_temp( int core ); +*/ struct work { uint32_t data[48] __attribute__ ((aligned (64))); @@ -724,7 +726,6 @@ extern bool opt_debug; extern bool opt_debug_diff; extern bool opt_benchmark; extern bool opt_protocol; -extern bool opt_showdiff; 
extern bool opt_extranonce; extern bool opt_quiet; extern bool opt_redirect; @@ -867,7 +868,7 @@ Options:\n\ x16rv2 Ravencoin (RVN)\n\ x16rt Gincoin (GIN)\n\ x16rt-veil Veil (VEIL)\n\ - x16s Pigeoncoin (PGN)\n\ + x16s\n\ x17\n\ x21s\n\ xevan Bitsend (BSD)\n\ @@ -901,7 +902,6 @@ Options:\n\ -f, --diff-factor Divide req. difficulty by this factor (std is 1.0)\n\ -m, --diff-multiplier Multiply difficulty by this factor (std is 1.0)\n\ --hash-meter Display thread hash rates\n\ - --hide-diff Do not display changes in difficulty\n\ --coinbase-addr=ADDR payout address for solo mining\n\ --coinbase-sig=TEXT data to insert in the coinbase when possible\n\ --no-longpoll disable long polling support\n\ @@ -965,7 +965,6 @@ static struct option const options[] = { { "diff", 1, NULL, 'f' }, // deprecated (alias) { "diff-multiplier", 1, NULL, 'm' }, { "hash-meter", 0, NULL, 1014 }, - { "hide-diff", 0, NULL, 1013 }, { "help", 0, NULL, 'h' }, { "key", 1, NULL, 'K' }, { "no-gbt", 0, NULL, 1011 }, diff --git a/simd-utils/simd-128.h b/simd-utils/simd-128.h index a507f49..1a81efa 100644 --- a/simd-utils/simd-128.h +++ b/simd-utils/simd-128.h @@ -298,30 +298,38 @@ static inline void memcpy_128( __m128i *dst, const __m128i *src, const int n ) // 64 and 32 bit elements. // compiler doesn't like when a variable is used for the last arg of -// _mm_rol_epi32, must be "8 bit immediate". +// _mm_rol_epi32, must be "8 bit immediate". Therefore use rol_var where +// necessary. // sm3-hash-4way.c fails to compile. 
+ +#define mm128_ror_var_64( v, c ) \ + _mm_or_si128( _mm_srli_epi64( v, c ), _mm_slli_epi64( v, 64-(c) ) ) + +#define mm128_rol_var_64( v, c ) \ + _mm_or_si128( _mm_slli_epi64( v, c ), _mm_srli_epi64( v, 64-(c) ) ) + +#define mm128_ror_var_32( v, c ) \ + _mm_or_si128( _mm_srli_epi32( v, c ), _mm_slli_epi32( v, 32-(c) ) ) + +#define mm128_rol_var_32( v, c ) \ + _mm_or_si128( _mm_slli_epi32( v, c ), _mm_srli_epi32( v, 32-(c) ) ) + + /* #if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__) -#define mm128_ror_64( v, c ) _mm_ror_epi64( v, c ) -#define mm128_rol_64( v, c ) _mm_rol_epi64( v, c ) -#define mm128_ror_32( v, c ) _mm_ror_epi32( v, c ) -#define mm128_rol_32( v, c ) _mm_rol_epi32( v, c ) +#define mm128_ror_64 _mm_ror_epi64 +#define mm128_rol_64 _mm_rol_epi64 +#define mm128_ror_32 _mm_ror_epi32 +#define mm128_rol_32 _mm_rol_epi32 #else */ -#define mm128_ror_64( v, c ) \ - _mm_or_si128( _mm_srli_epi64( v, c ), _mm_slli_epi64( v, 64-(c) ) ) - -#define mm128_rol_64( v, c ) \ - _mm_or_si128( _mm_slli_epi64( v, c ), _mm_srli_epi64( v, 64-(c) ) ) - -#define mm128_ror_32( v, c ) \ - _mm_or_si128( _mm_srli_epi32( v, c ), _mm_slli_epi32( v, 32-(c) ) ) - -#define mm128_rol_32( v, c ) \ - _mm_or_si128( _mm_slli_epi32( v, c ), _mm_srli_epi32( v, 32-(c) ) ) +#define mm128_ror_64 mm128_ror_var_64 +#define mm128_rol_64 mm128_rol_var_64 +#define mm128_ror_32 mm128_ror_var_32 +#define mm128_rol_32 mm128_rol_var_32 //#endif // AVX512 else diff --git a/simd-utils/simd-256.h b/simd-utils/simd-256.h index 185cd37..e850697 100644 --- a/simd-utils/simd-256.h +++ b/simd-utils/simd-256.h @@ -367,38 +367,49 @@ static inline void memcpy_256( __m256i *dst, const __m256i *src, const int n ) // // AVX512 has bit rotate for 256 bit vectors with 64 or 32 bit elements + // compiler doesn't like when a variable is used for the last arg of -// _mm_rol_epi32, must be "8 bit immediate". +// _mm_rol_epi32, must be "8 bit immediate". 
Therefore use rol_var where +// necessary. + +#define mm256_ror_var_64( v, c ) \ + _mm256_or_si256( _mm256_srli_epi64( v, c ), \ + _mm256_slli_epi64( v, 64-(c) ) ) + +#define mm256_rol_var_64( v, c ) \ + _mm256_or_si256( _mm256_slli_epi64( v, c ), \ + _mm256_srli_epi64( v, 64-(c) ) ) + +#define mm256_ror_var_32( v, c ) \ + _mm256_or_si256( _mm256_srli_epi32( v, c ), \ + _mm256_slli_epi32( v, 32-(c) ) ) + +#define mm256_rol_var_32( v, c ) \ + _mm256_or_si256( _mm256_slli_epi32( v, c ), \ + _mm256_srli_epi32( v, 32-(c) ) ) + /* #if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__) -#define mm256_ror_64( v, c ) _mm256_ror_epi64( v, c ) -#define mm256_rol_64( v, c ) _mm256_rol_epi64( v, c ) -#define mm256_ror_32( v, c ) _mm256_ror_epi32( v, c ) -#define mm256_rol_32( v, c ) _mm256_rol_epi32( v, c ) +// AVX512, control must be 8 bit immediate. + +#define mm256_ror_64 _mm256_ror_epi64 +#define mm256_rol_64 _mm256_rol_epi64 +#define mm256_ror_32 _mm256_ror_epi32 +#define mm256_rol_32 _mm256_rol_epi32 #else */ -#define mm256_ror_64( v, c ) \ - _mm256_or_si256( _mm256_srli_epi64( v, c ), \ - _mm256_slli_epi64( v, 64-(c) ) ) +// No AVX512, use fallback. 
-#define mm256_rol_64( v, c ) \ - _mm256_or_si256( _mm256_slli_epi64( v, c ), \ - _mm256_srli_epi64( v, 64-(c) ) ) - -#define mm256_ror_32( v, c ) \ - _mm256_or_si256( _mm256_srli_epi32( v, c ), \ - _mm256_slli_epi32( v, 32-(c) ) ) - -#define mm256_rol_32( v, c ) \ - _mm256_or_si256( _mm256_slli_epi32( v, c ), \ - _mm256_srli_epi32( v, 32-(c) ) ) +#define mm256_ror_64 mm256_ror_var_64 +#define mm256_rol_64 mm256_rol_var_64 +#define mm256_ror_32 mm256_ror_var_32 +#define mm256_rol_32 mm256_rol_var_32 // #endif // AVX512 else - #define mm256_ror_16( v, c ) \ _mm256_or_si256( _mm256_srli_epi16( v, c ), \ _mm256_slli_epi16( v, 16-(c) ) ) diff --git a/simd-utils/simd-512.h b/simd-utils/simd-512.h index d5ed059..cd8d514 100644 --- a/simd-utils/simd-512.h +++ b/simd-utils/simd-512.h @@ -278,7 +278,7 @@ static inline __m512i mm512_neg1_fn() // Horizontal vector testing #define mm512_allbits0( a ) _mm512_cmpeq_epi64_mask( a, m512_zero ) -#define mm256_allbits1( a ) _mm512_cmpeq_epi64_mask( a, m512_neg1 ) +#define mm512_allbits1( a ) _mm512_cmpeq_epi64_mask( a, m512_neg1 ) #define mm512_anybits0( a ) _mm512_cmpneq_epi64_mask( a, m512_neg1 ) #define mm512_anybits1( a ) _mm512_cmpneq_epi64_mask( a, m512_zero ) @@ -287,11 +287,30 @@ static inline __m512i mm512_neg1_fn() // Bit rotations. // AVX512F has built-in fixed and variable bit rotation for 64 & 32 bit -// elements and can be called directly. +// elements and can be called directly. But they only accept immediate 8 +// for control arg. 
// // _mm512_rol_epi64, _mm512_ror_epi64, _mm512_rol_epi32, _mm512_ror_epi32 // _mm512_rolv_epi64, _mm512_rorv_epi64, _mm512_rolv_epi32, _mm512_rorv_epi32 // + +#define mm512_ror_var_64( v, c ) \ + _mm512_or_si512( _mm512_srli_epi64( v, c ), \ + _mm512_slli_epi64( v, 64-(c) ) ) + +#define mm512_rol_var_64( v, c ) \ + _mm512_or_si512( _mm512_slli_epi64( v, c ), \ + _mm512_srli_epi64( v, 64-(c) ) ) + +#define mm512_ror_var_32( v, c ) \ + _mm512_or_si512( _mm512_srli_epi32( v, c ), \ + _mm512_slli_epi32( v, 32-(c) ) ) + +#define mm512_rol_var_32( v, c ) \ + _mm512_or_si512( _mm512_slli_epi32( v, c ), \ + _mm512_srli_epi32( v, 32-(c) ) ) + + // Here is a fixed bit rotate for 16 bit elements: #define mm512_ror_16( v, c ) \ _mm512_or_si512( _mm512_srli_epi16( v, c ), \ @@ -300,6 +319,8 @@ static inline __m512i mm512_neg1_fn() _mm512_or_si512( _mm512_slli_epi16( v, c ), \ _mm512_srli_epi16( v, 16-(c) ) + + // Rotations using a vector control index are very slow due to overhead // to generate the index vector. 
Repeated rotations using the same index // are better handled by the calling function where the index only needs diff --git a/sysinfos.c b/sysinfos.c index cf8fb8f..76f9815 100644 --- a/sysinfos.c +++ b/sysinfos.c @@ -1,8 +1,13 @@ +#if !defined(SYSINJFOS_C___) +#define SYSINFOS_C__ + /** * Unit to read cpu informations * * tpruvot 2014 - */ + * JayDDee 2019 + * +*/ #include #include @@ -28,7 +33,7 @@ #define HWMON_ALT5 \ "/sys/class/hwmon/hwmon0/device/temp1_input" -static float linux_cputemp(int core) +static inline float linux_cputemp(int core) { float tc = 0.0; FILE *fd = fopen(HWMON_PATH, "r"); @@ -60,7 +65,7 @@ static float linux_cputemp(int core) #define CPUFREQ_PATH \ "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_cur_freq" -static uint32_t linux_cpufreq(int core) +static inline uint32_t linux_cpufreq(int core) { FILE *fd = fopen(CPUFREQ_PATH, "r"); uint32_t freq = 0; @@ -76,7 +81,7 @@ static uint32_t linux_cpufreq(int core) #else /* WIN32 */ -static float win32_cputemp(int core) +static inline float win32_cputemp(int core) { // todo return 0.0; @@ -88,7 +93,7 @@ static float win32_cputemp(int core) /* exports */ -float cpu_temp(int core) +static inline float cpu_temp(int core) { #ifdef WIN32 return win32_cputemp(core); @@ -97,7 +102,7 @@ float cpu_temp(int core) #endif } -uint32_t cpu_clock(int core) +static inline uint32_t cpu_clock(int core) { #ifdef WIN32 return 0; @@ -106,7 +111,7 @@ uint32_t cpu_clock(int core) #endif } -int cpu_fanpercent() +static inline int cpu_fanpercent() { return 0; } @@ -142,7 +147,7 @@ static inline void cpuid(int functionnumber, int output[4]) { #define cpuid(fn, out) out[0] = 0; #endif -void cpu_getname(char *outbuf, size_t maxsz) +static inline void cpu_getname(char *outbuf, size_t maxsz) { memset(outbuf, 0, maxsz); #ifdef WIN32 @@ -190,7 +195,7 @@ void cpu_getname(char *outbuf, size_t maxsz) #endif } -void cpu_getmodelid(char *outbuf, size_t maxsz) +static inline void cpu_getmodelid(char *outbuf, size_t maxsz) { 
memset(outbuf, 0, maxsz); #ifdef WIN32 @@ -259,32 +264,47 @@ void cpu_getmodelid(char *outbuf, size_t maxsz) #define CPU_BRAND_2 (0x80000003) #define CPU_BRAND_3 (0x80000004) +// Registers #define EAX_Reg (0) #define EBX_Reg (1) #define ECX_Reg (2) #define EDX_Reg (3) -#define XSAVE_Flag (1<<26) // ECX +// Feature flags + +// CPU_INFO ECX +#define XSAVE_Flag (1<<26) #define OSXSAVE_Flag (1<<27) -#define AVX1_Flag (1<<28) +#define AVX_Flag (1<<28) #define XOP_Flag (1<<11) #define FMA3_Flag (1<<12) #define AES_Flag (1<<25) #define SSE42_Flag (1<<20) +// CPU_INFO EDX #define SSE_Flag (1<<25) // EDX #define SSE2_Flag (1<<26) -#define AVX2_Flag (1<< 5) // ADV EBX +// EXTENDED_FEATURES EBX +#define AVX2_Flag (1<< 5) #define AVX512F_Flag (1<<16) +#define AVX512DQ_Flag (1<<17) #define SHA_Flag (1<<29) +#define AVX512BW_Flag (1<<30) +#define AVX512VL_Flag (1<<31) + +// EXTENDED_FEATURES ECX +#define AVX512VBMI_Flag (1<<1) +#define AVX512VBMI2_Flag (1<<6) +#define AVX512VAES_Flag (1<<9) + // Use this to detect presence of feature -#define AVX1_mask (AVX1_Flag|XSAVE_Flag|OSXSAVE_Flag) -#define FMA3_mask (FMA3_Flag|AVX1_mask) +#define AVX_mask (AVX_Flag|XSAVE_Flag|OSXSAVE_Flag) +#define FMA3_mask (FMA3_Flag|AVX_mask) +#define AVX512_mask (AVX512VL_Flag|AVX512BW_Flag|AVX512DQ_Flag|AVX512F_Flag) - -static inline bool has_sha_() +static inline bool has_sha() { #ifdef __arm__ return false; @@ -295,10 +315,7 @@ static inline bool has_sha_() #endif } -bool has_sha() { return has_sha_(); } - - -static inline bool has_sse2_() +static inline bool has_sse2() { #ifdef __arm__ return false; @@ -309,10 +326,8 @@ static inline bool has_sse2_() #endif } -bool has_sse2() { return has_sse2_(); } - -// nehalem and above, no AVX1 on nehalem -static inline bool has_aes_ni_() +// nehalem and above, no AVX on nehalem +static inline bool has_aes_ni() { #ifdef __arm__ return false; @@ -323,24 +338,20 @@ static inline bool has_aes_ni_() #endif } -bool has_aes_ni() { return has_aes_ni_(); } - // 
westmere and above -static inline bool has_avx1_() +static inline bool has_avx() { #ifdef __arm__ return false; #else int cpu_info[4] = { 0 }; cpuid( CPU_INFO, cpu_info ); - return ( ( cpu_info[ ECX_Reg ] & AVX1_mask ) == AVX1_mask ); + return ( ( cpu_info[ ECX_Reg ] & AVX_mask ) == AVX_mask ); #endif } -bool has_avx1() { return has_avx1_(); } - // haswell and above -static inline bool has_avx2_() +static inline bool has_avx2() { #ifdef __arm__ return false; @@ -351,9 +362,7 @@ static inline bool has_avx2_() #endif } -bool has_avx2() { return has_avx2_(); } - -static inline bool has_avx512f_() +static inline bool has_avx512f() { #ifdef __arm__ return false; @@ -364,24 +373,75 @@ static inline bool has_avx512f_() #endif } -bool has_avx512f() { return has_avx512f_(); } +static inline bool has_avx512dq() +{ +#ifdef __arm__ + return false; +#else + int cpu_info[4] = { 0 }; + cpuid( EXTENDED_FEATURES, cpu_info ); + return cpu_info[ EBX_Reg ] & AVX512DQ_Flag; +#endif +} +static inline bool has_avx512bw() +{ +#ifdef __arm__ + return false; +#else + int cpu_info[4] = { 0 }; + cpuid( EXTENDED_FEATURES, cpu_info ); + return cpu_info[ EBX_Reg ] & AVX512BW_Flag; +#endif +} + +static inline bool has_avx512vl() +{ +#ifdef __arm__ + return false; +#else + int cpu_info[4] = { 0 }; + cpuid( EXTENDED_FEATURES, cpu_info ); + return cpu_info[ EBX_Reg ] & AVX512VL_Flag; +#endif +} + +// Minimum to be useful +static inline bool has_avx512() +{ +#ifdef __arm__ + return false; +#else + int cpu_info[4] = { 0 }; + cpuid( EXTENDED_FEATURES, cpu_info ); + return ( ( cpu_info[ EBX_Reg ] & AVX512_mask ) == AVX512_mask ); +#endif +} + +static inline bool has_avx512vaes() +{ +#ifdef __arm__ + return false; +#else + int cpu_info[4] = { 0 }; + cpuid( EXTENDED_FEATURES, cpu_info ); + return cpu_info[ ECX_Reg ] & AVX512VAES_Flag; +#endif +} // AMD only -static inline bool has_xop_() +static inline bool has_xop() { #ifdef __arm__ return false; #else int cpu_info[4] = { 0 }; - cpuid( CPU_INFO, cpu_info 
); + cpuid( EXTENDED_CPU_INFO, cpu_info ); return cpu_info[ ECX_Reg ] & XOP_Flag; #endif } -bool has_xop() { return has_xop_(); } - -static inline bool has_fma3_() +static inline bool has_fma3() { #ifdef __arm__ return false; @@ -392,9 +452,7 @@ static inline bool has_fma3_() #endif } -bool has_fma3() { return has_fma3_(); } - -static inline bool has_sse42_() +static inline bool has_sse42() { #ifdef __arm__ return false; @@ -405,9 +463,7 @@ static inline bool has_sse42_() #endif } -bool has_sse42() { return has_sse42_(); } - -static inline bool has_sse_() +static inline bool has_sse() { #ifdef __arm__ return false; @@ -418,16 +474,14 @@ static inline bool has_sse_() #endif } -bool has_sse() { return has_sse_(); } - -uint32_t cpuid_get_highest_function_number() +static inline uint32_t cpuid_get_highest_function_number() { uint32_t cpu_info[4] = {0}; cpuid( VENDOR_ID, cpu_info); return cpu_info[ EAX_Reg ]; } -void cpuid_get_highest_function( char* s ) +static inline void cpuid_get_highest_function( char* s ) { uint32_t fn = cpuid_get_highest_function_number(); switch (fn) @@ -449,7 +503,7 @@ void cpuid_get_highest_function( char* s ) } } -void cpu_bestfeature(char *outbuf, size_t maxsz) +static inline void cpu_bestfeature(char *outbuf, size_t maxsz) { #ifdef __arm__ sprintf(outbuf, "ARM"); @@ -459,19 +513,19 @@ void cpu_bestfeature(char *outbuf, size_t maxsz) cpuid( CPU_INFO, cpu_info ); cpuid( EXTENDED_FEATURES, cpu_info_adv ); - if ( has_avx1_() && has_avx2_() ) + if ( has_avx() && has_avx2() ) sprintf(outbuf, "AVX2"); - else if ( has_avx1_() ) - sprintf(outbuf, "AVX1"); - else if ( has_fma3_() ) + else if ( has_avx() ) + sprintf(outbuf, "AVX"); + else if ( has_fma3() ) sprintf(outbuf, "FMA3"); - else if ( has_xop_() ) + else if ( has_xop() ) sprintf(outbuf, "XOP"); - else if ( has_sse42_() ) + else if ( has_sse42() ) sprintf(outbuf, "SSE42"); - else if ( has_sse2_() ) + else if ( has_sse2() ) sprintf(outbuf, "SSE2"); - else if ( has_sse_() ) + else if ( has_sse() 
) sprintf(outbuf, "SSE"); else *outbuf = '\0'; @@ -479,7 +533,7 @@ void cpu_bestfeature(char *outbuf, size_t maxsz) #endif } -void cpu_brand_string( char* s ) +static inline void cpu_brand_string( char* s ) { #ifdef __arm__ sprintf( s, "ARM" ); @@ -498,3 +552,5 @@ void cpu_brand_string( char* s ) #endif } +#endif // SYSINFOS_C__ + diff --git a/util.c b/util.c index 7e07ae2..cc181ff 100644 --- a/util.c +++ b/util.c @@ -24,6 +24,7 @@ #include #include #include +#include "sysinfos.c" #include #include #include