v3.9.9.1

2026-07-15 03:16:49 +00:00 · 2019-10-24 14:11:26 -04:00
parent 72330eb5a7
commit 86b889e1b0
65 changed files with 547 additions and 638 deletions
--- a/README.md
+++ b/README.md
@@ -122,10 +122,10 @@ Supported Algorithms
                          x13sm3        hsr (Hshare)
                          x14           X14
                          x15           X15
-                          x16r          Ravencoin (RVN) (original algo)
-                          x16rv2        Ravencoin (RVN) (new algo)
+                          x16r          
+                          x16rv2        Ravencoin (RVN)
                          x16rt         Gincoin (GIN)
-                          x16rt_veil    Veil (VEIL)
+                          x16rt-veil    Veil (VEIL)
                          x16s          Pigeoncoin (PGN)
                          x17
                          x21s
@@ -136,7 +136,7 @@ Supported Algorithms
                          yescryptr32   WAVI
                          yespower      Cryply
                          yespowerr16   Yenten (YTN)
-                          yespoer-b2b   generic yespower + blake2b
+                          yespower-b2b  generic yespower + blake2b
                          zr5           Ziftr

 Errata
@@ -160,10 +160,12 @@ Bugs
 ----

 Users are encouraged to post their bug reports using git issues or on the
-Bitcoin Talk forum at:
+Bitcoin Talk forum at the links below:

 https://bitcointalk.org/index.php?topic=1326803.0

+https://github.com/JayDDee/cpuminer-opt/issues
+
 All problem reports must be accompanied by a proper problem definition.
 This should include how the problem occurred, the command line and
 output from the miner showing the startup messages and any errors.
@@ -175,10 +177,6 @@ Donations
 cpuminer-opt has no fees of any kind but donations are accepted.

 BTC: 12tdvfF7KmAsihBXQXynT6E6th2c2pByTT
- ETH: 0x72122edabcae9d3f57eab0729305a425f6fef6d0
- LTC: LdUwoHJnux9r9EKqFWNvAi45kQompHk6e8
- BCH: 1QKYkB6atn4P7RFozyziAXLEnurwnUM1cQ
- BTG: GVUyECtRHeC5D58z9F3nGGfVQndwnsPnHQ

 Happy mining!

--- a/22
+++ b/22
@@ -1,11 +1,6 @@
 cpuminer-opt is a console program run from the command line using the
 keyboard, not the mouse.

-cpuminer-opt now supports HW SHA acceleration available on AMD Ryzen CPUs.
-This feature requires recent SW including GCC version 5 or higher and
-openssl version 1.1 or higher. It may also require using "-march=znver1"
-compile flag.
-
 Security warning
 ----------------

@@ -36,17 +31,26 @@ FreeBSD YMMV.
 Change Log
 ----------

+v3.9.9.1
+
+Fixed a day1 bug that could cause the miner to idle for up to 2 minutes
+under certain circumstances.
+
+Redesigned summary stats report now includes session statistics.
+
+More robust handling of statistics to reduce corruption.
+
+Removed --hide-diff option.
+
+Better handling of cpu-affinity with more than 64 CPUs.
+
 v3.9.9

 Added power2b algo for MicroBitcoin.
-
 Added generic yespower-b2b (yespower + blake2b) algo to be used with
 the parameters introduced in v3.9.7 for yespower & yescrypt.
-
 Display additional info when a share is rejected.
-
 Some low level enhancements and minor tweaking of log output.
-
 RELEASE_NOTES (this file) and README.md  added to Windows release package.

 v3.9.8.1
--- a/algo-gate-api.c
+++ b/algo-gate-api.c
@@ -116,8 +116,6 @@ void init_algo_gate( algo_gate_t* gate )
   gate->get_nonceptr            = (void*)&std_get_nonceptr;
   gate->work_decode             = (void*)&std_le_work_decode;
   gate->decode_extra_data       = (void*)&do_nothing;
-   gate->wait_for_diff           = (void*)&std_wait_for_diff;
-   gate->get_max64               = (void*)&get_max64_0x1fffffLL;
   gate->gen_merkle_root         = (void*)&sha256d_gen_merkle_root;
   gate->stratum_gen_work        = (void*)&std_stratum_gen_work;
   gate->build_stratum_request   = (void*)&std_le_build_stratum_request;
@@ -278,7 +276,7 @@ bool register_json_rpc2( algo_gate_t *gate )
  applog(LOG_WARNING,"supported by cpuminer-opt. Shares submitted will");
  applog(LOG_WARNING,"likely be rejected. Proceed at your own risk.\n");

-  gate->wait_for_diff           = (void*)&do_nothing;
+//  gate->wait_for_diff           = (void*)&do_nothing;
  gate->get_new_work            = (void*)&jr2_get_new_work;
  gate->get_nonceptr            = (void*)&jr2_get_nonceptr;
  gate->stratum_gen_work        = (void*)&jr2_stratum_gen_work;
--- a/algo-gate-api.h
+++ b/algo-gate-api.h
@@ -35,7 +35,7 @@
 //    6. Determine if other non existant functions are required.
 //    That is determined by the need to add code in cpu-miner.c
 //    that applies only to the new algo. That is forbidden. All
-//    algo specific code must be in theh algo's file.
+//    algo specific code must be in the algo's file.
 //
 //    7. If new functions need to be added to the gate add the type
 //    to the structure, declare a null instance in this file and define
@@ -48,10 +48,10 @@
 //    instances as they are defined by default, or unsafe functions that
 //    are not needed by the algo.
 //
-//    9. Add an case entry to the switch/case in function register_gate
+//    9. Add a case entry to the switch/case in function register_gate
 //    in file algo-gate-api.c for the new algo.
 //
-//    10 If a new function type was defined add an entry to ini talgo_gate
+//    10 If a new function type was defined add an entry to init algo_gate
 //    to initialize the new function to its null instance described in step 7.
 //
 //    11. If the new algo has aliases add them to the alias array in
@@ -110,14 +110,7 @@ inline bool set_excl ( set_t a, set_t b ) { return (a & b) == 0; }

 typedef struct
 {
-// special case, only one target, provides a callback for scanhash to
-// submit work with less overhead.
-// bool (*submit_work )             ( struct thr_info*, const struct work* );
-
 // mandatory functions, must be overwritten
-// Added a 5th arg for the thread_info structure to replace the int thr id
-// in the first arg. Both will co-exist during the trasition.
-//int ( *scanhash ) ( int, struct work*, uint32_t, uint64_t* );
 int ( *scanhash ) ( struct work*, uint32_t, uint64_t*, struct thr_info* );

 // optional unsafe, must be overwritten if algo uses function
@@ -131,8 +124,6 @@ void ( *get_new_work )           ( struct work*, struct work*, int, uint32_t*,
                                   bool );
 uint32_t *( *get_nonceptr )      ( uint32_t* );
 void ( *decode_extra_data )      ( struct work*, uint64_t* );
-void ( *wait_for_diff )          ( struct stratum_ctx* );
-int64_t ( *get_max64 )           ();
 bool ( *work_decode )            ( const json_t*, struct work* );
 bool ( *submit_getwork_result )  ( CURL*, struct work* );
 void ( *gen_merkle_root )        ( char*, struct stratum_ctx* );
@@ -200,8 +191,6 @@ void null_hash_suw();

 // optional safe targets, default listed first unless noted.

-void std_wait_for_diff();
-
 uint32_t *std_get_nonceptr( uint32_t *work_data );
 uint32_t *jr2_get_nonceptr( uint32_t *work_data );

@@ -216,14 +205,6 @@ void jr2_stratum_gen_work( struct stratum_ctx *sctx, struct work *work );
 void sha256d_gen_merkle_root( char *merkle_root, struct stratum_ctx *sctx );
 void SHA256_gen_merkle_root ( char *merkle_root, struct stratum_ctx *sctx );

-// pick your favorite or define your own
-int64_t get_max64_0x1fffffLL(); // default
-int64_t get_max64_0x40LL();
-int64_t get_max64_0x3ffff();
-int64_t get_max64_0x3fffffLL();
-int64_t get_max64_0x1ffff();
-int64_t get_max64_0xffffLL();
-
 bool std_le_work_decode( const json_t *val, struct work *work );
 bool std_be_work_decode( const json_t *val, struct work *work );
 bool jr2_work_decode(    const json_t *val, struct work *work );
@@ -266,8 +247,8 @@ int std_get_work_data_size();
 // by calling the algo's register function.
 bool register_algo_gate( int algo, algo_gate_t *gate );

-// Override any default gate functions that are applicable and do any other
-// algo-specific initialization.
+// Called by algos to override any default gate functions that are applicable
+// and do any other algo-specific initialization.
 // The register functions for all the algos can be declared here to reduce
 // compiler warnings but that's just more work for devs adding new algos.
 bool register_algo( algo_gate_t *gate );
@@ -280,5 +261,7 @@ bool register_json_rpc2( algo_gate_t *gate );
 // use this to call the hash function of an algo directly, ie util.c test.
 void exec_hash_function( int algo, void *output, const void *pdata );

-void get_algo_alias( char** algo_or_alias );
+// Validate a string as a known algo or alias. Updates arg to the proper
+// algo name if it is a valid alias, or to NULL if it is neither.
+void get_algo_alias( char **algo_or_alias );

--- a/algo/argon2/argon2a/argon2a.c
+++ b/algo/argon2/argon2a/argon2a.c
@@ -74,18 +74,12 @@ int scanhash_argon2( struct work* work, uint32_t max_nonce,
 	return 0;
 }

-int64_t argon2_get_max64 ()
-{
-  return 0x1ffLL;
-}
-
 bool register_argon2_algo( algo_gate_t* gate )
 {
  gate->optimizations = SSE2_OPT | AVX_OPT | AVX2_OPT;
  gate->scanhash        = (void*)&scanhash_argon2;
  gate->hash            = (void*)&argon2hash;
  gate->gen_merkle_root = (void*)&SHA256_gen_merkle_root;
-  gate->get_max64       = (void*)&argon2_get_max64;
  opt_target_factor = 65536.0;

  return true;
--- a/algo/argon2/argon2d/argon2d-gate.c
+++ b/algo/argon2/argon2d/argon2d-gate.c
@@ -179,12 +179,9 @@ int scanhash_argon2d4096( struct work *work, uint32_t max_nonce,
   return 0;
 }

-int64_t get_max64_0x1ff() { return 0x1ff; }
-
 bool register_argon2d4096_algo( algo_gate_t* gate )
 {
        gate->scanhash = (void*)&scanhash_argon2d4096;
-        gate->get_max64  = (void*)&get_max64_0x1ff;
        gate->optimizations = SSE2_OPT | AVX2_OPT | AVX512_OPT;
        opt_target_factor = 65536.0;
        return true;
--- a/algo/blake/blake-gate.c
+++ b/algo/blake/blake-gate.c
@@ -1,18 +1,8 @@
 #include "blake-gate.h"

-int64_t blake_get_max64 ()
-{
-  return 0x7ffffLL;
-}
-
 bool register_blake_algo( algo_gate_t* gate )
 {
  gate->optimizations = AVX2_OPT;
-  gate->get_max64 = (void*)&blake_get_max64;
-//#if defined (__AVX2__) && defined (FOUR_WAY)
-//   gate->optimizations = SSE2_OPT | AVX2_OPT;
-//  gate->scanhash  = (void*)&scanhash_blake_8way;
-//  gate->hash      = (void*)&blakehash_8way;
 #if defined(BLAKE_4WAY)
  four_way_not_tested();
  gate->scanhash  = (void*)&scanhash_blake_4way;
--- a/algo/blake/blake2b-gate.c
+++ b/algo/blake/blake2b-gate.c
@@ -1,13 +1,5 @@
 #include "blake2b-gate.h"

-/*
-// changed to get_max64_0x3fffffLL in cpuminer-multi-decred
-int64_t blake2s_get_max64 ()
-{
-   return 0x7ffffLL;
-}
-*/
-
 bool register_blake2b_algo( algo_gate_t* gate )
 {
 #if defined(BLAKE2B_4WAY)
@@ -17,7 +9,6 @@ bool register_blake2b_algo( algo_gate_t* gate )
  gate->scanhash  = (void*)&scanhash_blake2b;
  gate->hash      = (void*)&blake2b_hash;
 #endif
-//  gate->get_max64 = (void*)&blake2s_get_max64;
  gate->optimizations =  AVX2_OPT;
  return true;
 };
--- a/algo/blake/blake2s-gate.c
+++ b/algo/blake/blake2s-gate.c
@@ -1,12 +1,5 @@
 #include "blake2s-gate.h"

-
-// changed to get_max64_0x3fffffLL in cpuminer-multi-decred
-int64_t blake2s_get_max64 ()
-{
-   return 0x7ffffLL;
-}
-
 bool register_blake2s_algo( algo_gate_t* gate )
 {
 #if defined(BLAKE2S_8WAY)
@@ -19,7 +12,6 @@ bool register_blake2s_algo( algo_gate_t* gate )
  gate->scanhash  = (void*)&scanhash_blake2s;
  gate->hash      = (void*)&blake2s_hash;
 #endif
-  gate->get_max64 = (void*)&blake2s_get_max64;
  gate->optimizations = SSE2_OPT | AVX2_OPT;
  return true;
 };
--- a/algo/blake/blake2s.c
+++ b/algo/blake/blake2s.c
@@ -70,18 +70,3 @@ int scanhash_blake2s( struct work *work,

 	return 0;
 }
-/*
-// changed to get_max64_0x3fffffLL in cpuminer-multi-decred
-int64_t blake2s_get_max64 ()
-{
-   return 0x7ffffLL;
-}
-
-bool register_blake2s_algo( algo_gate_t* gate )
-{
-  gate->scanhash  = (void*)&scanhash_blake2s;
-  gate->hash      = (void*)&blake2s_hash;
-  gate->get_max64 = (void*)&blake2s_get_max64;
-  return true;
-};
-*/
--- a/algo/blake/blakecoin-gate.c
+++ b/algo/blake/blakecoin-gate.c
@@ -1,13 +1,6 @@
 #include "blakecoin-gate.h"
 #include <memory.h>

-// changed to get_max64_0x3fffffLL in cpuminer-multi-decred
-int64_t blakecoin_get_max64 ()
-{
-  return 0x7ffffLL;
-//  return 0x3fffffLL;
-}
-
 // vanilla uses default gen merkle root, otherwise identical to blakecoin
 bool register_vanilla_algo( algo_gate_t* gate )
 {
@@ -23,7 +16,6 @@ bool register_vanilla_algo( algo_gate_t* gate )
  gate->hash     = (void*)&blakecoinhash;
 #endif
  gate->optimizations = SSE42_OPT | AVX2_OPT;
-  gate->get_max64 = (void*)&blakecoin_get_max64;
  return true;
 }

--- a/algo/blake/blakecoin.c
+++ b/algo/blake/blakecoin.c
@@ -93,33 +93,3 @@ int scanhash_blakecoin( struct work *work, uint32_t max_nonce,
 	return 0;
 }

-/*
-void blakecoin_gen_merkle_root ( char* merkle_root, struct stratum_ctx* sctx )
-{
- SHA256( sctx->job.coinbase, (int)sctx->job.coinbase_size, merkle_root );
-}
-*/
-/*
-// changed to get_max64_0x3fffffLL in cpuminer-multi-decred
-int64_t blakecoin_get_max64 ()
-{
-  return 0x7ffffLL;
-}
-
-// vanilla uses default gen merkle root, otherwise identical to blakecoin
-bool register_vanilla_algo( algo_gate_t* gate )
-{
-    gate->scanhash = (void*)&scanhash_blakecoin;
-    gate->hash     = (void*)&blakecoinhash;
-    gate->get_max64 = (void*)&blakecoin_get_max64;
-    blakecoin_init( &blake_init_ctx );
-    return true;
-}
-
-bool register_blakecoin_algo( algo_gate_t* gate )
-{
-  register_vanilla_algo( gate );
-  gate->gen_merkle_root = (void*)&SHA256_gen_merkle_root;
-  return true;
-}
-*/
--- a/algo/blake/decred-gate.c
+++ b/algo/blake/decred-gate.c
@@ -38,7 +38,7 @@ void decred_decode_extradata( struct work* work, uint64_t* net_blocks )
   if (!have_longpoll && work->height > *net_blocks + 1)
   {
      char netinfo[64] = { 0 };
-      if (opt_showdiff && net_diff > 0.)
+      if ( net_diff > 0. )
      {
         if (net_diff != work->targetdiff)
            sprintf(netinfo, ", diff %.3f, target %.1f", net_diff,
@@ -154,7 +154,6 @@ bool register_decred_algo( algo_gate_t* gate )
 #endif
  gate->optimizations = AVX2_OPT;
  gate->get_nonceptr          = (void*)&decred_get_nonceptr;
-  gate->get_max64             = (void*)&get_max64_0x3fffffLL;
  gate->decode_extra_data     = (void*)&decred_decode_extradata;
  gate->build_stratum_request = (void*)&decred_be_build_stratum_request;
  gate->work_decode           = (void*)&std_be_work_decode;
--- a/algo/blake/decred.c
+++ b/algo/blake/decred.c
@@ -143,7 +143,7 @@ void decred_decode_extradata( struct work* work, uint64_t* net_blocks )
   if (!have_longpoll && work->height > *net_blocks + 1)
   {
      char netinfo[64] = { 0 };
-      if (opt_showdiff && net_diff > 0.)
+      if (net_diff > 0.)
      {
         if (net_diff != work->targetdiff)
 	    sprintf(netinfo, ", diff %.3f, target %.1f", net_diff,
@@ -269,7 +269,6 @@ bool register_decred_algo( algo_gate_t* gate )
  gate->scanhash              = (void*)&scanhash_decred;
  gate->hash                  = (void*)&decred_hash;
  gate->get_nonceptr          = (void*)&decred_get_nonceptr;
-  gate->get_max64             = (void*)&get_max64_0x3fffffLL;
  gate->decode_extra_data     = (void*)&decred_decode_extradata;
  gate->build_stratum_request = (void*)&decred_be_build_stratum_request;
  gate->work_decode           = (void*)&std_be_work_decode;
--- a/algo/blake/pentablake-gate.c
+++ b/algo/blake/pentablake-gate.c
@@ -10,7 +10,6 @@ bool register_pentablake_algo( algo_gate_t* gate )
    gate->hash      = (void*)&pentablakehash;
 #endif
    gate->optimizations = AVX2_OPT;
-    gate->get_max64 = (void*)&get_max64_0x3ffff;
    return true;
 };

--- a/algo/bmw/bmw512-gate.c
+++ b/algo/bmw/bmw512-gate.c
@@ -1,11 +1,8 @@
 #include "bmw512-gate.h"

-int64_t bmw512_get_max64() { return 0x7ffffLL; }
-
 bool register_bmw512_algo( algo_gate_t* gate )
 {
  gate->optimizations = AVX2_OPT;
-  gate->get_max64       = (void*)&bmw512_get_max64;
  opt_target_factor = 256.0;
 #if defined (BMW512_4WAY)
  gate->scanhash  = (void*)&scanhash_bmw512_4way;
--- a/algo/cryptonight/cryptolight.c
+++ b/algo/cryptonight/cryptolight.c
@@ -363,7 +363,6 @@ bool register_cryptolight_algo( algo_gate_t* gate )
  gate->scanhash  = (void*)&scanhash_cryptolight;
  gate->hash      = (void*)&cryptolight_hash;
  gate->hash_suw  = (void*)&cryptolight_hash; 
-  gate->get_max64 = (void*)&get_max64_0x40LL;
  return true;
 };

--- a/algo/cryptonight/cryptonight-common.c
+++ b/algo/cryptonight/cryptonight-common.c
@@ -111,7 +111,6 @@ bool register_cryptonight_algo( algo_gate_t* gate )
  gate->scanhash         = (void*)&scanhash_cryptonight;
  gate->hash             = (void*)&cryptonight_hash;
  gate->hash_suw         = (void*)&cryptonight_hash_suw;  
-  gate->get_max64        = (void*)&get_max64_0x40LL;
  return true;
 };

@@ -123,7 +122,6 @@ bool register_cryptonightv7_algo( algo_gate_t* gate )
  gate->scanhash      = (void*)&scanhash_cryptonight;
  gate->hash          = (void*)&cryptonight_hash;
  gate->hash_suw      = (void*)&cryptonight_hash_suw;
-  gate->get_max64     = (void*)&get_max64_0x40LL;
  return true;
 };

--- a/algo/groestl/groestl.c
+++ b/algo/groestl/groestl.c
@@ -100,7 +100,6 @@ bool register_dmd_gr_algo( algo_gate_t* gate )
    gate->optimizations   = SSE2_OPT | AES_OPT;
    gate->scanhash        = (void*)&scanhash_groestl;
    gate->hash            = (void*)&groestlhash;
-    gate->get_max64       = (void*)&get_max64_0x3ffff;
    opt_target_factor = 256.0;
    return true;
 };
--- a/algo/groestl/myr-groestl.c
+++ b/algo/groestl/myr-groestl.c
@@ -88,15 +88,3 @@ int scanhash_myriad( struct work *work, uint32_t max_nonce,
 	*hashes_done = pdata[19] - first_nonce + 1;
 	return 0;
 }
-/*
-bool register_myriad_algo( algo_gate_t* gate )
-{
-    gate->optimizations = SSE2_OPT | AES_OPT;
-    init_myrgr_ctx();
-    gate->scanhash = (void*)&scanhash_myriad;
-    gate->hash     = (void*)&myriadhash;
-//    gate->hash_alt = (void*)&myriadhash;
-    gate->get_max64 = (void*)&get_max64_0x3ffff;
-    return true;
-};
-*/
--- a/algo/groestl/myrgr-gate.c
+++ b/algo/groestl/myrgr-gate.c
@@ -12,7 +12,6 @@ bool register_myriad_algo( algo_gate_t* gate )
  gate->hash      = (void*)&myriad_hash;
 #endif
  gate->optimizations = AES_OPT | AVX2_OPT;
-  gate->get_max64 = (void*)&get_max64_0x3ffff;
  return true;
 };

--- a/algo/keccak/keccak-gate.c
+++ b/algo/keccak/keccak-gate.c
@@ -1,12 +1,10 @@
 #include "keccak-gate.h"

-int64_t keccak_get_max64() { return 0x7ffffLL; }

 bool register_keccak_algo( algo_gate_t* gate )
 {
  gate->optimizations = AVX2_OPT;
  gate->gen_merkle_root = (void*)&SHA256_gen_merkle_root;
-  gate->get_max64       = (void*)&keccak_get_max64;
  opt_target_factor = 128.0;
 #if defined (KECCAK_4WAY)
  gate->scanhash  = (void*)&scanhash_keccak_4way;
@@ -22,7 +20,6 @@ bool register_keccakc_algo( algo_gate_t* gate )
 {
  gate->optimizations = AVX2_OPT;
  gate->gen_merkle_root = (void*)&sha256d_gen_merkle_root;
-  gate->get_max64       = (void*)&keccak_get_max64;
  opt_target_factor = 256.0;
 #if defined (KECCAK_4WAY)
  gate->scanhash  = (void*)&scanhash_keccak_4way;
--- a/algo/lyra2/lyra2-gate.c
+++ b/algo/lyra2/lyra2-gate.c
@@ -127,7 +127,6 @@ bool register_lyra2z_algo( algo_gate_t* gate )
  gate->hash       = (void*)&lyra2z_hash;
 #endif
  gate->optimizations = SSE42_OPT | AVX2_OPT;
-  gate->get_max64  = (void*)&get_max64_0xffffLL;
  opt_target_factor = 256.0;
  return true;
 };
@@ -147,15 +146,12 @@ bool register_lyra2h_algo( algo_gate_t* gate )
  gate->hash       = (void*)&lyra2h_hash;
 #endif
  gate->optimizations = SSE42_OPT | AVX2_OPT;
-  gate->get_max64  = (void*)&get_max64_0xffffLL;
  opt_target_factor = 256.0;
  return true;
 };

 /////////////////////////////////

-int64_t allium_get_max64_0xFFFFLL() { return 0xFFFFLL; }
-
 bool register_allium_algo( algo_gate_t* gate )
 {
 #if defined (ALLIUM_4WAY)
@@ -168,7 +164,6 @@ bool register_allium_algo( algo_gate_t* gate )
  gate->hash      = (void*)&allium_hash;
 #endif
  gate->optimizations = SSE2_OPT | AES_OPT | SSE42_OPT | AVX2_OPT;
-  gate->get_max64         = (void*)&allium_get_max64_0xFFFFLL;
  opt_target_factor = 256.0;
  return true;
 };
@@ -214,7 +209,6 @@ bool register_phi2_algo( algo_gate_t* gate )
   gate->get_work_data_size = (void*)&phi2_get_work_data_size;
   gate->decode_extra_data  = (void*)&phi2_decode_extra_data;
   gate->build_extraheader  = (void*)&phi2_build_extraheader;
-   gate->get_max64          = (void*)&get_max64_0xffffLL;
   opt_target_factor = 256.0;
 #if defined(PHI2_4WAY)
   gate->scanhash           = (void*)&scanhash_phi2_4way;
--- a/algo/lyra2/lyra2re.c
+++ b/algo/lyra2/lyra2re.c
@@ -113,18 +113,12 @@ int scanhash_lyra2re( struct work *work, uint32_t max_nonce,
 	return 0;
 }

-int64_t lyra2re_get_max64 ()
-{
-  return 0xffffLL;
-}
-
 bool register_lyra2re_algo( algo_gate_t* gate )
 {
  init_lyra2re_ctx();
  gate->optimizations = SSE2_OPT | AES_OPT | SSE42_OPT | AVX2_OPT;
  gate->scanhash   = (void*)&scanhash_lyra2re;
  gate->hash       = (void*)&lyra2re_hash;
-  gate->get_max64  = (void*)&lyra2re_get_max64;
  opt_target_factor = 128.0;
  return true;
 };
--- a/algo/lyra2/lyra2z330.c
+++ b/algo/lyra2/lyra2z330.c
@@ -70,7 +70,6 @@ bool register_lyra2z330_algo( algo_gate_t* gate )
  gate->miner_thread_init = (void*)&lyra2z330_thread_init;
  gate->scanhash   = (void*)&scanhash_lyra2z330;
  gate->hash       = (void*)&lyra2z330_hash;
-  gate->get_max64  = (void*)&get_max64_0xffffLL;
  opt_target_factor = 256.0;
  return true;
 };
--- a/algo/m7m.c
+++ b/algo/m7m.c
@@ -296,8 +296,6 @@ int scanhash_m7m_hash( struct work* work, uint64_t max_nonce,

     pdata[19] = n;

-// can this be skipped after finding a share? Seems to work ok.
-//out:
     mpf_set_prec_raw(magifpi, prec0);
     mpf_set_prec_raw(magifpi0, prec0);
     mpf_set_prec_raw(mptmp, prec0);
@@ -323,7 +321,6 @@ bool register_m7m_algo( algo_gate_t *gate )
  gate->build_stratum_request = (void*)&std_be_build_stratum_request;
  gate->work_decode           = (void*)&std_be_work_decode;
  gate->submit_getwork_result = (void*)&std_be_submit_getwork_result;
-  gate->get_max64             = (void*)&get_max64_0x1ffff;
  gate->set_work_data_endian  = (void*)&set_work_data_big_endian;
  opt_target_factor = 65536.0;
  return true;
--- a/algo/nist5/zr5.c
+++ b/algo/nist5/zr5.c
@@ -208,12 +208,6 @@ void zr5_get_new_work( struct work* work, struct work* g_work, int thr_id,
       ++(*nonceptr);
 }

-int64_t zr5_get_max64 ()
-{
-//  return 0x1ffffLL;
-  return 0x1fffffLL;
-}
-
 void zr5_display_pok( struct work* work )
 {
      if ( work->data[0] & 0x00008000 )
@@ -229,7 +223,6 @@ bool register_zr5_algo( algo_gate_t* gate )
    gate->get_new_work          = (void*)&zr5_get_new_work;
    gate->scanhash              = (void*)&scanhash_zr5;
    gate->hash                  = (void*)&zr5hash;
-    gate->get_max64             = (void*)&zr5_get_max64;
    gate->decode_extra_data     = (void*)&zr5_display_pok;
    gate->build_stratum_request = (void*)&std_be_build_stratum_request;
    gate->work_decode           = (void*)&std_be_work_decode;
--- a/algo/ripemd/lbry-gate.c
+++ b/algo/ripemd/lbry-gate.c
@@ -94,8 +94,6 @@ void lbry_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
   g_work->data[28] = 0x80000000;
 }

-int64_t lbry_get_max64() { return 0x1ffffLL; }
-
 int lbry_get_work_data_size() { return LBRY_WORK_DATA_SIZE; }

 bool register_lbry_algo( algo_gate_t* gate )
@@ -112,7 +110,6 @@ bool register_lbry_algo( algo_gate_t* gate )
  gate->hash                  = (void*)&lbry_hash;
 #endif
  gate->calc_network_diff     = (void*)&lbry_calc_network_diff;
-  gate->get_max64             = (void*)&lbry_get_max64;
  gate->build_stratum_request = (void*)&lbry_le_build_stratum_request;
 //  gate->build_block_header    = (void*)&build_block_header;
  gate->build_extraheader     = (void*)&lbry_build_extraheader;
--- a/algo/scrypt/neoscrypt.c
+++ b/algo/scrypt/neoscrypt.c
@@ -1070,17 +1070,6 @@ int scanhash_neoscrypt( struct work *work,
    return 0;
 }

-int64_t get_neoscrypt_max64() { return 0x3ffff; }
-
-void neoscrypt_wait_for_diff( struct stratum_ctx *stratum )
-{
-   while ( !stratum->job.diff )
-   {
-//      applog(LOG_DEBUG, "Waiting for Stratum to set the job difficulty");
-      sleep(1);
-   }
-}
-
 int neoscrypt_get_work_data_size () { return 80; }

 bool register_neoscrypt_algo( algo_gate_t* gate )
@@ -1088,8 +1077,6 @@ bool register_neoscrypt_algo( algo_gate_t* gate )
  gate->optimizations         = SSE2_OPT;
  gate->scanhash              = (void*)&scanhash_neoscrypt;
  gate->hash                  = (void*)&neoscrypt;
-  gate->get_max64             = (void*)&get_neoscrypt_max64;
-  gate->wait_for_diff         = (void*)&neoscrypt_wait_for_diff;
  gate->build_stratum_request = (void*)&std_be_build_stratum_request;
  gate->work_decode           = (void*)&std_be_work_decode;
  gate->submit_getwork_result = (void*)&std_be_submit_getwork_result;
--- a/algo/scrypt/pluck.c
+++ b/algo/scrypt/pluck.c
@@ -483,11 +483,6 @@ int scanhash_pluck( struct work *work, uint32_t max_nonce,
 	return 0;
 }

-int64_t pluck_get_max64 ()
-{
-  return 0x1ffLL;
-}
-
 bool pluck_miner_thread_init( int thr_id )
 { 
  scratchbuf = malloc( 128 * 1024 ); 
@@ -503,7 +498,6 @@ bool register_pluck_algo( algo_gate_t* gate )
  gate->miner_thread_init = (void*)&pluck_miner_thread_init;
  gate->scanhash         = (void*)&scanhash_pluck;
  gate->hash             = (void*)&pluck_hash;
-  gate->get_max64        = (void*)&pluck_get_max64;
  opt_target_factor = 65536.0;
  return true;
 };
--- a/algo/scrypt/scrypt.c
+++ b/algo/scrypt/scrypt.c
@@ -766,8 +766,6 @@ extern int scanhash_scrypt( struct work *work, uint32_t max_nonce,
 	return 0;
 }

-int64_t scrypt_get_max64() { return 0xfff; }
-
 bool scrypt_miner_thread_init( int thr_id )
 {
 scratchbuf = scrypt_buffer_alloc( scratchbuf_size );  
@@ -783,10 +781,8 @@ bool register_scrypt_algo( algo_gate_t* gate )
  gate->miner_thread_init =(void*)&scrypt_miner_thread_init;
  gate->scanhash         = (void*)&scanhash_scrypt;
 //  gate->hash             = (void*)&scrypt_1024_1_1_256_24way;
-  gate->get_max64        = (void*)&scrypt_get_max64;
  opt_target_factor = 65536.0;

-
  if ( !opt_param_n )
  {
     opt_param_n = 1024;
--- a/algo/scryptjane/scrypt-jane.c
+++ b/algo/scryptjane/scrypt-jane.c
@@ -240,7 +240,6 @@ bool register_scryptjane_algo( algo_gate_t* gate )
 {
    gate->scanhash   = (void*)&scanhash_scryptjane;
    gate->hash       = (void*)&scryptjanehash;
-    gate->get_max64  = (void*)&get_max64_0x40LL;
    opt_target_factor = 65536.0;

    // figure out if arg in N or Nfactor
--- a/algo/sha/sha256t-gate.c
+++ b/algo/sha/sha256t-gate.c
@@ -15,7 +15,6 @@ bool register_sha256t_algo( algo_gate_t* gate )
    gate->scanhash   = (void*)&scanhash_sha256t;
    gate->hash       = (void*)&sha256t_hash;
 #endif
-    gate->get_max64  = (void*)&get_max64_0x3ffff;
    return true;
 }

@@ -34,7 +33,6 @@ bool register_sha256q_algo( algo_gate_t* gate )
    gate->scanhash   = (void*)&scanhash_sha256q;
    gate->hash       = (void*)&sha256q_hash;
 #endif
-    gate->get_max64  = (void*)&get_max64_0x3ffff;
    return true;

 }
--- a/algo/skein/skein-gate.c
+++ b/algo/skein/skein-gate.c
@@ -2,8 +2,6 @@
 #include "sph_skein.h"
 #include "skein-hash-4way.h"

-int64_t skein_get_max64() { return 0x7ffffLL; }
-
 bool register_skein_algo( algo_gate_t* gate )
 {
    gate->optimizations = AVX2_OPT | SHA_OPT;
@@ -14,7 +12,6 @@ bool register_skein_algo( algo_gate_t* gate )
    gate->scanhash  = (void*)&scanhash_skein;
    gate->hash      = (void*)&skeinhash;
 #endif
-    gate->get_max64 = (void*)&skein_get_max64;
    return true;
 };

--- a/algo/skein/skein2-gate.c
+++ b/algo/skein/skein2-gate.c
@@ -2,11 +2,6 @@
 #include <stdint.h>
 #include "sph_skein.h"

-int64_t skein2_get_max64 ()
-{
-  return 0x7ffffLL;
-}
-
 bool register_skein2_algo( algo_gate_t* gate )
 {
  gate->optimizations = AVX2_OPT;
@@ -17,7 +12,6 @@ bool register_skein2_algo( algo_gate_t* gate )
  gate->scanhash  = (void*)&scanhash_skein2;
  gate->hash      = (void*)&skein2hash;
 #endif
-  gate->get_max64 = (void*)&skein2_get_max64;
  return true;
 };

--- a/algo/sm3/sm3-hash-4way.c
+++ b/algo/sm3/sm3-hash-4way.c
@@ -181,7 +181,7 @@ void sm3_4way_compress( __m128i *digest, __m128i *block )
   for( j =0; j < 16; j++ )
   {
      SS1 = mm128_rol_32( _mm_add_epi32( _mm_add_epi32( mm128_rol_32(A,12), E ),
-                                      mm128_rol_32( T, j ) ), 7 );
+                                      mm128_rol_var_32( T, j ) ), 7 );
      SS2 = _mm_xor_si128( SS1, mm128_rol_32( A, 12 ) );
      TT1 = _mm_add_epi32( _mm_add_epi32( _mm_add_epi32( FF0( A, B, C ), D ),
                                          SS2 ), W1[j] );
@@ -200,9 +200,8 @@ void sm3_4way_compress( __m128i *digest, __m128i *block )
   T = _mm_set1_epi32( 0x7A879D8AUL );
   for( j =16; j < 64; j++ )
   {
-      // AVX512 _mm_rol_epi32 doesn't like using a variable for the second arg.
      SS1 = mm128_rol_32( _mm_add_epi32( _mm_add_epi32( mm128_rol_32(A,12), E ),
-                                      mm128_rol_32( T, j&31 ) ), 7 );
+                                      mm128_rol_var_32( T, j&31 ) ), 7 );
      SS2 = _mm_xor_si128( SS1, mm128_rol_32( A, 12 ) );
      TT1 = _mm_add_epi32( _mm_add_epi32( _mm_add_epi32( FF1( A, B, C ), D ), 
                                          SS2 ), W1[j] );
--- a/algo/x11/c11-gate.c
+++ b/algo/x11/c11-gate.c
@@ -12,7 +12,6 @@ bool register_c11_algo( algo_gate_t* gate )
  gate->hash      = (void*)&c11_hash;
 #endif
  gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
-  gate->get_max64 = (void*)&get_max64_0x3ffff;
  return true;
 };

--- a/algo/x11/fresh.c
+++ b/algo/x11/fresh.c
@@ -125,7 +125,6 @@ bool register_fresh_algo( algo_gate_t* gate )
    algo_not_tested();
    gate->scanhash   = (void*)&scanhash_fresh;
    gate->hash       = (void*)&freshhash;
-    gate->get_max64  = (void*)&get_max64_0x3ffff;
    opt_target_factor = 256.0;
    return true;
 };
--- a/algo/x11/timetravel-gate.c
+++ b/algo/x11/timetravel-gate.c
@@ -12,7 +12,6 @@ bool register_timetravel_algo( algo_gate_t* gate )
  gate->hash       = (void*)&timetravel_hash;
 #endif
  gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
-  gate->get_max64  = (void*)&get_max64_0xffffLL;
  opt_target_factor = 256.0;
  return true;
 };
--- a/algo/x11/timetravel10-gate.c
+++ b/algo/x11/timetravel10-gate.c
@@ -12,7 +12,6 @@ bool register_timetravel10_algo( algo_gate_t* gate )
  gate->hash       = (void*)&timetravel10_hash;
 #endif
  gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
-  gate->get_max64  = (void*)&get_max64_0xffffLL;
  opt_target_factor = 256.0;
  return true;
 };
--- a/algo/x11/tribus-gate.c
+++ b/algo/x11/tribus-gate.c
@@ -3,7 +3,6 @@
 bool register_tribus_algo( algo_gate_t* gate )
 {
  gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
-  gate->get_max64     = (void*)&get_max64_0x1ffff;
 #if defined (TRIBUS_4WAY)
 //  init_tribus_4way_ctx();
  gate->scanhash      = (void*)&scanhash_tribus_4way;
--- a/algo/x11/x11-gate.c
+++ b/algo/x11/x11-gate.c
@@ -12,7 +12,6 @@ bool register_x11_algo( algo_gate_t* gate )
  gate->hash      = (void*)&x11_hash;
 #endif
  gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
-  gate->get_max64 = (void*)&get_max64_0x3ffff;
  return true;
 };

--- a/algo/x11/x11gost-gate.c
+++ b/algo/x11/x11gost-gate.c
@@ -12,7 +12,6 @@ bool register_x11gost_algo( algo_gate_t* gate )
  gate->hash      = (void*)&x11gost_hash;
 #endif
  gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
-  gate->get_max64 = (void*)&get_max64_0x3ffff;
  return true;
 };

--- a/algo/x12/x12-gate.c
+++ b/algo/x12/x12-gate.c
@@ -12,7 +12,6 @@ bool register_x12_algo( algo_gate_t* gate )
  gate->hash      = (void*)&x12hash;
 #endif
  gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
-  gate->get_max64 = (void*)&get_max64_0x3ffff;
  return true;
 };

--- a/algo/x13/phi1612-gate.c
+++ b/algo/x13/phi1612-gate.c
@@ -12,7 +12,6 @@ bool register_phi1612_algo( algo_gate_t* gate )
  gate->hash      = (void*)&phi1612_hash;
 #endif
  gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
-  gate->get_max64 = (void*)&get_max64_0x3ffff;
  return true;
 };

--- a/algo/x13/x13-gate.c
+++ b/algo/x13/x13-gate.c
@@ -12,7 +12,6 @@ bool register_x13_algo( algo_gate_t* gate )
  gate->hash      = (void*)&x13hash;
 #endif
  gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
-  gate->get_max64 = (void*)&get_max64_0x3ffff;
  return true;
 };

--- a/algo/x13/x13sm3-gate.c
+++ b/algo/x13/x13sm3-gate.c
@@ -12,7 +12,6 @@ bool register_x13sm3_algo( algo_gate_t* gate )
  gate->hash      = (void*)&x13sm3_hash;
 #endif
  gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
-  gate->get_max64 = (void*)&get_max64_0x3ffff;
  return true;
 };

@@ -28,7 +27,6 @@ bool register_x13bcd_algo( algo_gate_t* gate )
  gate->hash      = (void*)&x13bcd_hash;
 #endif
  gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
-  gate->get_max64 = (void*)&get_max64_0x3ffff;
  return true;
 };

--- a/algo/x14/axiom.c
+++ b/algo/x14/axiom.c
@@ -83,6 +83,5 @@ bool register_axiom_algo( algo_gate_t* gate )
 {
    gate->scanhash  = (void*)&scanhash_axiom;
    gate->hash      = (void*)&axiomhash;
-    gate->get_max64 = (void*)&get_max64_0x40LL;
    return true;
 }
--- a/algo/x14/polytimos-gate.c
+++ b/algo/x14/polytimos-gate.c
@@ -11,7 +11,6 @@ bool register_polytimos_algo( algo_gate_t* gate )
  gate->scanhash  = (void*)&scanhash_polytimos;
  gate->hash      = (void*)&polytimos_hash;
 #endif
-  gate->get_max64 = (void*)&get_max64_0x3ffff;
  return true;
 };

--- a/algo/x14/veltor-gate.c
+++ b/algo/x14/veltor-gate.c
@@ -12,7 +12,6 @@ bool register_veltor_algo( algo_gate_t* gate )
  gate->hash      = (void*)&veltor_hash;
 #endif
  gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
-  gate->get_max64 = (void*)&get_max64_0x3ffff;
  return true;
 };

--- a/algo/x14/x14-gate.c
+++ b/algo/x14/x14-gate.c
@@ -12,7 +12,6 @@ bool register_x14_algo( algo_gate_t* gate )
  gate->hash      = (void*)&x14hash;
 #endif
  gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
-  gate->get_max64 = (void*)&get_max64_0x3ffff;
  return true;
 };

--- a/algo/x17/sonoa-gate.c
+++ b/algo/x17/sonoa-gate.c
@@ -11,7 +11,6 @@ bool register_sonoa_algo( algo_gate_t* gate )
  gate->scanhash  = (void*)&scanhash_sonoa;
  gate->hash      = (void*)&sonoa_hash;
 #endif
-  gate->get_max64     = (void*)&get_max64_0x1ffff;
  gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
  return true;
 };
--- a/algo/x17/xevan-gate.c
+++ b/algo/x17/xevan-gate.c
@@ -12,7 +12,6 @@ bool register_xevan_algo( algo_gate_t* gate )
  gate->hash      = (void*)&xevan_hash;
 #endif
  gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
-  gate->get_max64  = (void*)&get_max64_0xffffLL;
  opt_target_factor = 256.0;
  return true;
 };
--- a/algo/yescrypt/yescrypt.c
+++ b/algo/yescrypt/yescrypt.c
@@ -416,16 +416,6 @@ int scanhash_yescrypt( struct work *work, uint32_t max_nonce,
        return 0;
 }

-int64_t yescrypt_get_max64()
-{
-  return 0x1ffLL;
-}
-
-int64_t yescryptr16_get_max64()
-{
-  return 0xfffLL;
-}
-
 void yescrypt_gate_base(algo_gate_t *gate )
 {
   gate->optimizations = SSE2_OPT | SHA_OPT;
@@ -437,7 +427,6 @@ void yescrypt_gate_base(algo_gate_t *gate )
 bool register_yescrypt_algo( algo_gate_t* gate )
 {
   yescrypt_gate_base( gate );
-   gate->get_max64  = (void*)&yescrypt_get_max64;

   if ( opt_param_n )  YESCRYPT_N = opt_param_n;
   else                YESCRYPT_N = 2048;
@@ -469,7 +458,6 @@ bool register_yescrypt_algo( algo_gate_t* gate )
 bool register_yescryptr8_algo( algo_gate_t* gate )
 {
   yescrypt_gate_base( gate );
-   gate->get_max64  = (void*)&yescrypt_get_max64;
   yescrypt_client_key = "Client Key";
   yescrypt_client_key_len = 10;
   YESCRYPT_N = 2048;
@@ -481,7 +469,6 @@ bool register_yescryptr8_algo( algo_gate_t* gate )
 bool register_yescryptr16_algo( algo_gate_t* gate )
 {
   yescrypt_gate_base( gate );
-   gate->get_max64  = (void*)&yescryptr16_get_max64;
   yescrypt_client_key = "Client Key";
   yescrypt_client_key_len = 10;
   YESCRYPT_N = 4096;   
@@ -493,7 +480,6 @@ bool register_yescryptr16_algo( algo_gate_t* gate )
 bool register_yescryptr32_algo( algo_gate_t* gate )
 {
   yescrypt_gate_base( gate );
-   gate->get_max64  = (void*)&yescryptr16_get_max64;
   yescrypt_client_key = "WaviBanana";
   yescrypt_client_key_len = 10;
   YESCRYPT_N = 4096;
--- a/algo/yespower/yespower-gate.c
+++ b/algo/yespower/yespower-gate.c
@@ -109,11 +109,6 @@ int scanhash_yespower_b2b( struct work *work, uint32_t max_nonce,
        return 0;
 }

-int64_t yespower_get_max64()
-{
-  return 0xfffLL;
-}
-
 bool register_yespower_algo( algo_gate_t* gate )
 {
  yespower_params.version = YESPOWER_1_0;
@@ -141,7 +136,6 @@ bool register_yespower_algo( algo_gate_t* gate )
     applog( LOG_NOTICE,"Key= \"%s\"\n", yespower_params.pers );

  gate->optimizations = SSE2_OPT;
-  gate->get_max64     = (void*)&yespower_get_max64;
  gate->scanhash      = (void*)&scanhash_yespower;
  gate->hash          = (void*)&yespower_hash;
  opt_target_factor = 65536.0;
@@ -156,7 +150,6 @@ bool register_yespowerr16_algo( algo_gate_t* gate )
  yespower_params.pers    = NULL;
  yespower_params.perslen = 0;
  gate->optimizations = SSE2_OPT;
-  gate->get_max64     = (void*)&yespower_get_max64;
  gate->scanhash      = (void*)&scanhash_yespower;
  gate->hash          = (void*)&yespower_hash;
  opt_target_factor = 65536.0;
@@ -164,21 +157,10 @@ bool register_yespowerr16_algo( algo_gate_t* gate )
 };


-int64_t yescrypt_05_get_max64()
-{
-  return 0x1ffLL;
-}
-
-int64_t yescryptr16_05_get_max64()
-{
-  return 0xfffLL;
-}
-
 bool register_yescrypt_05_algo( algo_gate_t* gate )
 {
   gate->optimizations = SSE2_OPT | SHA_OPT;
   gate->scanhash   = (void*)&scanhash_yespower;
-   gate->get_max64  = (void*)&yescrypt_05_get_max64;
   yespower_params.version = YESPOWER_0_5;
   yespower_params.N       = 2048;
   yespower_params.r       = 8;
@@ -192,7 +174,6 @@ bool register_yescryptr8_05_algo( algo_gate_t* gate )
 {
   gate->optimizations = SSE2_OPT | SHA_OPT;
   gate->scanhash   = (void*)&scanhash_yespower;
-   gate->get_max64  = (void*)&yescrypt_05_get_max64;
   yespower_params.version = YESPOWER_0_5;
   yespower_params.N       = 2048;
   yespower_params.r       = 8;
@@ -206,7 +187,6 @@ bool register_yescryptr16_05_algo( algo_gate_t* gate )
 {
   gate->optimizations = SSE2_OPT | SHA_OPT;
   gate->scanhash   = (void*)&scanhash_yespower;
-   gate->get_max64  = (void*)&yescryptr16_05_get_max64;
   yespower_params.version = YESPOWER_0_5;
   yespower_params.N       = 4096;
   yespower_params.r       = 16;
@@ -220,7 +200,6 @@ bool register_yescryptr32_05_algo( algo_gate_t* gate )
 {
   gate->optimizations = SSE2_OPT | SHA_OPT;
   gate->scanhash   = (void*)&scanhash_yespower;
-   gate->get_max64  = (void*)&yescryptr16_05_get_max64;
   yespower_params.version = YESPOWER_0_5;
   yespower_params.N       = 4096;
   yespower_params.r       = 32;
@@ -245,7 +224,6 @@ bool register_power2b_algo( algo_gate_t* gate )
  applog( LOG_NOTICE,"Key length= %d\n", yespower_params.perslen );

  gate->optimizations = SSE2_OPT;
-  gate->get_max64     = (void*)&yespower_get_max64;
  gate->scanhash      = (void*)&scanhash_yespower_b2b;
  gate->hash          = (void*)&yespower_b2b_hash;
  opt_target_factor = 65536.0;
@@ -286,7 +264,6 @@ bool register_yespower_b2b_algo( algo_gate_t* gate )
  }  

  gate->optimizations = SSE2_OPT;
-  gate->get_max64     = (void*)&yespower_get_max64;
  gate->scanhash      = (void*)&scanhash_yespower_b2b;
  gate->hash          = (void*)&yespower_b2b_hash;
  opt_target_factor = 65536.0;
--- a/api.c
+++ b/api.c
@@ -32,7 +32,7 @@
 #include <sys/types.h>

 #include "miner.h"
-
+#include "sysinfos.c"
 #ifndef WIN32
 # include <errno.h>
 # include <sys/socket.h>
@@ -105,7 +105,7 @@ extern double global_hashrate;
 #define USE_MONITORING
 extern float cpu_temp(int);
 extern uint32_t cpu_clock(int);
-extern int cpu_fanpercent(void);
+//extern int cpu_fanpercent(void);

 /***************************************************************/

--- a/20
+++ b/20
@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.9.9.
+# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.9.9.1.
 #
 #
 # Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
@@ -577,8 +577,8 @@ MAKEFLAGS=
 # Identity of this package.
 PACKAGE_NAME='cpuminer-opt'
 PACKAGE_TARNAME='cpuminer-opt'
-PACKAGE_VERSION='3.9.9'
-PACKAGE_STRING='cpuminer-opt 3.9.9'
+PACKAGE_VERSION='3.9.9.1'
+PACKAGE_STRING='cpuminer-opt 3.9.9.1'
 PACKAGE_BUGREPORT=''
 PACKAGE_URL=''

@@ -1332,7 +1332,7 @@ if test "$ac_init_help" = "long"; then
  # Omit some internal or obsolete options to make the list less imposing.
  # This message is too long to be a string in the A/UX 3.1 sh.
  cat <<_ACEOF
-\`configure' configures cpuminer-opt 3.9.9 to adapt to many kinds of systems.
+\`configure' configures cpuminer-opt 3.9.9.1 to adapt to many kinds of systems.

 Usage: $0 [OPTION]... [VAR=VALUE]...

@@ -1404,7 +1404,7 @@ fi

 if test -n "$ac_init_help"; then
  case $ac_init_help in
-     short | recursive ) echo "Configuration of cpuminer-opt 3.9.9:";;
+     short | recursive ) echo "Configuration of cpuminer-opt 3.9.9.1:";;
   esac
  cat <<\_ACEOF

@@ -1509,7 +1509,7 @@ fi
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
  cat <<\_ACEOF
-cpuminer-opt configure 3.9.9
+cpuminer-opt configure 3.9.9.1
 generated by GNU Autoconf 2.69

 Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2012,7 +2012,7 @@ cat >config.log <<_ACEOF
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.

-It was created by cpuminer-opt $as_me 3.9.9, which was
+It was created by cpuminer-opt $as_me 3.9.9.1, which was
 generated by GNU Autoconf 2.69.  Invocation command line was

  $ $0 $@
@@ -2993,7 +2993,7 @@ fi

 # Define the identity of the package.
 PACKAGE='cpuminer-opt'
- VERSION='3.9.9'
+ VERSION='3.9.9.1'


 cat >>confdefs.h <<_ACEOF
@@ -6690,7 +6690,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by cpuminer-opt $as_me 3.9.9, which was
+This file was extended by cpuminer-opt $as_me 3.9.9.1, which was
 generated by GNU Autoconf 2.69.  Invocation command line was

  CONFIG_FILES    = $CONFIG_FILES
@@ -6756,7 +6756,7 @@ _ACEOF
 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
 ac_cs_version="\\
-cpuminer-opt config.status 3.9.9
+cpuminer-opt config.status 3.9.9.1
 configured by $0, generated by GNU Autoconf 2.69,
  with options \\"\$ac_cs_config\\"

--- a/configure.ac
+++ b/configure.ac
@@ -1,4 +1,4 @@
-AC_INIT([cpuminer-opt], [3.9.9])
+AC_INIT([cpuminer-opt], [3.9.9.1])

 AC_PREREQ([2.59c])
 AC_CANONICAL_SYSTEM
--- a/cpu-miner.c
+++ b/cpu-miner.c
@@ -37,6 +37,7 @@
 #include <curl/curl.h>
 #include <jansson.h>
 #include <openssl/sha.h>
+#include "sysinfos.c"

 #ifdef WIN32
 #include <winsock2.h>
@@ -81,7 +82,6 @@ bool opt_debug_diff = false;
 bool opt_protocol = false;
 bool opt_benchmark = false;
 bool opt_redirect = true;
-bool opt_showdiff = true;
 bool opt_extranonce = true;
 bool want_longpoll = true;
 bool have_longpoll = false;
@@ -110,12 +110,16 @@ int opt_n_threads = 0;
 bool opt_reset_on_stale = false;

 // Windows doesn't support 128 bit affinity mask.
+// Need compile time and run time test.
 #if defined(__linux) && defined(GCC_INT128)  
 #define AFFINITY_USES_UINT128 1
-uint128_t opt_affinity = -1LL;
+uint128_t opt_affinity = -1;
+static bool affinity_uses_uint128 = true;
 #else
-uint64_t opt_affinity = -1LL;
+uint64_t opt_affinity = -1;
+static bool affinity_uses_uint128 = false;
 #endif
+
 int opt_priority = 0;
 int num_cpus = 1;
 int num_cpugroups = 1;
@@ -148,9 +152,10 @@ double opt_target_factor = 1.0;
 uint32_t zr5_pok = 0;
 bool opt_stratum_stats = false;
 bool opt_hash_meter = false;
-uint32_t accepted_share_count = 0ULL;
-uint32_t rejected_share_count = 0ULL;
-uint32_t solved_block_count = 0ULL;
+uint32_t submitted_share_count= 0;
+uint32_t accepted_share_count = 0;
+uint32_t rejected_share_count = 0;
+uint32_t solved_block_count = 0;
 double *thr_hashrates;
 double *thr_hashcount;
 double global_hashcount = 0;
@@ -212,9 +217,9 @@ static inline void drop_policy(void)

 // Linux affinity can use int128.
 #if AFFINITY_USES_UINT128
-static void affine_to_cpu_mask( int id, unsigned __int128 mask )
+static void affine_to_cpu_mask( int id, uint128_t mask )
 #else
-static void affine_to_cpu_mask( int id, unsigned long long mask )
+static void affine_to_cpu_mask( int id, uint64_t mask )
 #endif
 {
   cpu_set_t set;
@@ -225,9 +230,9 @@ static void affine_to_cpu_mask( int id, unsigned long long mask )
   {
      // cpu mask
 #if AFFINITY_USES_UINT128
-      if( ( mask & ( (unsigned __int128)1ULL << i ) ) )  CPU_SET( i, &set );
+      if( ( mask & ( (uint128_t)1 << i ) ) )  CPU_SET( i, &set );
 #else
-      if( (ncpus > 64) || ( mask & (1ULL << i) ) )  CPU_SET( i, &set );
+      if( (ncpus > 64) || ( mask & ( (uint64_t)1 << i ) ) )  CPU_SET( i, &set );
 #endif
   }
   if ( id == -1 )
@@ -246,7 +251,7 @@ static void affine_to_cpu_mask( int id, unsigned long long mask )
 static inline void drop_policy(void) { }

 // Windows CPU groups to manage more than 64 CPUs.
-static void affine_to_cpu_mask( int id, unsigned long mask )
+static void affine_to_cpu_mask( int id, uint64_t mask )
 {
   bool success;
   unsigned long last_error;    
@@ -268,14 +273,13 @@ static void affine_to_cpu_mask( int id, unsigned long mask )
 	   for( group = 0; group < num_cpugroups; group++ )
 	   {
 	      int cpus = GetActiveProcessorCount( group );
- 	   if ( cpu < cpus )
-	      break;
-
+ 	      if ( cpu < cpus )  break;
  	      cpu -= cpus;
      }

 	   if (opt_debug)
-	applog(LOG_DEBUG, "Binding thread %d to cpu %d on cpu group %d (mask %x)", id, cpu, group, (1ULL << cpu));
+         applog(LOG_DEBUG, "Binding thread %d to cpu %d on cpu group %d (mask %x)",
+               id, cpu, group, (1ULL << cpu));

 	   GROUP_AFFINITY affinity;
 	   affinity.Group = group;
@@ -290,9 +294,9 @@ static void affine_to_cpu_mask( int id, unsigned long mask )
   if (!success)
   {
 	   last_error = GetLastError();
-	applog(LOG_WARNING, "affine_to_cpu_mask for %u returned %x", id, last_error);
+	   applog(LOG_WARNING, "affine_to_cpu_mask for %u returned %x",
+               id, last_error);
   }
-
 }

 #else
@@ -831,21 +835,22 @@ void scale_hash_for_display ( double* hashrate, char* prefix )
     {  *prefix = 'E';  *hashrate /= 1e18;  }
 }

-static inline void sprintf_et( char *str, uint64_t seconds )
+static inline void sprintf_et( char *str, int seconds )
 {
-   uint64_t min = seconds / 60;
-   uint64_t sec = seconds % 60;
-   uint64_t hrs = min / 60;
+   // sprintf doesn't like uint64_t, Linux thinks it's long, Windows long long.
+   unsigned int min = seconds / 60;
+   unsigned int sec = seconds % 60;
+   unsigned int hrs = min / 60;
   if ( hrs )   
   {
-      uint64_t days = hrs / 24;
+      unsigned int days = hrs / 24;
      if ( days )  //0d00h
-         sprintf( str, "%llud%02lluh", days, hrs % 24 );
+         sprintf( str, "%ud%02uh", days, hrs % 24 );
      else         // 0h00m  
-         sprintf( str, "%lluh%02llum", hrs, min % 60 );
+         sprintf( str, "%uh%02um", hrs, min % 60 );
   }
   else         // 0m00s
-      sprintf( str, "%llum%02llus", min, sec );
+      sprintf( str, "%um%02us", min, sec );
 }
   
 // Bitcoin formula for converting difficulty to an equivalent
@@ -859,23 +864,33 @@ static inline void sprintf_et( char *str, uint64_t seconds )

 const double diff_to_hash = 4294967296.;

+static struct   timeval session_start;
 static struct   timeval five_min_start;
-static double   time_sum    = 0.;
 static double   latency_sum = 0.;
 static uint64_t submit_sum  = 0;
+static uint64_t accept_sum  = 0;
 static uint64_t reject_sum  = 0;
+static double   norm_diff_sum = 0.;
 static uint32_t last_block_height = 0;
 static double   last_targetdiff = 0.;
+static double   ref_rate_hi = 0.;
+static double   ref_rate_lo = 1e100;
+#if !(defined(__WINDOWS__) || defined(__WIN64))
+static uint32_t hi_temp = 0;
+#endif
+//static uint32_t stratum_errors = 0;

 struct share_stats_t
 {
   struct timeval submit_time;
   double net_diff;
   double share_diff;
+   double stratum_diff;
+   double target_diff;
 };

 #define s_stats_size 8
-static struct share_stats_t share_stats[ s_stats_size ];
+static struct share_stats_t share_stats[ s_stats_size ] = {0};
 static int s_get_ptr = 0, s_put_ptr = 0;
 static struct timeval last_submit_time = {0};

@@ -886,77 +901,102 @@ static inline int stats_ptr_incr( int p )

 void report_summary_log( bool force )
 {
-   struct timeval now, et;
+   struct timeval now, et, uptime, start_time;

   pthread_mutex_lock( &stats_lock );

   gettimeofday( &now, NULL );
   timeval_subtract( &et, &now, &five_min_start );

-   if ( !force && et.tv_sec < 300 )
+   if ( !( force && ( submit_sum || ( et.tv_sec > 5 ) ) )
+        && ( et.tv_sec < 300 ) )
   {
      pthread_mutex_unlock( &stats_lock );
      return;
   }
   
-   // collect and reset global counters
-   double   time     = time_sum;    time_sum    = 0.;
+   // collect and reset periodic counters
   uint64_t submits = submit_sum;  submit_sum = 0;
+   uint64_t accepts = accept_sum;  accept_sum = 0;
   uint64_t rejects = reject_sum;  reject_sum = 0;
-   int      latency  = latency_sum; latency_sum = 0;
+//   int      latency  = latency_sum; latency_sum = 0;
+   memcpy( &start_time, &five_min_start, sizeof start_time );
   memcpy( &five_min_start, &now, sizeof now );

   pthread_mutex_unlock( &stats_lock );

+   timeval_subtract( &et, &now, &start_time );
+   timeval_subtract( &uptime, &now, &session_start );
+   
+   double share_time = (double)et.tv_sec + (double)et.tv_usec / 1e6;
   double ghrate = global_hashrate;
   double scaled_ghrate = ghrate;
-   double   shrate = time == 0. ? 0. : diff_to_hash * last_targetdiff
-                                       * (double)(submits - rejects)  / time;
+   double shrate = share_time == 0. ? 0. : diff_to_hash * last_targetdiff
+                                           * (double)(accepts) / share_time;
+   double sess_hrate = uptime.tv_sec == 0. ? 0. : diff_to_hash * norm_diff_sum
+                                                   / (double)uptime.tv_sec;
   double scaled_shrate = shrate;
-   int      avg_latency = 0;
-   double   latency_pc  = 0.;
+//   int    avg_latency = 0;
+//   double latency_pc  = 0.;
   double submit_rate = 0.;
   char shr_units[4] = {0};
   char ghr_units[4] = {0};
+   char sess_hr_units[4] = {0};
   char et_str[24];
+   char upt_str[24];

-   if ( submits )
-      avg_latency = latency / submits;
+//   if ( submits )  avg_latency = latency / submits;

-   if ( time != 0. )
+   if ( share_time != 0. )
   {
-      submit_rate = (double)submits*60. / time;
-      latency_pc =  (double)latency / (time * 10.);
+      submit_rate = (double)submits*60. / share_time;
+//      latency_pc =  (double)latency / (share_time * 10.);
   }

+   if ( ghrate > ref_rate_hi )  ref_rate_hi = ghrate;
+   if ( ghrate < ref_rate_lo )  ref_rate_lo = ghrate;
+
   scale_hash_for_display( &scaled_shrate, shr_units );
   scale_hash_for_display( &scaled_ghrate, ghr_units );
+   scale_hash_for_display( &sess_hrate, sess_hr_units );
+
   sprintf_et( et_str, et.tv_sec );
+   sprintf_et( upt_str, uptime.tv_sec );

-   applog( LOG_NOTICE, "Submitted %d shares in %s, %.2f /min, %ld rejected",
-                        submits, et_str, submit_rate, rejects );
-   applog2( LOG_INFO, "Share eqv: %.2f %sh/s, miner ref: %.2f %sh/s",
-           scaled_shrate, shr_units, scaled_ghrate, ghr_units );
+   applog( LOG_NOTICE, "Periodic Report     %s        %s", et_str, upt_str );
+   applog2( LOG_INFO, "Share rate        %.2f/min     %.2f/min",
+                      submit_rate, (double)submitted_share_count*60. /
+                    ( (double)uptime.tv_sec + (double)uptime.tv_usec / 1e6 ) );
+   applog2( LOG_INFO, "Hash rate       %7.2f%sh/s   %7.2f%sh/s   (%.2f%sh/s)",
+                     scaled_shrate, shr_units, sess_hrate, sess_hr_units, 
+                     scaled_ghrate, ghr_units );
+   applog2( LOG_INFO,"Submitted        %6d       %6d",
+                       (int)submits, submitted_share_count );
+   applog2( LOG_INFO,"Accepted         %6d       %6d",
+                       (int)accepts, accepted_share_count );
+   applog2( LOG_INFO,"Rejected         %6d       %6d",
+                       (int)rejects, rejected_share_count );
+//   applog2( LOG_INFO,"Blocks solved                  %6d",
+//                              solved_block_count );
+
+#if !(defined(__WINDOWS__) || defined(__WIN64))

-#if ((defined(_WIN64) || defined(__WINDOWS__)))
-   applog2( LOG_INFO, "Network latency %d ms (%.2f%%)",
-                       avg_latency, latency_pc );
-#else
   int temp = cpu_temp(0);
   char tempstr[32];
+   if ( temp > hi_temp ) hi_temp = temp;

   if ( use_colors && ( temp >= 70 ) )
   {
      if ( temp >= 80 )
-         sprintf( tempstr, "%sCPU temp %d C%s", CL_WHT CL_RED, temp, CL_N );
+         sprintf( tempstr, "%s%dC%s", CL_WHT CL_RED, temp, CL_N );
      else
-         sprintf( tempstr, "%sCPU temp %d C%s", CL_WHT CL_YLW, temp, CL_N );
+         sprintf( tempstr, "%s%dC%s", CL_WHT CL_YLW, temp, CL_N );
   }
   else
-      sprintf( tempstr, "CPU temp %d C", temp );
+      sprintf( tempstr, "%dC", temp );
+
+   applog2(LOG_INFO,"CPU temp             %s      max %dC", tempstr, hi_temp );

-   applog2( LOG_INFO, "Network latency %d ms (%.2f%%), %s",
-                      avg_latency, latency_pc, tempstr );
 #endif
 }

@@ -983,6 +1023,7 @@ static int share_result( int result, struct work *null_work,
   }
   else
   {
+      // empty queue, it must have overflowed and stats were lost for a share.
      pthread_mutex_unlock( &stats_lock );
      applog(LOG_WARNING,"Pending shares overflow, stats for share are lost.");
   }
@@ -992,9 +1033,9 @@ static int share_result( int result, struct work *null_work,
   {
      gettimeofday( &ack_time, NULL );
      timeval_subtract( &latency_tv, &ack_time, &my_stats.submit_time );
-      latency = ( latency_tv.tv_sec * 1000  + latency_tv.tv_usec / 1000 );
+      latency = ( latency_tv.tv_sec * 1e3  + latency_tv.tv_usec / 1e3 );
      timeval_subtract( &et, &my_stats.submit_time, &last_submit_time );
-      share_time = (double)et.tv_sec + ( (double)et.tv_usec / 1000000. );
+      share_time = (double)et.tv_sec + ( (double)et.tv_usec / 1e6 );
      memcpy( &last_submit_time, &my_stats.submit_time,
              sizeof last_submit_time );
   }
@@ -1003,11 +1044,23 @@ static int share_result( int result, struct work *null_work,
                                                my_stats.net_diff * 100.;

   // check result
+   if ( result )
+   {
+      accepted_share_count++;
+      if ( ( my_stats.net_diff > 0. ) && ( my_stats.share_diff >= net_diff ) )
+      {
+         solved = true;
+         solved_block_count++;
+      }
+   }
+   else
+      rejected_share_count++;
+/*
   result ? accepted_share_count++ : rejected_share_count++;
   solved = result && (my_stats.net_diff > 0.0 )
            && ( my_stats.share_diff >= net_diff );
   solved_block_count += solved ? 1 : 0 ;
-
+*/
   // update global counters for summary report
   pthread_mutex_lock( &stats_lock );

@@ -1019,9 +1072,14 @@ static int share_result( int result, struct work *null_work,
   global_hashcount = hashcount;
   global_hashrate = hashrate;
   
-   time_sum    += share_time;
-   submit_sum  ++;
-   reject_sum  += (uint64_t)!result;
+   if ( result ) 
+   {
+      accept_sum++;
+      norm_diff_sum += my_stats.target_diff;
+   }
+   else
+      reject_sum++;
+   submit_sum++;
   latency_sum += latency;

   pthread_mutex_unlock( &stats_lock );
@@ -1057,7 +1115,7 @@ static int share_result( int result, struct work *null_work,
         bin2hex( str3, (unsigned char*)str2, 12 );
         applog2( LOG_INFO, "Hash:   %s...", str3 );

-         diff_to_target( str1, last_targetdiff );
+         diff_to_target( str1, my_stats.target_diff );
         for ( int i = 0; i < 8; i++ )
            be32enc( str2 + i, str1[7 - i] );
         bin2hex( str3, (unsigned char*)str2, 12 );
@@ -1569,13 +1627,14 @@ static void *workio_thread(void *userdata)
 	bool ok = true;

 	curl = curl_easy_init();
-	if (unlikely(!curl))
+	if (unlikely( !curl ) )
   {
 		applog(LOG_ERR, "CURL initialization failed");
 		return NULL;
 	}
-	if(jsonrpc_2 && !have_stratum)
-		ok = rpc2_workio_login(curl);
+	if ( jsonrpc_2 && !have_stratum )
+		ok = rpc2_workio_login( curl );
+
   while (ok)
   {
 		struct workio_cmd *wc;
@@ -1604,6 +1663,7 @@ static void *workio_thread(void *userdata)
 		}
 		workio_cmd_free(wc);
 	}
+
   tq_freeze(mythr->q);
 	curl_easy_cleanup(curl);
 	return NULL;
@@ -1695,17 +1755,18 @@ void work_set_target_ratio( struct work* work, uint32_t* hash )
      work->sharediff = 0.;

   // collect some share stats
+   // Frequent share submission combined with high latency can cause
+   // shares to be submitted faster than they are acked. If severe enough
+   // it can overflow the queue and overwrite stats for a share.
   pthread_mutex_lock( &stats_lock );

-   // if buffer full discard the stats and don't increment pointer.
-   // We're on the clock so let share_result report it.
-   if ( share_stats[ s_put_ptr ].submit_time.tv_sec == 0 )
-   {
   gettimeofday( &share_stats[ s_put_ptr ].submit_time, NULL );
   share_stats[ s_put_ptr ].share_diff = work->sharediff;
   share_stats[ s_put_ptr ].net_diff = net_diff;
+   share_stats[ s_put_ptr ].stratum_diff = stratum_diff;
+   share_stats[ s_put_ptr ].target_diff = work->targetdiff;
+
   s_put_ptr = stats_ptr_incr( s_put_ptr );
-   }

   pthread_mutex_unlock( &stats_lock );
 }
@@ -1715,10 +1776,11 @@ bool submit_solution( struct work *work, void *hash,
 {
  if ( submit_work( thr, work ) )
  {
+     submitted_share_count++;
     work_set_target_ratio( work, hash );
     if ( !opt_quiet )
        applog( LOG_BLUE, "Share %d submitted by thread %d",
-            accepted_share_count + rejected_share_count + 1, thr->id );
+            submitted_share_count, thr->id );
     return true;
  }
  else
@@ -1731,10 +1793,11 @@ bool submit_lane_solution( struct work *work, void *hash,
 {
  if ( submit_work( thr, work ) )
  {
+     submitted_share_count++;
     work_set_target_ratio( work, hash );
     if ( !opt_quiet )
        applog( LOG_BLUE, "Share %d submitted by thread %d, lane %d",
-            accepted_share_count + rejected_share_count + 1, thr->id, lane );
+            submitted_share_count, thr->id, lane );
     return true;
  }
  else
@@ -1793,22 +1856,8 @@ static bool wanna_mine(int thr_id)
 	return state;
 }

-void std_wait_for_diff()
-{
-   while ( time(NULL) >= g_work_time + 120 )
-     sleep(1);
-}
-
 // Common target functions, default usually listed first.

-// pick your favorite or define your own
-int64_t get_max64_0x1fffffLL() { return 0x1fffffLL; } // default
-int64_t get_max64_0x40LL()     { return 0x40LL;     }
-int64_t get_max64_0x3ffff()    { return 0x3ffff;    }
-int64_t get_max64_0x3fffffLL() { return 0x3fffffLL; }
-int64_t get_max64_0x1ffff()    { return 0x1ffff;    }
-int64_t get_max64_0xffffLL()   { return 0xffffLL;   };
-
 // default
 void sha256d_gen_merkle_root( char* merkle_root, struct stratum_ctx* sctx )
 {
@@ -1943,9 +1992,11 @@ static void *miner_thread( void *userdata )
   // what is an appropriate value that is completely neutral?
   // zero seems to work. No, it breaks benchmark.
 //   uint32_t end_nonce = 0;
-   uint32_t end_nonce = opt_benchmark
-                      ? ( 0xffffffffU / opt_n_threads ) * (thr_id + 1) - 0x20
-                      : 0;
+//   uint32_t end_nonce = opt_benchmark
+//                      ? ( 0xffffffffU / opt_n_threads ) * (thr_id + 1) - 0x20
+//                      : 0;
+   uint32_t end_nonce = 0xffffffffU / opt_n_threads  * (thr_id + 1) - 0x20;
+
   time_t   firstwork_time = 0;
   int  i;
   memset( &work, 0, sizeof(work) );
@@ -1996,37 +2047,42 @@ static void *miner_thread( void *userdata )
      // Default affinity
      if ( (opt_affinity == (uint128_t)(-1) ) && opt_n_threads > 1 )
      {  
+         affine_to_cpu_mask( thr_id, (uint128_t)1 << (thr_id % num_cpus) );
         if ( opt_debug )
            applog( LOG_DEBUG, "Binding thread %d to cpu %d.",
                    thr_id, thr_id % num_cpus,
 	                 u128_hi64( (uint128_t)1 << (thr_id % num_cpus) ),
 		              u128_lo64( (uint128_t)1 << (thr_id % num_cpus) ) );
-         affine_to_cpu_mask( thr_id, (uint128_t)1 << (thr_id % num_cpus) );
      }
 #else
-      if ( (opt_affinity == -1LL) && opt_n_threads > 1 ) 
+      if ( ( opt_affinity == -1 ) && ( opt_n_threads > 1 ) ) 
      {
+         affine_to_cpu_mask( thr_id, (uint64_t)1 << (thr_id % num_cpus) );
          if (opt_debug)
             applog( LOG_DEBUG, "Binding thread %d to cpu %d.",
-                thr_id, thr_id % num_cpus, 1LL << (thr_id % num_cpus)) ;
-         affine_to_cpu_mask( thr_id, 1ULL << (thr_id % num_cpus) );
+                thr_id, thr_id % num_cpus );
      }
 #endif
      else   // Custom affinity
      {
+         affine_to_cpu_mask( thr_id, opt_affinity );
+         if ( opt_debug )
+         {
 #if AFFINITY_USES_UINT128
-         if (opt_debug)
+            if ( num_cpus > 64 )
               applog( LOG_DEBUG, "Binding thread %d to mask %016llx %016llx",
                                thr_id, u128_hi64( opt_affinity ), 
                                        u128_lo64( opt_affinity ) );
+            else
+               applog( LOG_DEBUG, "Binding thread %d to mask %016llx",
+                                 thr_id, u128_lo64( opt_affinity ) );
 #else
-         if (opt_debug)
            applog( LOG_DEBUG, "Binding thread %d to mask %016llx",
                                 thr_id, opt_affinity );
 #endif
-      affine_to_cpu_mask( thr_id, opt_affinity );
         }
      }
+   }  // num_cpus > 1

   if ( !algo_gate.miner_thread_init( thr_id ) )
   {
@@ -2034,18 +2090,20 @@ static void *miner_thread( void *userdata )
      exit (1);
   }

+   // wait for stratum to send first job
+   if ( have_stratum ) while ( !stratum.job.job_id ) sleep(1);
+
   while (1)
   {
       uint64_t hashes_done;
       struct timeval tv_start, tv_end, diff;
-       int64_t max64;
+       int64_t max64 = 1000;
       int nonce_found = 0;

       if ( algo_gate.do_this_thread( thr_id ) )
       {
          if ( have_stratum )
          {
-              algo_gate.wait_for_diff( &stratum );
      	     pthread_mutex_lock( &g_work_lock );
              if ( *algo_gate.get_nonceptr( work.data ) >= end_nonce )
                 algo_gate.stratum_gen_work( &stratum, &g_work );
@@ -2117,11 +2175,13 @@ static void *miner_thread( void *userdata )
          }
          if ( remain < max64 ) max64 = remain;
       }
-       // max64
+       // Select nonce range for approx 1 min duration based
+       // on hashrate, initial value arbitrarily set to 1000 just to get
+       // a sample hashrate for the next time.
       uint32_t work_nonce = *( algo_gate.get_nonceptr( work.data ) );
-       max64 *= thr_hashrates[thr_id];
+       max64 = 60 * thr_hashrates[thr_id];
       if ( max64 <= 0)
-          max64 = (int64_t)algo_gate.get_max64();
+          max64 = 1000;
       if ( work_nonce + max64 > end_nonce )
          max_nonce = end_nonce;
       else
@@ -2171,8 +2231,6 @@ static void *miner_thread( void *userdata )
             pthread_mutex_unlock( &g_work_lock );
          }
       }
-       // Check if time for summary report
-       report_summary_log( false );
       // display hashrate
       if ( !opt_quiet )
       {
@@ -2199,28 +2257,6 @@ static void *miner_thread( void *userdata )
                                  thr_id, hc, hc_units, hr, hr_units );
             }
          }
-/*
-          if ( thr_id == 0 && !opt_benchmark )
-          {
-             hashcount = 0.;
-             hashrate = 0.;
-             for ( i = 0; i < opt_n_threads; i++ )
-             {
-                 hashrate  += thr_hashrates[i];
-                 hashcount += thr_hashcount[i];
-             }
-             if ( hashcount != 0. )
-             {
-                scale_hash_for_display( &hashcount, hc_units );
-                scale_hash_for_display( &hashrate,  hr_units );
-                if ( hc_units[0] )
-                   sprintf( hc, "%.2f", hashcount );
-                else  // no fractions of a hash
-                   sprintf( hc, "%.0f", hashcount );
-                sprintf( hr, "%.2f", hashrate );
-             }
-          }
-*/
       }

       // Display benchmark total
@@ -2261,7 +2297,7 @@ static void *miner_thread( void *userdata )
 #endif
             }
 	       }
-       }
+       }  // benchmark
   }  // miner_thread loop

 out:
@@ -2582,11 +2618,7 @@ void std_stratum_gen_work( struct stratum_ctx *sctx, struct work *g_work )
       char share_ttf[32];
  
       if ( stratum_diff != sctx->job.diff )
-       {
-          // If diff is changing report summary from old diff first.       
-          report_summary_log( stratum_diff != 0. );
          applog( LOG_BLUE, "New stratum difficulty" );
-       }
       if ( last_block_height != sctx->block_height )
          applog( LOG_BLUE, "New block" );

@@ -2614,6 +2646,8 @@ void jr2_stratum_gen_work( struct stratum_ctx *sctx, struct work *g_work )
   work_free( g_work );
   work_copy( g_work, &sctx->work );
   pthread_mutex_unlock( &sctx->work_lock );
+   if ( last_block_height != stratum.block_height )
+       last_block_height = stratum.block_height;
 }

 static void *stratum_thread(void *userdata )
@@ -2642,6 +2676,8 @@ static void *stratum_thread(void *userdata )
          }
          else // if ( !opt_quiet )
 	          applog(LOG_WARNING, "Stratum connection reset");
+          // reset stats queue as well
+          s_get_ptr = s_put_ptr = 0;
      }

      while ( !stratum.curl )
@@ -2673,6 +2709,9 @@ static void *stratum_thread(void *userdata )
         }
      }

+      report_summary_log( ( stratum_diff != stratum.job.diff )
+                       && ( stratum_diff != 0. ) );
+      
      if ( stratum.job.job_id
          && ( !g_work_time || strcmp( stratum.job.job_id, g_work.job_id ) ) )
      {
@@ -2682,40 +2721,53 @@ static void *stratum_thread(void *userdata )
         pthread_mutex_unlock(&g_work_lock);
         restart_threads();

+/*
         if ( stratum.job.clean || jsonrpc_2 )
         {
            static uint32_t last_block_height;
            if ( last_block_height != stratum.block_height )
            {
               last_block_height = stratum.block_height;
-/*
-               if ( !opt_quiet )
-               {
-                  if ( net_diff > 0. )
-                     applog( LOG_BLUE,
-                             "%s block %d, job %s, network diff %.4f",
-                             algo_names[opt_algo], stratum.bloc_height,
-                             g_work.job_id, net_diff);
-                  else
-	                  applog( LOG_BLUE, "%s %s block %d, job %s",
-                             short_url, algo_names[opt_algo],
-                             stratum.bloc_height, g_work.job_id );
            }
-*/
-            }
-//            else if ( !opt_quiet )
-//               applog( LOG_BLUE,"New job %s.", g_work.job_id );

         }
-         else if (opt_debug && !opt_quiet)
+         else
+*/
+         if (opt_debug && !opt_quiet)
         {
            applog( LOG_BLUE, "%s asks job %d for block %d", short_url,
                strtoul( stratum.job.job_id, NULL, 16 ), stratum.block_height );
         }
      }  // stratum.job.job_id

+     if ( stratum_socket_full( &stratum, opt_timeout ) )
+     {
+        s = stratum_recv_line(&stratum);
+        if ( !s )
+           applog(LOG_WARNING, "Stratum connection interrupted");
+     }
+     else
+     {
+        s = NULL;
+        applog(LOG_ERR, "Stratum connection timeout");
+     }
+
+     if ( s )
+     {
+        if ( !stratum_handle_method( &stratum, s ) )
+           stratum_handle_response( s );
+        free( s );
+     }
+     else
+     {
+        // stratum_errors++;
+        // check if this is redundant
+        stratum_disconnect( &stratum );
+     }   
+/*
     if ( !stratum_socket_full( &stratum, opt_timeout ) )
     {
+        stratum_errors++;
        applog(LOG_ERR, "Stratum connection timeout");
        s = NULL;
     }
@@ -2724,12 +2776,13 @@ static void *stratum_thread(void *userdata )
     if ( !s )
     {
        stratum_disconnect(&stratum);
-//	  applog(LOG_WARNING, "Stratum connection interrupted");
+        applog(LOG_WARNING, "Stratum connection interrupted");
        continue;
     }
     if (!stratum_handle_method(&stratum, s))
          stratum_handle_response(s);
     free(s);
+*/
   }  // loop
 out:
  return NULL;
@@ -3074,10 +3127,6 @@ void parse_arg(int key, char *arg )
 	case 1012:
 		opt_extranonce = false;
 		break;
-	case 1013:
-      applog( LOG_WARNING, "hide-diff option is deprecated and has no effect.\n                      It will be removed in a future release. Stop using it.");
-		opt_showdiff = false;
-		break;
   case 1014:   // hash-meter
      opt_hash_meter = true;
      break;
@@ -3124,7 +3173,7 @@ void parse_arg(int key, char *arg )
 // than 64 CPUs, otherwise zero extend the upper half.
                opt_affinity = (uint128_t)ul;
                if ( num_cpus > 64 )
-                   opt_affinity = (opt_affinity << 64 ) | (uint128_t)ul;
+                   opt_affinity = (opt_affinity << 64 ) | opt_affinity;
 #else
                   opt_affinity = ul;
 #endif
@@ -3303,10 +3352,10 @@ bool check_cpu_capability ()
     bool cpu_has_sse2   = has_sse2();
     bool cpu_has_aes    = has_aes_ni();
     bool cpu_has_sse42  = has_sse42();
-     bool cpu_has_avx    = has_avx1();
+     bool cpu_has_avx    = has_avx();
     bool cpu_has_avx2   = has_avx2();
     bool cpu_has_sha    = has_sha();
-     bool cpu_has_avx512 = has_avx512f();
+     bool cpu_has_avx512 = has_avx512();
     bool sw_has_aes    = false;
     bool sw_has_sse42  = false;
     bool sw_has_avx    = false;
@@ -3340,16 +3389,16 @@ bool check_cpu_capability ()
     #ifdef __AVX2__
         sw_has_avx2 = true;
     #endif
-     #ifdef __AVX512F__
+     #if (defined(__AVX512F__) && defined(__AVX512DQ__) && defined(__AVX512BW__) && defined(__AVX512VL__))
         sw_has_avx512 = true;
     #endif
     #ifdef __SHA__
         sw_has_sha = true;
     #endif

-     #if !((__AES__) || (__SSE2__))
-         printf("Neither __AES__ nor __SSE2__ defined.\n");
-     #endif
+//     #if !((__AES__) || (__SSE2__))
+//         printf("Neither __AES__ nor __SSE2__ defined.\n");
+//     #endif

     cpu_brand_string( cpu_brand );
     printf( "CPU: %s.\n", cpu_brand );
@@ -3557,6 +3606,7 @@ int main(int argc, char *argv[])
   memset( share_stats, 0, 2 *  sizeof (struct share_stats_t) );
   gettimeofday( &last_submit_time, NULL );
   memcpy( &five_min_start, &last_submit_time, sizeof (struct timeval) );
+   memcpy( &session_start, &last_submit_time, sizeof (struct timeval) );

   if ( !check_cpu_capability() ) exit(1);

@@ -3631,20 +3681,32 @@ int main(int argc, char *argv[])
     applog( LOG_INFO,"%u CPU cores available, %u miner threads selected.",
             num_cpus, opt_n_threads );

-// To be reviewed
+// To be confirmed with more than 64 cpus
   if ( opt_affinity != -1 )
   {
-      if ( num_cpus > 64 )
+      if ( !affinity_uses_uint128 && num_cpus > 64 )
      {
-          applog(LOG_WARNING,"--cpu-affinity argument is not supported with more");
-          applog(LOG_WARNING," than 64 CPUs, using default affinity.");
+          applog(LOG_WARNING,"Setting CPU affinity with more than 64 CPUs is only");
+          applog(LOG_WARNING,"available on Linux. Using default affinity.");
          opt_affinity = -1;
      }
      else	
      {
-         if (!opt_quiet)
-            applog(LOG_DEBUG, "Binding process to cpu mask %x", opt_affinity);
-         affine_to_cpu_mask( -1, (unsigned long)opt_affinity );
+         affine_to_cpu_mask( -1, opt_affinity );
+         if ( !opt_quiet )
+         {
+#if AFFINITY_USES_UINT128
+            if ( num_cpus > 64 )   // log both 64 bit halves, high half first
+               applog(LOG_DEBUG, "Binding process to cpu mask %016llx %016llx",
+                      u128_hi64( opt_affinity ), u128_lo64( opt_affinity ) );
+            else 
+               applog(LOG_DEBUG, "Binding process to cpu mask %016llx",
+                      u128_lo64( opt_affinity ) );
+#else
+               applog(LOG_DEBUG, "Binding process to cpu mask %016llx",
+                      opt_affinity );
+#endif
+         }
      }
   }

--- a/miner.h
+++ b/miner.h
@@ -352,6 +352,7 @@ bool   submit_lane_solution( struct work *work, void *hash,


 void   get_currentalgo( char* buf, int sz );
+/*
 bool   has_sha();
 bool   has_aes_ni();
 bool   has_avx1();
@@ -368,6 +369,7 @@ void   cpu_getmodelid(char *outbuf, size_t maxsz);
 void   cpu_brand_string( char* s );

 float cpu_temp( int core );
+*/

 struct work {
 	uint32_t data[48] __attribute__ ((aligned (64)));
@@ -724,7 +726,6 @@ extern bool opt_debug;
 extern bool opt_debug_diff;
 extern bool opt_benchmark;
 extern bool opt_protocol;
-extern bool opt_showdiff;
 extern bool opt_extranonce;
 extern bool opt_quiet;
 extern bool opt_redirect;
@@ -867,7 +868,7 @@ Options:\n\
                          x16rv2        Ravencoin (RVN)\n\
                          x16rt         Gincoin (GIN)\n\
                          x16rt-veil    Veil (VEIL)\n\
-                          x16s          Pigeoncoin (PGN)\n\
+                          x16s\n\
                          x17\n\
                          x21s\n\
                          xevan         Bitsend (BSD)\n\
@@ -901,7 +902,6 @@ Options:\n\
      -f, --diff-factor     Divide req. difficulty by this factor (std is 1.0)\n\
  -m, --diff-multiplier Multiply difficulty by this factor (std is 1.0)\n\
      --hash-meter      Display thread hash rates\n\
-      --hide-diff       Do not display changes in difficulty\n\
      --coinbase-addr=ADDR  payout address for solo mining\n\
      --coinbase-sig=TEXT  data to insert in the coinbase when possible\n\
      --no-longpoll     disable long polling support\n\
@@ -965,7 +965,6 @@ static struct option const options[] = {
        { "diff", 1, NULL, 'f' }, // deprecated (alias)
        { "diff-multiplier", 1, NULL, 'm' },
        { "hash-meter", 0, NULL, 1014 },
-        { "hide-diff", 0, NULL, 1013 },
        { "help", 0, NULL, 'h' },
        { "key", 1, NULL, 'K' },
        { "no-gbt", 0, NULL, 1011 },
--- a/simd-utils/simd-128.h
+++ b/simd-utils/simd-128.h
@@ -298,30 +298,38 @@ static inline void memcpy_128( __m128i *dst, const __m128i *src, const int n )
 // 64 and 32 bit elements.

 // compiler doesn't like when a variable is used for the last arg of
-// _mm_rol_epi32, must be "8 bit immediate".
+// _mm_rol_epi32, must be "8 bit immediate". Therefore use rol_var where
+// necessary.
 // sm3-hash-4way.c fails to compile.
+
+#define mm128_ror_var_64( v, c ) \
+   _mm_or_si128( _mm_srli_epi64( v, c ), _mm_slli_epi64( v, 64-(c) ) )
+
+#define mm128_rol_var_64( v, c ) \
+   _mm_or_si128( _mm_slli_epi64( v, c ), _mm_srli_epi64( v, 64-(c) ) )
+
+#define mm128_ror_var_32( v, c ) \
+   _mm_or_si128( _mm_srli_epi32( v, c ), _mm_slli_epi32( v, 32-(c) ) )
+
+#define mm128_rol_var_32( v, c ) \
+   _mm_or_si128( _mm_slli_epi32( v, c ), _mm_srli_epi32( v, 32-(c) ) )
+
+
 /*
 #if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)

-#define mm128_ror_64( v, c )    _mm_ror_epi64( v, c )
-#define mm128_rol_64( v, c )    _mm_rol_epi64( v, c )
-#define mm128_ror_32( v, c )    _mm_ror_epi32( v, c )
-#define mm128_rol_32( v, c )    _mm_rol_epi32( v, c )
+#define mm128_ror_64    _mm_ror_epi64
+#define mm128_rol_64    _mm_rol_epi64
+#define mm128_ror_32    _mm_ror_epi32
+#define mm128_rol_32    _mm_rol_epi32

 #else
 */

-#define mm128_ror_64( v, c ) \
-   _mm_or_si128( _mm_srli_epi64( v, c ), _mm_slli_epi64( v, 64-(c) ) )
-
-#define mm128_rol_64( v, c ) \
-   _mm_or_si128( _mm_slli_epi64( v, c ), _mm_srli_epi64( v, 64-(c) ) )
-
-#define mm128_ror_32( v, c ) \
-   _mm_or_si128( _mm_srli_epi32( v, c ), _mm_slli_epi32( v, 32-(c) ) )
-
-#define mm128_rol_32( v, c ) \
-   _mm_or_si128( _mm_slli_epi32( v, c ), _mm_srli_epi32( v, 32-(c) ) )
+#define mm128_ror_64   mm128_ror_var_64
+#define mm128_rol_64   mm128_rol_var_64
+#define mm128_ror_32   mm128_ror_var_32
+#define mm128_rol_32   mm128_rol_var_32

 //#endif   // AVX512 else

--- a/simd-utils/simd-256.h
+++ b/simd-utils/simd-256.h
@@ -367,38 +367,49 @@ static inline void memcpy_256( __m256i *dst, const __m256i *src, const int n )
 //
 // AVX512 has bit rotate for 256 bit vectors with 64 or 32 bit elements

+
 // compiler doesn't like when a variable is used for the last arg of
-// _mm_rol_epi32, must be "8 bit immediate".
+// _mm_rol_epi32, must be "8 bit immediate". Therefore use rol_var where
+// necessary. 
+
+#define mm256_ror_var_64( v, c ) \
+   _mm256_or_si256( _mm256_srli_epi64( v, c ), \
+                    _mm256_slli_epi64( v, 64-(c) ) )
+
+#define mm256_rol_var_64( v, c ) \
+   _mm256_or_si256( _mm256_slli_epi64( v, c ), \
+                    _mm256_srli_epi64( v, 64-(c) ) )
+
+#define mm256_ror_var_32( v, c ) \
+   _mm256_or_si256( _mm256_srli_epi32( v, c ), \
+                    _mm256_slli_epi32( v, 32-(c) ) )
+
+#define mm256_rol_var_32( v, c ) \
+   _mm256_or_si256( _mm256_slli_epi32( v, c ), \
+                    _mm256_srli_epi32( v, 32-(c) ) )
+
 /*
 #if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)

-#define mm256_ror_64( v, c )    _mm256_ror_epi64( v, c )
-#define mm256_rol_64( v, c )    _mm256_rol_epi64( v, c )
-#define mm256_ror_32( v, c )    _mm256_ror_epi32( v, c )
-#define mm256_rol_32( v, c )    _mm256_rol_epi32( v, c )
+// AVX512, control must be 8 bit immediate.
+
+#define mm256_ror_64    _mm256_ror_epi64
+#define mm256_rol_64    _mm256_rol_epi64
+#define mm256_ror_32    _mm256_ror_epi32
+#define mm256_rol_32    _mm256_rol_epi32

 #else
 */

-#define mm256_ror_64( v, c ) \
-   _mm256_or_si256( _mm256_srli_epi64( v, c ), \
-                    _mm256_slli_epi64( v, 64-(c) ) )
+// No AVX512, use fallback.

-#define mm256_rol_64( v, c ) \
-   _mm256_or_si256( _mm256_slli_epi64( v, c ), \
-                    _mm256_srli_epi64( v, 64-(c) ) )
-
-#define mm256_ror_32( v, c ) \
-   _mm256_or_si256( _mm256_srli_epi32( v, c ), \
-                    _mm256_slli_epi32( v, 32-(c) ) )
-
-#define mm256_rol_32( v, c ) \
-   _mm256_or_si256( _mm256_slli_epi32( v, c ), \
-                    _mm256_srli_epi32( v, 32-(c) ) )
+#define mm256_ror_64    mm256_ror_var_64 
+#define mm256_rol_64    mm256_rol_var_64
+#define mm256_ror_32    mm256_ror_var_32
+#define mm256_rol_32    mm256_rol_var_32

 // #endif     // AVX512 else

-
 #define  mm256_ror_16( v, c ) \
   _mm256_or_si256( _mm256_srli_epi16( v, c ), \
                    _mm256_slli_epi16( v, 16-(c) ) )
--- a/simd-utils/simd-512.h
+++ b/simd-utils/simd-512.h
@@ -278,7 +278,7 @@ static inline __m512i mm512_neg1_fn()
 // Horizontal vector testing

 #define mm512_allbits0( a )    _mm512_cmpeq_epi64_mask( a, m512_zero )
-#define mm256_allbits1( a )    _mm512_cmpeq_epi64_mask( a, m512_neg1 )
+#define mm512_allbits1( a )    _mm512_cmpeq_epi64_mask( a, m512_neg1 )
 #define mm512_anybits0( a )    _mm512_cmpneq_epi64_mask( a, m512_neg1 )
 #define mm512_anybits1( a )    _mm512_cmpneq_epi64_mask( a, m512_zero )

@@ -287,11 +287,30 @@ static inline __m512i mm512_neg1_fn()
 // Bit rotations.

 // AVX512F has built-in fixed and variable bit rotation for 64 & 32 bit
-// elements and can be called directly.
+// elements and can be called directly. But they only accept an 8 bit
+// immediate for the control arg.
 //
 // _mm512_rol_epi64,  _mm512_ror_epi64,  _mm512_rol_epi32,  _mm512_ror_epi32
 // _mm512_rolv_epi64, _mm512_rorv_epi64, _mm512_rolv_epi32, _mm512_rorv_epi32
 //
+
+#define mm512_ror_var_64( v, c ) \
+   _mm512_or_si512( _mm512_srli_epi64( v, c ), \
+                    _mm512_slli_epi64( v, 64-(c) ) )
+
+#define mm512_rol_var_64( v, c ) \
+   _mm512_or_si512( _mm512_slli_epi64( v, c ), \
+                    _mm512_srli_epi64( v, 64-(c) ) )
+
+#define mm512_ror_var_32( v, c ) \
+   _mm512_or_si512( _mm512_srli_epi32( v, c ), \
+                    _mm512_slli_epi32( v, 32-(c) ) )
+
+#define mm512_rol_var_32( v, c ) \
+   _mm512_or_si512( _mm512_slli_epi32( v, c ), \
+                    _mm512_srli_epi32( v, 32-(c) ) )
+
+
 // Here is a fixed bit rotate for 16 bit elements:
 #define mm512_ror_16( v, c ) \
    _mm512_or_si512( _mm512_srli_epi16( v, c ), \
@@ -300,6 +319,8 @@ static inline __m512i mm512_neg1_fn()
    _mm512_or_si512( _mm512_slli_epi16( v, c ), \
                     _mm512_srli_epi16( v, 16-(c) )

+
+
 // Rotations using a vector control index are very slow due to overhead
 // to generate the index vector. Repeated rotations using the same index
 // are better handled by the calling function where the index only needs
--- a/sysinfos.c
+++ b/sysinfos.c
@@ -1,8 +1,13 @@
+#if !defined(SYSINFOS_C__)
+#define SYSINFOS_C__
+
 /**
 * Unit to read cpu informations
 *
 * tpruvot 2014
- */
+ * JayDDee 2019
+ * 
+*/

 #include <stdio.h>
 #include <ctype.h>
@@ -28,7 +33,7 @@
 #define HWMON_ALT5 \
 "/sys/class/hwmon/hwmon0/device/temp1_input"

-static float linux_cputemp(int core)
+static inline float linux_cputemp(int core)
 {
 	float tc = 0.0;
 	FILE *fd = fopen(HWMON_PATH, "r");
@@ -60,7 +65,7 @@ static float linux_cputemp(int core)

 #define CPUFREQ_PATH \
 "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_cur_freq"
-static uint32_t linux_cpufreq(int core)
+static inline uint32_t linux_cpufreq(int core)
 {
 	FILE *fd = fopen(CPUFREQ_PATH, "r");
 	uint32_t freq = 0;
@@ -76,7 +81,7 @@ static uint32_t linux_cpufreq(int core)

 #else /* WIN32 */

-static float win32_cputemp(int core)
+static inline float win32_cputemp(int core)
 {
 	// todo
 	return 0.0;
@@ -88,7 +93,7 @@ static float win32_cputemp(int core)
 /* exports */


-float cpu_temp(int core)
+static inline float cpu_temp(int core)
 {
 #ifdef WIN32
 	return win32_cputemp(core);
@@ -97,7 +102,7 @@ float cpu_temp(int core)
 #endif
 }

-uint32_t cpu_clock(int core)
+static inline uint32_t cpu_clock(int core)
 {
 #ifdef WIN32
 	return 0;
@@ -106,7 +111,7 @@ uint32_t cpu_clock(int core)
 #endif
 }

-int cpu_fanpercent()
+static inline int cpu_fanpercent()
 {
 	return 0;
 }
@@ -142,7 +147,7 @@ static inline void cpuid(int functionnumber, int output[4]) {
 #define cpuid(fn, out) out[0] = 0;
 #endif

-void cpu_getname(char *outbuf, size_t maxsz)
+static inline void cpu_getname(char *outbuf, size_t maxsz)
 {
   memset(outbuf, 0, maxsz);
 #ifdef WIN32
@@ -190,7 +195,7 @@ void cpu_getname(char *outbuf, size_t maxsz)
 #endif
 }

-void cpu_getmodelid(char *outbuf, size_t maxsz)
+static inline void cpu_getmodelid(char *outbuf, size_t maxsz)
 {
   memset(outbuf, 0, maxsz);
 #ifdef WIN32
@@ -259,32 +264,47 @@ void cpu_getmodelid(char *outbuf, size_t maxsz)
 #define CPU_BRAND_2          (0x80000003)
 #define CPU_BRAND_3          (0x80000004)

+// Registers
 #define EAX_Reg  (0)
 #define EBX_Reg  (1)
 #define ECX_Reg  (2)
 #define EDX_Reg  (3)

-#define XSAVE_Flag    (1<<26) // ECX
+// Feature flags
+
+// CPU_INFO ECX
+#define XSAVE_Flag    (1<<26) 
 #define OSXSAVE_Flag  (1<<27)
-#define AVX1_Flag     (1<<28)
+#define AVX_Flag     (1<<28)
 #define XOP_Flag      (1<<11)
 #define FMA3_Flag     (1<<12)
 #define AES_Flag      (1<<25)
 #define SSE42_Flag    (1<<20)

+// CPU_INFO EDX
 #define SSE_Flag      (1<<25) // EDX
 #define SSE2_Flag     (1<<26) 

-#define AVX2_Flag     (1<< 5) // ADV EBX
+// EXTENDED_FEATURES EBX
+#define AVX2_Flag     (1<< 5)
 #define AVX512F_Flag  (1<<16)
+#define AVX512DQ_Flag (1<<17)
 #define SHA_Flag      (1<<29)
+#define AVX512BW_Flag (1<<30)
+#define AVX512VL_Flag (1U<<31)  // unsigned: 1<<31 overflows a signed int
+
+// EXTENDED_FEATURES ECX
+#define AVX512VBMI_Flag  (1<<1) 
+#define AVX512VBMI2_Flag (1<<6)
+#define AVX512VAES_Flag  (1<<9)
+

 // Use this to detect presence of feature
-#define AVX1_mask     (AVX1_Flag|XSAVE_Flag|OSXSAVE_Flag)
-#define FMA3_mask     (FMA3_Flag|AVX1_mask)
+#define AVX_mask     (AVX_Flag|XSAVE_Flag|OSXSAVE_Flag)
+#define FMA3_mask     (FMA3_Flag|AVX_mask)
+#define AVX512_mask   (AVX512VL_Flag|AVX512BW_Flag|AVX512DQ_Flag|AVX512F_Flag)

-
-static inline bool has_sha_()
+static inline bool has_sha()
 {
 #ifdef __arm__
    return false;
@@ -295,10 +315,7 @@ static inline bool has_sha_()
 #endif
 }

-bool has_sha() { return has_sha_(); }
-
-
-static inline bool has_sse2_()
+static inline bool has_sse2()
 {
 #ifdef __arm__
    return false;
@@ -309,10 +326,8 @@ static inline bool has_sse2_()
 #endif
 }

-bool has_sse2() { return has_sse2_(); } 
-
-// nehalem and above, no AVX1 on nehalem
-static inline bool has_aes_ni_()
+// nehalem and above, no AVX on nehalem
+static inline bool has_aes_ni()
 {
 #ifdef __arm__
 	return false;
@@ -323,24 +338,20 @@ static inline bool has_aes_ni_()
 #endif
 }

-bool has_aes_ni() { return has_aes_ni_(); }
-
 // westmere and above
-static inline bool has_avx1_()
+static inline bool has_avx()
 {
 #ifdef __arm__
        return false;
 #else
        int cpu_info[4] = { 0 };
        cpuid( CPU_INFO, cpu_info );
-        return ( ( cpu_info[ ECX_Reg ] & AVX1_mask ) == AVX1_mask );
+        return ( ( cpu_info[ ECX_Reg ] & AVX_mask ) == AVX_mask );
 #endif
 }

-bool has_avx1() { return has_avx1_(); }
-
 // haswell and above
-static inline bool has_avx2_()
+static inline bool has_avx2()
 {
 #ifdef __arm__
    return false;
@@ -351,9 +362,7 @@ static inline bool has_avx2_()
 #endif
 }

-bool has_avx2() { return has_avx2_(); }
-
-static inline bool has_avx512f_()
+static inline bool has_avx512f()
 {
 #ifdef __arm__
    return false;
@@ -364,24 +373,75 @@ static inline bool has_avx512f_()
 #endif
 }

-bool has_avx512f() { return has_avx512f_(); }
-
-
-// AMD only
-static inline bool has_xop_()
+static inline bool has_avx512dq()
 {
 #ifdef __arm__
    return false;
 #else
    int cpu_info[4] = { 0 };
-        cpuid( CPU_INFO, cpu_info );
+    cpuid( EXTENDED_FEATURES, cpu_info );
+    return cpu_info[ EBX_Reg ] & AVX512DQ_Flag;
+#endif
+}
+
+static inline bool has_avx512bw()
+{
+#ifdef __arm__
+    return false;
+#else
+    int cpu_info[4] = { 0 };
+    cpuid( EXTENDED_FEATURES, cpu_info );
+    return cpu_info[ EBX_Reg ] & AVX512BW_Flag;
+#endif
+}
+
+static inline bool has_avx512vl()
+{
+#ifdef __arm__
+    return false;
+#else
+    int cpu_info[4] = { 0 };
+    cpuid( EXTENDED_FEATURES, cpu_info );
+    return cpu_info[ EBX_Reg ] & AVX512VL_Flag;
+#endif
+}
+
+// Minimum to be useful
+static inline bool has_avx512()
+{
+#ifdef __arm__
+    return false;
+#else
+    int cpu_info[4] = { 0 };
+    cpuid( EXTENDED_FEATURES, cpu_info );
+    return ( ( cpu_info[ EBX_Reg ] & AVX512_mask ) == AVX512_mask );
+#endif
+}
+
+static inline bool has_avx512vaes()
+{
+#ifdef __arm__
+    return false;
+#else
+    int cpu_info[4] = { 0 };
+    cpuid( EXTENDED_FEATURES, cpu_info );
+    return cpu_info[ ECX_Reg ] & AVX512VAES_Flag;
+#endif
+}
+
+// AMD only
+static inline bool has_xop()
+{
+#ifdef __arm__
+        return false;
+#else
+        int cpu_info[4] = { 0 };
+        cpuid( EXTENDED_CPU_INFO, cpu_info );
        return cpu_info[ ECX_Reg ] & XOP_Flag;
 #endif
 }

-bool has_xop() { return has_xop_(); }
-
-static inline bool has_fma3_()
+static inline bool has_fma3()
 {
 #ifdef __arm__
        return false;
@@ -392,9 +452,7 @@ static inline bool has_fma3_()
 #endif
 }

-bool has_fma3() { return has_fma3_(); }
-
-static inline bool has_sse42_()
+static inline bool has_sse42()
 {
 #ifdef __arm__
        return false;
@@ -405,9 +463,7 @@ static inline bool has_sse42_()
 #endif
 }

-bool has_sse42() { return has_sse42_(); }
-
-static inline bool has_sse_()
+static inline bool has_sse()
 {
 #ifdef __arm__
        return false;
@@ -418,16 +474,14 @@ static inline bool has_sse_()
 #endif
 }

-bool has_sse() { return has_sse_(); }
-
-uint32_t cpuid_get_highest_function_number()
+static inline uint32_t cpuid_get_highest_function_number()
 {
  uint32_t cpu_info[4] = {0};
  cpuid( VENDOR_ID, cpu_info);
  return cpu_info[ EAX_Reg ];
 }

-void cpuid_get_highest_function( char* s )
+static inline void cpuid_get_highest_function( char* s )
 {
  uint32_t fn = cpuid_get_highest_function_number();
  switch (fn)
@@ -449,7 +503,7 @@ void cpuid_get_highest_function( char* s )
  }
 }

-void cpu_bestfeature(char *outbuf, size_t maxsz)
+static inline void cpu_bestfeature(char *outbuf, size_t maxsz)
 {
 #ifdef __arm__
 	sprintf(outbuf, "ARM");
@@ -459,19 +513,19 @@ void cpu_bestfeature(char *outbuf, size_t maxsz)
 	cpuid( CPU_INFO, cpu_info );
 	cpuid( EXTENDED_FEATURES, cpu_info_adv );

-        if ( has_avx1_() && has_avx2_() )
+        if ( has_avx() && has_avx2() )
              sprintf(outbuf, "AVX2");
-        else if ( has_avx1_() )
-              sprintf(outbuf, "AVX1");
-        else if ( has_fma3_() )
+        else if ( has_avx() )
+              sprintf(outbuf, "AVX");
+        else if ( has_fma3() )
              sprintf(outbuf, "FMA3");
-        else if ( has_xop_() )
+        else if ( has_xop() )
              sprintf(outbuf, "XOP");
-        else if ( has_sse42_() )
+        else if ( has_sse42() )
              sprintf(outbuf, "SSE42");
-        else if ( has_sse2_() )
+        else if ( has_sse2() )
              sprintf(outbuf, "SSE2");
-        else if ( has_sse_() )
+        else if ( has_sse() )
              sprintf(outbuf, "SSE");
        else
              *outbuf = '\0';
@@ -479,7 +533,7 @@ void cpu_bestfeature(char *outbuf, size_t maxsz)
 #endif
 }

-void cpu_brand_string( char* s )
+static inline void cpu_brand_string( char* s )
 {
 #ifdef __arm__
        sprintf( s, "ARM" );
@@ -498,3 +552,5 @@ void cpu_brand_string( char* s )
 #endif
 }    

+#endif  // SYSINFOS_C__
+
--- a/util.c
+++ b/util.c
@@ -24,6 +24,7 @@
 #include <unistd.h>
 #include <jansson.h>
 #include <curl/curl.h>
+#include "sysinfos.c"
 #include <time.h>
 #include <sys/stat.h>
 #include <math.h>