diff --git a/README.md b/README.md
index 99d127b..dd5af57 100644
--- a/README.md
+++ b/README.md
@@ -122,10 +122,10 @@ Supported Algorithms
                           x13sm3        hsr (Hshare)
                           x14           X14
                           x15           X15
-                          x16r          Ravencoin (RVN) (original algo)
-                          x16rv2        Ravencoin (RVN) (new algo)
+                          x16r          
+                          x16rv2        Ravencoin (RVN)
                           x16rt         Gincoin (GIN)
-                          x16rt_veil    Veil (VEIL)
+                          x16rt-veil    Veil (VEIL)
                           x16s          Pigeoncoin (PGN)
                           x17
                           x21s
@@ -136,7 +136,7 @@ Supported Algorithms
                           yescryptr32   WAVI
                           yespower      Cryply
                           yespowerr16   Yenten (YTN)
-                          yespoer-b2b   generic yespower + blake2b
+                          yespower-b2b  generic yespower + blake2b
                           zr5           Ziftr
 
 Errata
@@ -160,10 +160,12 @@ Bugs
 ----
 
 Users are encouraged to post their bug reports using git issues or on the
-Bitcoin Talk forum at:
+Bitcoin Talk forum:
 
 https://bitcointalk.org/index.php?topic=1326803.0
 
+https://github.com/JayDDee/cpuminer-opt/issues
+
 All problem reports must be accompanied by a proper problem definition.
 This should include how the problem occurred, the command line and
 output from the miner showing the startup messages and any errors.
@@ -175,10 +177,6 @@ Donations
 cpuminer-opt has no fees of any kind but donations are accepted.
 
  BTC: 12tdvfF7KmAsihBXQXynT6E6th2c2pByTT
- ETH: 0x72122edabcae9d3f57eab0729305a425f6fef6d0
- LTC: LdUwoHJnux9r9EKqFWNvAi45kQompHk6e8
- BCH: 1QKYkB6atn4P7RFozyziAXLEnurwnUM1cQ
- BTG: GVUyECtRHeC5D58z9F3nGGfVQndwnsPnHQ
 
 Happy mining!
 
diff --git a/RELEASE_NOTES b/RELEASE_NOTES
index 935c65f..8e1a504 100644
--- a/RELEASE_NOTES
+++ b/RELEASE_NOTES
@@ -1,11 +1,6 @@
 cpuminer-opt is a console program run from the command line using the
 keyboard, not the mouse.
 
-cpuminer-opt now supports HW SHA acceleration available on AMD Ryzen CPUs.
-This feature requires recent SW including GCC version 5 or higher and
-openssl version 1.1 or higher. It may also require using "-march=znver1"
-compile flag.
-
 Security warning
 ----------------
 
@@ -36,17 +31,26 @@ FreeBSD YMMV.
 Change Log
 ----------
 
+v3.9.9.1
+
+Fixed a day1 bug that could cause the miner to idle for up to 2 minutes
+under certain circumstances.
+
+Redesigned summary stats report now includes session statistics.
+
+More robust handling of statistics to reduce corruption.
+
+Removed --hide-diff option.
+
+Better handling of cpu-affinity with more than 64 CPUs.
+
 v3.9.9
 
 Added power2b algo for MicroBitcoin.
-
 Added generic yespower-b2b (yespower + blake2b) algo to be used with
 the parameters introduced in v3.9.7 for yespower & yescrypt.
-
 Display additional info when a share is rejected.
-
 Some low level enhancements and minor tweaking of log output.
-
 RELEASE_NOTES (this file) and README.md  added to Windows release package.
 
 v3.9.8.1
diff --git a/algo-gate-api.c b/algo-gate-api.c
index e8c0885..bcce910 100644
--- a/algo-gate-api.c
+++ b/algo-gate-api.c
@@ -116,8 +116,6 @@ void init_algo_gate( algo_gate_t* gate )
    gate->get_nonceptr            = (void*)&std_get_nonceptr;
    gate->work_decode             = (void*)&std_le_work_decode;
    gate->decode_extra_data       = (void*)&do_nothing;
-   gate->wait_for_diff           = (void*)&std_wait_for_diff;
-   gate->get_max64               = (void*)&get_max64_0x1fffffLL;
    gate->gen_merkle_root         = (void*)&sha256d_gen_merkle_root;
    gate->stratum_gen_work        = (void*)&std_stratum_gen_work;
    gate->build_stratum_request   = (void*)&std_le_build_stratum_request;
@@ -278,7 +276,7 @@ bool register_json_rpc2( algo_gate_t *gate )
   applog(LOG_WARNING,"supported by cpuminer-opt. Shares submitted will");
   applog(LOG_WARNING,"likely be rejected. Proceed at your own risk.\n");
 
-  gate->wait_for_diff           = (void*)&do_nothing;
+//  gate->wait_for_diff           = (void*)&do_nothing;
   gate->get_new_work            = (void*)&jr2_get_new_work;
   gate->get_nonceptr            = (void*)&jr2_get_nonceptr;
   gate->stratum_gen_work        = (void*)&jr2_stratum_gen_work;
diff --git a/algo-gate-api.h b/algo-gate-api.h
index 8792e53..7d88ec9 100644
--- a/algo-gate-api.h
+++ b/algo-gate-api.h
@@ -35,7 +35,7 @@
 //    6. Determine if other non existant functions are required.
 //    That is determined by the need to add code in cpu-miner.c
 //    that applies only to the new algo. That is forbidden. All
-//    algo specific code must be in theh algo's file.
+//    algo specific code must be in the algo's file.
 //
 //    7. If new functions need to be added to the gate add the type
 //    to the structure, declare a null instance in this file and define
@@ -48,10 +48,10 @@
 //    instances as they are defined by default, or unsafe functions that
 //    are not needed by the algo.
 //
-//    9. Add an case entry to the switch/case in function register_gate
+//    9. Add a case entry to the switch/case in function register_gate
 //    in file algo-gate-api.c for the new algo.
 //
-//    10 If a new function type was defined add an entry to ini talgo_gate
+//    10. If a new function type was defined add an entry to init_algo_gate
 //    to initialize the new function to its null instance described in step 7.
 //
 //    11. If the new algo has aliases add them to the alias array in
@@ -110,14 +110,7 @@ inline bool set_excl ( set_t a, set_t b ) { return (a & b) == 0; }
 
 typedef struct
 {
-// special case, only one target, provides a callback for scanhash to
-// submit work with less overhead.
-// bool (*submit_work )             ( struct thr_info*, const struct work* );
-
 // mandatory functions, must be overwritten
-// Added a 5th arg for the thread_info structure to replace the int thr id
-// in the first arg. Both will co-exist during the trasition.
-//int ( *scanhash ) ( int, struct work*, uint32_t, uint64_t* );
 int ( *scanhash ) ( struct work*, uint32_t, uint64_t*, struct thr_info* );
 
 // optional unsafe, must be overwritten if algo uses function
@@ -131,14 +124,12 @@ void ( *get_new_work )           ( struct work*, struct work*, int, uint32_t*,
                                    bool );
 uint32_t *( *get_nonceptr )      ( uint32_t* );
 void ( *decode_extra_data )      ( struct work*, uint64_t* );
-void ( *wait_for_diff )          ( struct stratum_ctx* );
-int64_t ( *get_max64 )           ();
 bool ( *work_decode )            ( const json_t*, struct work* );
 bool ( *submit_getwork_result )  ( CURL*, struct work* );
 void ( *gen_merkle_root )        ( char*, struct stratum_ctx* );
 void ( *build_extraheader )      ( struct work*, struct stratum_ctx* );
 void ( *build_block_header )     ( struct work*, uint32_t, uint32_t*,
-	                           uint32_t*, uint32_t, uint32_t );
+	                                uint32_t*, uint32_t, uint32_t );
 void ( *build_stratum_request )  ( char*, struct work*, struct stratum_ctx* );
 char* ( *malloc_txs_request )    ( struct work* );
 void ( *set_work_data_endian )   ( struct work* );
@@ -200,8 +191,6 @@ void null_hash_suw();
 
 // optional safe targets, default listed first unless noted.
 
-void std_wait_for_diff();
-
 uint32_t *std_get_nonceptr( uint32_t *work_data );
 uint32_t *jr2_get_nonceptr( uint32_t *work_data );
 
@@ -216,21 +205,13 @@ void jr2_stratum_gen_work( struct stratum_ctx *sctx, struct work *work );
 void sha256d_gen_merkle_root( char *merkle_root, struct stratum_ctx *sctx );
 void SHA256_gen_merkle_root ( char *merkle_root, struct stratum_ctx *sctx );
 
-// pick your favorite or define your own
-int64_t get_max64_0x1fffffLL(); // default
-int64_t get_max64_0x40LL();
-int64_t get_max64_0x3ffff();
-int64_t get_max64_0x3fffffLL();
-int64_t get_max64_0x1ffff();
-int64_t get_max64_0xffffLL();
-
 bool std_le_work_decode( const json_t *val, struct work *work );
 bool std_be_work_decode( const json_t *val, struct work *work );
-bool jr2_work_decode( const json_t *val, struct work *work );
+bool jr2_work_decode(    const json_t *val, struct work *work );
 
 bool std_le_submit_getwork_result( CURL *curl, struct work *work );
 bool std_be_submit_getwork_result( CURL *curl, struct work *work );
-bool jr2_submit_getwork_result( CURL *curl, struct work *work );
+bool jr2_submit_getwork_result(    CURL *curl, struct work *work );
 
 void std_le_build_stratum_request( char *req, struct work *work );
 void std_be_build_stratum_request( char *req, struct work *work );
@@ -244,8 +225,8 @@ void set_work_data_big_endian( struct work *work );
 double std_calc_network_diff( struct work *work );
 
 void std_build_block_header( struct work* g_work, uint32_t version,
-	                     uint32_t *prevhash,  uint32_t *merkle_root,
-   	                     uint32_t ntime, uint32_t nbits );
+	                          uint32_t *prevhash,  uint32_t *merkle_root,
+   	                       uint32_t ntime,      uint32_t nbits );
 
 void std_build_extraheader( struct work *work, struct stratum_ctx *sctx );
 
@@ -266,8 +247,8 @@ int std_get_work_data_size();
 // by calling the algo's register function.
 bool register_algo_gate( int algo, algo_gate_t *gate );
 
-// Override any default gate functions that are applicable and do any other
-// algo-specific initialization.
+// Called by algos to override any default gate functions that are applicable
+// and do any other algo-specific initialization.
 // The register functions for all the algos can be declared here to reduce
 // compiler warnings but that's just more work for devs adding new algos.
 bool register_algo( algo_gate_t *gate );
@@ -280,5 +261,7 @@ bool register_json_rpc2( algo_gate_t *gate );
 // use this to call the hash function of an algo directly, ie util.c test.
 void exec_hash_function( int algo, void *output, const void *pdata );
 
-void get_algo_alias( char** algo_or_alias );
+// Validate a string as a known algo or alias. Updates arg to the proper
+// algo name if it is a valid alias, or to NULL if the algo or alias is invalid.
+void get_algo_alias( char **algo_or_alias );
 
diff --git a/algo/argon2/argon2a/argon2a.c b/algo/argon2/argon2a/argon2a.c
index 94f6f2c..699e1fa 100644
--- a/algo/argon2/argon2a/argon2a.c
+++ b/algo/argon2/argon2a/argon2a.c
@@ -74,18 +74,12 @@ int scanhash_argon2( struct work* work, uint32_t max_nonce,
 	return 0;
 }
 
-int64_t argon2_get_max64 ()
-{
-  return 0x1ffLL;
-}
-
 bool register_argon2_algo( algo_gate_t* gate )
 {
   gate->optimizations = SSE2_OPT | AVX_OPT | AVX2_OPT;
   gate->scanhash        = (void*)&scanhash_argon2;
   gate->hash            = (void*)&argon2hash;
   gate->gen_merkle_root = (void*)&SHA256_gen_merkle_root;
-  gate->get_max64       = (void*)&argon2_get_max64;
   opt_target_factor = 65536.0;
 
   return true;
diff --git a/algo/argon2/argon2d/argon2d-gate.c b/algo/argon2/argon2d/argon2d-gate.c
index fa6a206..300bf57 100644
--- a/algo/argon2/argon2d/argon2d-gate.c
+++ b/algo/argon2/argon2d/argon2d-gate.c
@@ -179,12 +179,9 @@ int scanhash_argon2d4096( struct work *work, uint32_t max_nonce,
    return 0;
 }
 
-int64_t get_max64_0x1ff() { return 0x1ff; }
-
 bool register_argon2d4096_algo( algo_gate_t* gate )
 {
         gate->scanhash = (void*)&scanhash_argon2d4096;
-        gate->get_max64  = (void*)&get_max64_0x1ff;
         gate->optimizations = SSE2_OPT | AVX2_OPT | AVX512_OPT;
         opt_target_factor = 65536.0;
         return true;
diff --git a/algo/blake/blake-gate.c b/algo/blake/blake-gate.c
index 7fc6480..7dd8d94 100644
--- a/algo/blake/blake-gate.c
+++ b/algo/blake/blake-gate.c
@@ -1,18 +1,8 @@
 #include "blake-gate.h"
 
-int64_t blake_get_max64 ()
-{
-  return 0x7ffffLL;
-}
-
 bool register_blake_algo( algo_gate_t* gate )
 {
   gate->optimizations = AVX2_OPT;
-  gate->get_max64 = (void*)&blake_get_max64;
-//#if defined (__AVX2__) && defined (FOUR_WAY)
-//   gate->optimizations = SSE2_OPT | AVX2_OPT;
-//  gate->scanhash  = (void*)&scanhash_blake_8way;
-//  gate->hash      = (void*)&blakehash_8way;
 #if defined(BLAKE_4WAY)
   four_way_not_tested();
   gate->scanhash  = (void*)&scanhash_blake_4way;
diff --git a/algo/blake/blake2b-gate.c b/algo/blake/blake2b-gate.c
index e875e04..da8851c 100644
--- a/algo/blake/blake2b-gate.c
+++ b/algo/blake/blake2b-gate.c
@@ -1,13 +1,5 @@
 #include "blake2b-gate.h"
 
-/*
-// changed to get_max64_0x3fffffLL in cpuminer-multi-decred
-int64_t blake2s_get_max64 ()
-{
-   return 0x7ffffLL;
-}
-*/
-
 bool register_blake2b_algo( algo_gate_t* gate )
 {
 #if defined(BLAKE2B_4WAY)
@@ -17,7 +9,6 @@ bool register_blake2b_algo( algo_gate_t* gate )
   gate->scanhash  = (void*)&scanhash_blake2b;
   gate->hash      = (void*)&blake2b_hash;
 #endif
-//  gate->get_max64 = (void*)&blake2s_get_max64;
   gate->optimizations =  AVX2_OPT;
   return true;
 };
diff --git a/algo/blake/blake2s-gate.c b/algo/blake/blake2s-gate.c
index 68ace1a..a35047f 100644
--- a/algo/blake/blake2s-gate.c
+++ b/algo/blake/blake2s-gate.c
@@ -1,12 +1,5 @@
 #include "blake2s-gate.h"
 
-
-// changed to get_max64_0x3fffffLL in cpuminer-multi-decred
-int64_t blake2s_get_max64 ()
-{
-   return 0x7ffffLL;
-}
-
 bool register_blake2s_algo( algo_gate_t* gate )
 {
 #if defined(BLAKE2S_8WAY)
@@ -19,7 +12,6 @@ bool register_blake2s_algo( algo_gate_t* gate )
   gate->scanhash  = (void*)&scanhash_blake2s;
   gate->hash      = (void*)&blake2s_hash;
 #endif
-  gate->get_max64 = (void*)&blake2s_get_max64;
   gate->optimizations = SSE2_OPT | AVX2_OPT;
   return true;
 };
diff --git a/algo/blake/blake2s.c b/algo/blake/blake2s.c
index aee4ce5..ec5b46f 100644
--- a/algo/blake/blake2s.c
+++ b/algo/blake/blake2s.c
@@ -70,18 +70,3 @@ int scanhash_blake2s( struct work *work,
 
 	return 0;
 }
-/*
-// changed to get_max64_0x3fffffLL in cpuminer-multi-decred
-int64_t blake2s_get_max64 ()
-{
-   return 0x7ffffLL;
-}
-
-bool register_blake2s_algo( algo_gate_t* gate )
-{
-  gate->scanhash  = (void*)&scanhash_blake2s;
-  gate->hash      = (void*)&blake2s_hash;
-  gate->get_max64 = (void*)&blake2s_get_max64;
-  return true;
-};
-*/
diff --git a/algo/blake/blakecoin-gate.c b/algo/blake/blakecoin-gate.c
index 0429063..b4322b0 100644
--- a/algo/blake/blakecoin-gate.c
+++ b/algo/blake/blakecoin-gate.c
@@ -1,13 +1,6 @@
 #include "blakecoin-gate.h"
 #include <memory.h>
 
-// changed to get_max64_0x3fffffLL in cpuminer-multi-decred
-int64_t blakecoin_get_max64 ()
-{
-  return 0x7ffffLL;
-//  return 0x3fffffLL;
-}
-
 // vanilla uses default gen merkle root, otherwise identical to blakecoin
 bool register_vanilla_algo( algo_gate_t* gate )
 {
@@ -23,7 +16,6 @@ bool register_vanilla_algo( algo_gate_t* gate )
   gate->hash     = (void*)&blakecoinhash;
 #endif
   gate->optimizations = SSE42_OPT | AVX2_OPT;
-  gate->get_max64 = (void*)&blakecoin_get_max64;
   return true;
 }
 
diff --git a/algo/blake/blakecoin.c b/algo/blake/blakecoin.c
index f733c2c..3cea5d9 100644
--- a/algo/blake/blakecoin.c
+++ b/algo/blake/blakecoin.c
@@ -93,33 +93,3 @@ int scanhash_blakecoin( struct work *work, uint32_t max_nonce,
 	return 0;
 }
 
-/*
-void blakecoin_gen_merkle_root ( char* merkle_root, struct stratum_ctx* sctx )
-{
- SHA256( sctx->job.coinbase, (int)sctx->job.coinbase_size, merkle_root );
-}
-*/
-/*
-// changed to get_max64_0x3fffffLL in cpuminer-multi-decred
-int64_t blakecoin_get_max64 ()
-{
-  return 0x7ffffLL;
-}
-
-// vanilla uses default gen merkle root, otherwise identical to blakecoin
-bool register_vanilla_algo( algo_gate_t* gate )
-{
-    gate->scanhash = (void*)&scanhash_blakecoin;
-    gate->hash     = (void*)&blakecoinhash;
-    gate->get_max64 = (void*)&blakecoin_get_max64;
-    blakecoin_init( &blake_init_ctx );
-    return true;
-}
-
-bool register_blakecoin_algo( algo_gate_t* gate )
-{
-  register_vanilla_algo( gate );
-  gate->gen_merkle_root = (void*)&SHA256_gen_merkle_root;
-  return true;
-}
-*/
diff --git a/algo/blake/decred-gate.c b/algo/blake/decred-gate.c
index 0e06c5e..ef58a81 100644
--- a/algo/blake/decred-gate.c
+++ b/algo/blake/decred-gate.c
@@ -38,7 +38,7 @@ void decred_decode_extradata( struct work* work, uint64_t* net_blocks )
    if (!have_longpoll && work->height > *net_blocks + 1)
    {
       char netinfo[64] = { 0 };
-      if (opt_showdiff && net_diff > 0.)
+      if ( net_diff > 0. )
       {
          if (net_diff != work->targetdiff)
             sprintf(netinfo, ", diff %.3f, target %.1f", net_diff,
@@ -154,7 +154,6 @@ bool register_decred_algo( algo_gate_t* gate )
 #endif
   gate->optimizations = AVX2_OPT;
   gate->get_nonceptr          = (void*)&decred_get_nonceptr;
-  gate->get_max64             = (void*)&get_max64_0x3fffffLL;
   gate->decode_extra_data     = (void*)&decred_decode_extradata;
   gate->build_stratum_request = (void*)&decred_be_build_stratum_request;
   gate->work_decode           = (void*)&std_be_work_decode;
diff --git a/algo/blake/decred.c b/algo/blake/decred.c
index 8645d2a..37ad1f3 100644
--- a/algo/blake/decred.c
+++ b/algo/blake/decred.c
@@ -143,7 +143,7 @@ void decred_decode_extradata( struct work* work, uint64_t* net_blocks )
    if (!have_longpoll && work->height > *net_blocks + 1)
    {
       char netinfo[64] = { 0 };
-      if (opt_showdiff && net_diff > 0.)
+      if (net_diff > 0.)
       {
          if (net_diff != work->targetdiff)
 	    sprintf(netinfo, ", diff %.3f, target %.1f", net_diff,
@@ -269,7 +269,6 @@ bool register_decred_algo( algo_gate_t* gate )
   gate->scanhash              = (void*)&scanhash_decred;
   gate->hash                  = (void*)&decred_hash;
   gate->get_nonceptr          = (void*)&decred_get_nonceptr;
-  gate->get_max64             = (void*)&get_max64_0x3fffffLL;
   gate->decode_extra_data     = (void*)&decred_decode_extradata;
   gate->build_stratum_request = (void*)&decred_be_build_stratum_request;
   gate->work_decode           = (void*)&std_be_work_decode;
diff --git a/algo/blake/pentablake-gate.c b/algo/blake/pentablake-gate.c
index b194206..9d84486 100644
--- a/algo/blake/pentablake-gate.c
+++ b/algo/blake/pentablake-gate.c
@@ -10,7 +10,6 @@ bool register_pentablake_algo( algo_gate_t* gate )
     gate->hash      = (void*)&pentablakehash;
 #endif
     gate->optimizations = AVX2_OPT;
-    gate->get_max64 = (void*)&get_max64_0x3ffff;
     return true;
 };
 
diff --git a/algo/bmw/bmw512-gate.c b/algo/bmw/bmw512-gate.c
index e52b04a..48277ed 100644
--- a/algo/bmw/bmw512-gate.c
+++ b/algo/bmw/bmw512-gate.c
@@ -1,11 +1,8 @@
 #include "bmw512-gate.h"
 
-int64_t bmw512_get_max64() { return 0x7ffffLL; }
-
 bool register_bmw512_algo( algo_gate_t* gate )
 {
   gate->optimizations = AVX2_OPT;
-  gate->get_max64       = (void*)&bmw512_get_max64;
   opt_target_factor = 256.0;
 #if defined (BMW512_4WAY)
   gate->scanhash  = (void*)&scanhash_bmw512_4way;
diff --git a/algo/cryptonight/cryptolight.c b/algo/cryptonight/cryptolight.c
index 6295be5..ca2923d 100644
--- a/algo/cryptonight/cryptolight.c
+++ b/algo/cryptonight/cryptolight.c
@@ -363,7 +363,6 @@ bool register_cryptolight_algo( algo_gate_t* gate )
   gate->scanhash  = (void*)&scanhash_cryptolight;
   gate->hash      = (void*)&cryptolight_hash;
   gate->hash_suw  = (void*)&cryptolight_hash; 
-  gate->get_max64 = (void*)&get_max64_0x40LL;
   return true;
 };
 
diff --git a/algo/cryptonight/cryptonight-common.c b/algo/cryptonight/cryptonight-common.c
index e6c346b..2a5146f 100644
--- a/algo/cryptonight/cryptonight-common.c
+++ b/algo/cryptonight/cryptonight-common.c
@@ -111,7 +111,6 @@ bool register_cryptonight_algo( algo_gate_t* gate )
   gate->scanhash         = (void*)&scanhash_cryptonight;
   gate->hash             = (void*)&cryptonight_hash;
   gate->hash_suw         = (void*)&cryptonight_hash_suw;  
-  gate->get_max64        = (void*)&get_max64_0x40LL;
   return true;
 };
 
@@ -123,7 +122,6 @@ bool register_cryptonightv7_algo( algo_gate_t* gate )
   gate->scanhash      = (void*)&scanhash_cryptonight;
   gate->hash          = (void*)&cryptonight_hash;
   gate->hash_suw      = (void*)&cryptonight_hash_suw;
-  gate->get_max64     = (void*)&get_max64_0x40LL;
   return true;
 };
 
diff --git a/algo/groestl/groestl.c b/algo/groestl/groestl.c
index 30f2599..df1c2c3 100644
--- a/algo/groestl/groestl.c
+++ b/algo/groestl/groestl.c
@@ -100,7 +100,6 @@ bool register_dmd_gr_algo( algo_gate_t* gate )
     gate->optimizations   = SSE2_OPT | AES_OPT;
     gate->scanhash        = (void*)&scanhash_groestl;
     gate->hash            = (void*)&groestlhash;
-    gate->get_max64       = (void*)&get_max64_0x3ffff;
     opt_target_factor = 256.0;
     return true;
 };
diff --git a/algo/groestl/myr-groestl.c b/algo/groestl/myr-groestl.c
index 5a3b897..bff0360 100644
--- a/algo/groestl/myr-groestl.c
+++ b/algo/groestl/myr-groestl.c
@@ -88,15 +88,3 @@ int scanhash_myriad( struct work *work, uint32_t max_nonce,
 	*hashes_done = pdata[19] - first_nonce + 1;
 	return 0;
 }
-/*
-bool register_myriad_algo( algo_gate_t* gate )
-{
-    gate->optimizations = SSE2_OPT | AES_OPT;
-    init_myrgr_ctx();
-    gate->scanhash = (void*)&scanhash_myriad;
-    gate->hash     = (void*)&myriadhash;
-//    gate->hash_alt = (void*)&myriadhash;
-    gate->get_max64 = (void*)&get_max64_0x3ffff;
-    return true;
-};
-*/
diff --git a/algo/groestl/myrgr-gate.c b/algo/groestl/myrgr-gate.c
index aa8ebd8..7f8e185 100644
--- a/algo/groestl/myrgr-gate.c
+++ b/algo/groestl/myrgr-gate.c
@@ -12,7 +12,6 @@ bool register_myriad_algo( algo_gate_t* gate )
   gate->hash      = (void*)&myriad_hash;
 #endif
   gate->optimizations = AES_OPT | AVX2_OPT;
-  gate->get_max64 = (void*)&get_max64_0x3ffff;
   return true;
 };
 
diff --git a/algo/keccak/keccak-gate.c b/algo/keccak/keccak-gate.c
index c963def..0ebc2d4 100644
--- a/algo/keccak/keccak-gate.c
+++ b/algo/keccak/keccak-gate.c
@@ -1,12 +1,10 @@
 #include "keccak-gate.h"
 
-int64_t keccak_get_max64() { return 0x7ffffLL; }
 
 bool register_keccak_algo( algo_gate_t* gate )
 {
   gate->optimizations = AVX2_OPT;
   gate->gen_merkle_root = (void*)&SHA256_gen_merkle_root;
-  gate->get_max64       = (void*)&keccak_get_max64;
   opt_target_factor = 128.0;
 #if defined (KECCAK_4WAY)
   gate->scanhash  = (void*)&scanhash_keccak_4way;
@@ -22,7 +20,6 @@ bool register_keccakc_algo( algo_gate_t* gate )
 {
   gate->optimizations = AVX2_OPT;
   gate->gen_merkle_root = (void*)&sha256d_gen_merkle_root;
-  gate->get_max64       = (void*)&keccak_get_max64;
   opt_target_factor = 256.0;
 #if defined (KECCAK_4WAY)
   gate->scanhash  = (void*)&scanhash_keccak_4way;
diff --git a/algo/lyra2/lyra2-gate.c b/algo/lyra2/lyra2-gate.c
index 38db1d6..b608ba2 100644
--- a/algo/lyra2/lyra2-gate.c
+++ b/algo/lyra2/lyra2-gate.c
@@ -127,7 +127,6 @@ bool register_lyra2z_algo( algo_gate_t* gate )
   gate->hash       = (void*)&lyra2z_hash;
 #endif
   gate->optimizations = SSE42_OPT | AVX2_OPT;
-  gate->get_max64  = (void*)&get_max64_0xffffLL;
   opt_target_factor = 256.0;
   return true;
 };
@@ -147,15 +146,12 @@ bool register_lyra2h_algo( algo_gate_t* gate )
   gate->hash       = (void*)&lyra2h_hash;
 #endif
   gate->optimizations = SSE42_OPT | AVX2_OPT;
-  gate->get_max64  = (void*)&get_max64_0xffffLL;
   opt_target_factor = 256.0;
   return true;
 };
 
 /////////////////////////////////
 
-int64_t allium_get_max64_0xFFFFLL() { return 0xFFFFLL; }
-
 bool register_allium_algo( algo_gate_t* gate )
 {
 #if defined (ALLIUM_4WAY)
@@ -168,7 +164,6 @@ bool register_allium_algo( algo_gate_t* gate )
   gate->hash      = (void*)&allium_hash;
 #endif
   gate->optimizations = SSE2_OPT | AES_OPT | SSE42_OPT | AVX2_OPT;
-  gate->get_max64         = (void*)&allium_get_max64_0xFFFFLL;
   opt_target_factor = 256.0;
   return true;
 };
@@ -214,7 +209,6 @@ bool register_phi2_algo( algo_gate_t* gate )
    gate->get_work_data_size = (void*)&phi2_get_work_data_size;
    gate->decode_extra_data  = (void*)&phi2_decode_extra_data;
    gate->build_extraheader  = (void*)&phi2_build_extraheader;
-   gate->get_max64          = (void*)&get_max64_0xffffLL;
    opt_target_factor = 256.0;
 #if defined(PHI2_4WAY)
    gate->scanhash           = (void*)&scanhash_phi2_4way;
diff --git a/algo/lyra2/lyra2re.c b/algo/lyra2/lyra2re.c
index 62a164c..024016b 100644
--- a/algo/lyra2/lyra2re.c
+++ b/algo/lyra2/lyra2re.c
@@ -113,18 +113,12 @@ int scanhash_lyra2re( struct work *work, uint32_t max_nonce,
 	return 0;
 }
 
-int64_t lyra2re_get_max64 ()
-{
-  return 0xffffLL;
-}
-
 bool register_lyra2re_algo( algo_gate_t* gate )
 {
   init_lyra2re_ctx();
   gate->optimizations = SSE2_OPT | AES_OPT | SSE42_OPT | AVX2_OPT;
   gate->scanhash   = (void*)&scanhash_lyra2re;
   gate->hash       = (void*)&lyra2re_hash;
-  gate->get_max64  = (void*)&lyra2re_get_max64;
   opt_target_factor = 128.0;
   return true;
 };
diff --git a/algo/lyra2/lyra2z330.c b/algo/lyra2/lyra2z330.c
index 4eb8640..24af0dd 100644
--- a/algo/lyra2/lyra2z330.c
+++ b/algo/lyra2/lyra2z330.c
@@ -70,7 +70,6 @@ bool register_lyra2z330_algo( algo_gate_t* gate )
   gate->miner_thread_init = (void*)&lyra2z330_thread_init;
   gate->scanhash   = (void*)&scanhash_lyra2z330;
   gate->hash       = (void*)&lyra2z330_hash;
-  gate->get_max64  = (void*)&get_max64_0xffffLL;
   opt_target_factor = 256.0;
   return true;
 };
diff --git a/algo/m7m.c b/algo/m7m.c
index 7d847aa..c2e37ba 100644
--- a/algo/m7m.c
+++ b/algo/m7m.c
@@ -296,8 +296,6 @@ int scanhash_m7m_hash( struct work* work, uint64_t max_nonce,
 
      pdata[19] = n;
 
-// can this be skipped after finding a share? Seems to work ok.
-//out:
      mpf_set_prec_raw(magifpi, prec0);
      mpf_set_prec_raw(magifpi0, prec0);
      mpf_set_prec_raw(mptmp, prec0);
@@ -323,7 +321,6 @@ bool register_m7m_algo( algo_gate_t *gate )
   gate->build_stratum_request = (void*)&std_be_build_stratum_request;
   gate->work_decode           = (void*)&std_be_work_decode;
   gate->submit_getwork_result = (void*)&std_be_submit_getwork_result;
-  gate->get_max64             = (void*)&get_max64_0x1ffff;
   gate->set_work_data_endian  = (void*)&set_work_data_big_endian;
   opt_target_factor = 65536.0;
   return true;
diff --git a/algo/nist5/zr5.c b/algo/nist5/zr5.c
index 9ec6e19..7a39a1b 100644
--- a/algo/nist5/zr5.c
+++ b/algo/nist5/zr5.c
@@ -208,12 +208,6 @@ void zr5_get_new_work( struct work* work, struct work* g_work, int thr_id,
        ++(*nonceptr);
 }
 
-int64_t zr5_get_max64 ()
-{
-//  return 0x1ffffLL;
-  return 0x1fffffLL;
-}
-
 void zr5_display_pok( struct work* work )
 {
       if ( work->data[0] & 0x00008000 )
@@ -229,7 +223,6 @@ bool register_zr5_algo( algo_gate_t* gate )
     gate->get_new_work          = (void*)&zr5_get_new_work;
     gate->scanhash              = (void*)&scanhash_zr5;
     gate->hash                  = (void*)&zr5hash;
-    gate->get_max64             = (void*)&zr5_get_max64;
     gate->decode_extra_data     = (void*)&zr5_display_pok;
     gate->build_stratum_request = (void*)&std_be_build_stratum_request;
     gate->work_decode           = (void*)&std_be_work_decode;
diff --git a/algo/ripemd/lbry-gate.c b/algo/ripemd/lbry-gate.c
index ec9af26..8e55867 100644
--- a/algo/ripemd/lbry-gate.c
+++ b/algo/ripemd/lbry-gate.c
@@ -94,8 +94,6 @@ void lbry_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
    g_work->data[28] = 0x80000000;
 }
 
-int64_t lbry_get_max64() { return 0x1ffffLL; }
-
 int lbry_get_work_data_size() { return LBRY_WORK_DATA_SIZE; }
 
 bool register_lbry_algo( algo_gate_t* gate )
@@ -112,7 +110,6 @@ bool register_lbry_algo( algo_gate_t* gate )
   gate->hash                  = (void*)&lbry_hash;
 #endif
   gate->calc_network_diff     = (void*)&lbry_calc_network_diff;
-  gate->get_max64             = (void*)&lbry_get_max64;
   gate->build_stratum_request = (void*)&lbry_le_build_stratum_request;
 //  gate->build_block_header    = (void*)&build_block_header;
   gate->build_extraheader     = (void*)&lbry_build_extraheader;
diff --git a/algo/scrypt/neoscrypt.c b/algo/scrypt/neoscrypt.c
index c4d6f6d..9003e59 100644
--- a/algo/scrypt/neoscrypt.c
+++ b/algo/scrypt/neoscrypt.c
@@ -1070,17 +1070,6 @@ int scanhash_neoscrypt( struct work *work,
     return 0;
 }
 
-int64_t get_neoscrypt_max64() { return 0x3ffff; }
-
-void neoscrypt_wait_for_diff( struct stratum_ctx *stratum )
-{
-   while ( !stratum->job.diff )
-   {
-//      applog(LOG_DEBUG, "Waiting for Stratum to set the job difficulty");
-      sleep(1);
-   }
-}
-
 int neoscrypt_get_work_data_size () { return 80; }
 
 bool register_neoscrypt_algo( algo_gate_t* gate )
@@ -1088,8 +1077,6 @@ bool register_neoscrypt_algo( algo_gate_t* gate )
   gate->optimizations         = SSE2_OPT;
   gate->scanhash              = (void*)&scanhash_neoscrypt;
   gate->hash                  = (void*)&neoscrypt;
-  gate->get_max64             = (void*)&get_neoscrypt_max64;
-  gate->wait_for_diff         = (void*)&neoscrypt_wait_for_diff;
   gate->build_stratum_request = (void*)&std_be_build_stratum_request;
   gate->work_decode           = (void*)&std_be_work_decode;
   gate->submit_getwork_result = (void*)&std_be_submit_getwork_result;
diff --git a/algo/scrypt/pluck.c b/algo/scrypt/pluck.c
index 44d49b8..a0e7275 100644
--- a/algo/scrypt/pluck.c
+++ b/algo/scrypt/pluck.c
@@ -483,11 +483,6 @@ int scanhash_pluck( struct work *work, uint32_t max_nonce,
 	return 0;
 }
 
-int64_t pluck_get_max64 ()
-{
-  return 0x1ffLL;
-}
-
 bool pluck_miner_thread_init( int thr_id )
 { 
   scratchbuf = malloc( 128 * 1024 ); 
@@ -503,7 +498,6 @@ bool register_pluck_algo( algo_gate_t* gate )
   gate->miner_thread_init = (void*)&pluck_miner_thread_init;
   gate->scanhash         = (void*)&scanhash_pluck;
   gate->hash             = (void*)&pluck_hash;
-  gate->get_max64        = (void*)&pluck_get_max64;
   opt_target_factor = 65536.0;
   return true;
 };
diff --git a/algo/scrypt/scrypt.c b/algo/scrypt/scrypt.c
index 4143fb1..68bce4a 100644
--- a/algo/scrypt/scrypt.c
+++ b/algo/scrypt/scrypt.c
@@ -766,8 +766,6 @@ extern int scanhash_scrypt( struct work *work, uint32_t max_nonce,
 	return 0;
 }
 
-int64_t scrypt_get_max64() { return 0xfff; }
-
 bool scrypt_miner_thread_init( int thr_id )
 {
  scratchbuf = scrypt_buffer_alloc( scratchbuf_size );  
@@ -783,10 +781,8 @@ bool register_scrypt_algo( algo_gate_t* gate )
   gate->miner_thread_init =(void*)&scrypt_miner_thread_init;
   gate->scanhash         = (void*)&scanhash_scrypt;
 //  gate->hash             = (void*)&scrypt_1024_1_1_256_24way;
-  gate->get_max64        = (void*)&scrypt_get_max64;
   opt_target_factor = 65536.0;
 
-
   if ( !opt_param_n )
   {
      opt_param_n = 1024;
diff --git a/algo/scryptjane/scrypt-jane.c b/algo/scryptjane/scrypt-jane.c
index 5329b4f..ea1b463 100644
--- a/algo/scryptjane/scrypt-jane.c
+++ b/algo/scryptjane/scrypt-jane.c
@@ -240,7 +240,6 @@ bool register_scryptjane_algo( algo_gate_t* gate )
 {
     gate->scanhash   = (void*)&scanhash_scryptjane;
     gate->hash       = (void*)&scryptjanehash;
-    gate->get_max64  = (void*)&get_max64_0x40LL;
     opt_target_factor = 65536.0;
 
     // figure out if arg in N or Nfactor
diff --git a/algo/sha/sha256t-gate.c b/algo/sha/sha256t-gate.c
index 0271234..ba7f95d 100644
--- a/algo/sha/sha256t-gate.c
+++ b/algo/sha/sha256t-gate.c
@@ -15,7 +15,6 @@ bool register_sha256t_algo( algo_gate_t* gate )
     gate->scanhash   = (void*)&scanhash_sha256t;
     gate->hash       = (void*)&sha256t_hash;
 #endif
-    gate->get_max64  = (void*)&get_max64_0x3ffff;
     return true;
 }
 
@@ -34,7 +33,6 @@ bool register_sha256q_algo( algo_gate_t* gate )
     gate->scanhash   = (void*)&scanhash_sha256q;
     gate->hash       = (void*)&sha256q_hash;
 #endif
-    gate->get_max64  = (void*)&get_max64_0x3ffff;
     return true;
 
 }
diff --git a/algo/skein/skein-gate.c b/algo/skein/skein-gate.c
index f41c874..6acdc19 100644
--- a/algo/skein/skein-gate.c
+++ b/algo/skein/skein-gate.c
@@ -2,8 +2,6 @@
 #include "sph_skein.h"
 #include "skein-hash-4way.h"
 
-int64_t skein_get_max64() { return 0x7ffffLL; }
-
 bool register_skein_algo( algo_gate_t* gate )
 {
     gate->optimizations = AVX2_OPT | SHA_OPT;
@@ -14,7 +12,6 @@ bool register_skein_algo( algo_gate_t* gate )
     gate->scanhash  = (void*)&scanhash_skein;
     gate->hash      = (void*)&skeinhash;
 #endif
-    gate->get_max64 = (void*)&skein_get_max64;
     return true;
 };
 
diff --git a/algo/skein/skein2-gate.c b/algo/skein/skein2-gate.c
index 34483b2..d40e2c4 100644
--- a/algo/skein/skein2-gate.c
+++ b/algo/skein/skein2-gate.c
@@ -2,11 +2,6 @@
 #include <stdint.h>
 #include "sph_skein.h"
 
-int64_t skein2_get_max64 ()
-{
-  return 0x7ffffLL;
-}
-
 bool register_skein2_algo( algo_gate_t* gate )
 {
   gate->optimizations = AVX2_OPT;
@@ -17,7 +12,6 @@ bool register_skein2_algo( algo_gate_t* gate )
   gate->scanhash  = (void*)&scanhash_skein2;
   gate->hash      = (void*)&skein2hash;
 #endif
-  gate->get_max64 = (void*)&skein2_get_max64;
   return true;
 };
 
diff --git a/algo/sm3/sm3-hash-4way.c b/algo/sm3/sm3-hash-4way.c
index 501642f..f900aba 100644
--- a/algo/sm3/sm3-hash-4way.c
+++ b/algo/sm3/sm3-hash-4way.c
@@ -181,7 +181,7 @@ void sm3_4way_compress( __m128i *digest, __m128i *block )
    for( j =0; j < 16; j++ )
    {
       SS1 = mm128_rol_32( _mm_add_epi32( _mm_add_epi32( mm128_rol_32(A,12), E ),
-                                      mm128_rol_32( T, j ) ), 7 );
+                                      mm128_rol_var_32( T, j ) ), 7 );
       SS2 = _mm_xor_si128( SS1, mm128_rol_32( A, 12 ) );
       TT1 = _mm_add_epi32( _mm_add_epi32( _mm_add_epi32( FF0( A, B, C ), D ),
                                           SS2 ), W1[j] );
@@ -200,9 +200,8 @@ void sm3_4way_compress( __m128i *digest, __m128i *block )
    T = _mm_set1_epi32( 0x7A879D8AUL );
    for( j =16; j < 64; j++ )
    {
-      // AVX512 _mm_rol_epi32 doesn't like using a variable for the second arg.
       SS1 = mm128_rol_32( _mm_add_epi32( _mm_add_epi32( mm128_rol_32(A,12), E ),
-                                      mm128_rol_32( T, j&31 ) ), 7 );
+                                      mm128_rol_var_32( T, j&31 ) ), 7 );
       SS2 = _mm_xor_si128( SS1, mm128_rol_32( A, 12 ) );
       TT1 = _mm_add_epi32( _mm_add_epi32( _mm_add_epi32( FF1( A, B, C ), D ), 
                                           SS2 ), W1[j] );
diff --git a/algo/x11/c11-gate.c b/algo/x11/c11-gate.c
index 30c719b..d087cc9 100644
--- a/algo/x11/c11-gate.c
+++ b/algo/x11/c11-gate.c
@@ -12,7 +12,6 @@ bool register_c11_algo( algo_gate_t* gate )
   gate->hash      = (void*)&c11_hash;
 #endif
   gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
-  gate->get_max64 = (void*)&get_max64_0x3ffff;
   return true;
 };
 
diff --git a/algo/x11/fresh.c b/algo/x11/fresh.c
index d1994b0..d81cc2f 100644
--- a/algo/x11/fresh.c
+++ b/algo/x11/fresh.c
@@ -125,7 +125,6 @@ bool register_fresh_algo( algo_gate_t* gate )
     algo_not_tested();
     gate->scanhash   = (void*)&scanhash_fresh;
     gate->hash       = (void*)&freshhash;
-    gate->get_max64  = (void*)&get_max64_0x3ffff;
     opt_target_factor = 256.0;
     return true;
 };
diff --git a/algo/x11/timetravel-gate.c b/algo/x11/timetravel-gate.c
index 311b3ba..370ef39 100644
--- a/algo/x11/timetravel-gate.c
+++ b/algo/x11/timetravel-gate.c
@@ -12,7 +12,6 @@ bool register_timetravel_algo( algo_gate_t* gate )
   gate->hash       = (void*)&timetravel_hash;
 #endif
   gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
-  gate->get_max64  = (void*)&get_max64_0xffffLL;
   opt_target_factor = 256.0;
   return true;
 };
diff --git a/algo/x11/timetravel10-gate.c b/algo/x11/timetravel10-gate.c
index 3c60bbf..8c21229 100644
--- a/algo/x11/timetravel10-gate.c
+++ b/algo/x11/timetravel10-gate.c
@@ -12,7 +12,6 @@ bool register_timetravel10_algo( algo_gate_t* gate )
   gate->hash       = (void*)&timetravel10_hash;
 #endif
   gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
-  gate->get_max64  = (void*)&get_max64_0xffffLL;
   opt_target_factor = 256.0;
   return true;
 };
diff --git a/algo/x11/tribus-gate.c b/algo/x11/tribus-gate.c
index f30d65e..9c9c1ae 100644
--- a/algo/x11/tribus-gate.c
+++ b/algo/x11/tribus-gate.c
@@ -3,7 +3,6 @@
 bool register_tribus_algo( algo_gate_t* gate )
 {
   gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
-  gate->get_max64     = (void*)&get_max64_0x1ffff;
 #if defined (TRIBUS_4WAY)
 //  init_tribus_4way_ctx();
   gate->scanhash      = (void*)&scanhash_tribus_4way;
diff --git a/algo/x11/x11-gate.c b/algo/x11/x11-gate.c
index adad370..c3917a1 100644
--- a/algo/x11/x11-gate.c
+++ b/algo/x11/x11-gate.c
@@ -12,7 +12,6 @@ bool register_x11_algo( algo_gate_t* gate )
   gate->hash      = (void*)&x11_hash;
 #endif
   gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
-  gate->get_max64 = (void*)&get_max64_0x3ffff;
   return true;
 };
 
diff --git a/algo/x11/x11gost-gate.c b/algo/x11/x11gost-gate.c
index 0d53551..3cb5bc2 100644
--- a/algo/x11/x11gost-gate.c
+++ b/algo/x11/x11gost-gate.c
@@ -12,7 +12,6 @@ bool register_x11gost_algo( algo_gate_t* gate )
   gate->hash      = (void*)&x11gost_hash;
 #endif
   gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
-  gate->get_max64 = (void*)&get_max64_0x3ffff;
   return true;
 };
 
diff --git a/algo/x12/x12-gate.c b/algo/x12/x12-gate.c
index 5220c10..05f7173 100644
--- a/algo/x12/x12-gate.c
+++ b/algo/x12/x12-gate.c
@@ -12,7 +12,6 @@ bool register_x12_algo( algo_gate_t* gate )
   gate->hash      = (void*)&x12hash;
 #endif
   gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
-  gate->get_max64 = (void*)&get_max64_0x3ffff;
   return true;
 };
 
diff --git a/algo/x13/phi1612-gate.c b/algo/x13/phi1612-gate.c
index 9a9d871..65861eb 100644
--- a/algo/x13/phi1612-gate.c
+++ b/algo/x13/phi1612-gate.c
@@ -12,7 +12,6 @@ bool register_phi1612_algo( algo_gate_t* gate )
   gate->hash      = (void*)&phi1612_hash;
 #endif
   gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
-  gate->get_max64 = (void*)&get_max64_0x3ffff;
   return true;
 };
 
diff --git a/algo/x13/x13-gate.c b/algo/x13/x13-gate.c
index ce3e640..60973d3 100644
--- a/algo/x13/x13-gate.c
+++ b/algo/x13/x13-gate.c
@@ -12,7 +12,6 @@ bool register_x13_algo( algo_gate_t* gate )
   gate->hash      = (void*)&x13hash;
 #endif
   gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
-  gate->get_max64 = (void*)&get_max64_0x3ffff;
   return true;
 };
 
diff --git a/algo/x13/x13sm3-gate.c b/algo/x13/x13sm3-gate.c
index c4c348b..bc0fb92 100644
--- a/algo/x13/x13sm3-gate.c
+++ b/algo/x13/x13sm3-gate.c
@@ -12,7 +12,6 @@ bool register_x13sm3_algo( algo_gate_t* gate )
   gate->hash      = (void*)&x13sm3_hash;
 #endif
   gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
-  gate->get_max64 = (void*)&get_max64_0x3ffff;
   return true;
 };
 
@@ -28,7 +27,6 @@ bool register_x13bcd_algo( algo_gate_t* gate )
   gate->hash      = (void*)&x13bcd_hash;
 #endif
   gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
-  gate->get_max64 = (void*)&get_max64_0x3ffff;
   return true;
 };
 
diff --git a/algo/x14/axiom.c b/algo/x14/axiom.c
index 7c0b70a..13a0b73 100644
--- a/algo/x14/axiom.c
+++ b/algo/x14/axiom.c
@@ -83,6 +83,5 @@ bool register_axiom_algo( algo_gate_t* gate )
 {
     gate->scanhash  = (void*)&scanhash_axiom;
     gate->hash      = (void*)&axiomhash;
-    gate->get_max64 = (void*)&get_max64_0x40LL;
     return true;
 }
diff --git a/algo/x14/polytimos-gate.c b/algo/x14/polytimos-gate.c
index aa54060..41f609c 100644
--- a/algo/x14/polytimos-gate.c
+++ b/algo/x14/polytimos-gate.c
@@ -11,7 +11,6 @@ bool register_polytimos_algo( algo_gate_t* gate )
   gate->scanhash  = (void*)&scanhash_polytimos;
   gate->hash      = (void*)&polytimos_hash;
 #endif
-  gate->get_max64 = (void*)&get_max64_0x3ffff;
   return true;
 };
 
diff --git a/algo/x14/veltor-gate.c b/algo/x14/veltor-gate.c
index 5e7e74b..b14613f 100644
--- a/algo/x14/veltor-gate.c
+++ b/algo/x14/veltor-gate.c
@@ -12,7 +12,6 @@ bool register_veltor_algo( algo_gate_t* gate )
   gate->hash      = (void*)&veltor_hash;
 #endif
   gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
-  gate->get_max64 = (void*)&get_max64_0x3ffff;
   return true;
 };
 
diff --git a/algo/x14/x14-gate.c b/algo/x14/x14-gate.c
index d02c305..013aa10 100644
--- a/algo/x14/x14-gate.c
+++ b/algo/x14/x14-gate.c
@@ -12,7 +12,6 @@ bool register_x14_algo( algo_gate_t* gate )
   gate->hash      = (void*)&x14hash;
 #endif
   gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
-  gate->get_max64 = (void*)&get_max64_0x3ffff;
   return true;
 };
 
diff --git a/algo/x17/sonoa-gate.c b/algo/x17/sonoa-gate.c
index b420564..fea4d39 100644
--- a/algo/x17/sonoa-gate.c
+++ b/algo/x17/sonoa-gate.c
@@ -11,7 +11,6 @@ bool register_sonoa_algo( algo_gate_t* gate )
   gate->scanhash  = (void*)&scanhash_sonoa;
   gate->hash      = (void*)&sonoa_hash;
 #endif
-  gate->get_max64     = (void*)&get_max64_0x1ffff;
   gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
   return true;
 };
diff --git a/algo/x17/xevan-gate.c b/algo/x17/xevan-gate.c
index 52195a1..96b811c 100644
--- a/algo/x17/xevan-gate.c
+++ b/algo/x17/xevan-gate.c
@@ -12,7 +12,6 @@ bool register_xevan_algo( algo_gate_t* gate )
   gate->hash      = (void*)&xevan_hash;
 #endif
   gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
-  gate->get_max64  = (void*)&get_max64_0xffffLL;
   opt_target_factor = 256.0;
   return true;
 };
diff --git a/algo/yescrypt/yescrypt.c b/algo/yescrypt/yescrypt.c
index 44fdabd..fb39d2e 100644
--- a/algo/yescrypt/yescrypt.c
+++ b/algo/yescrypt/yescrypt.c
@@ -416,16 +416,6 @@ int scanhash_yescrypt( struct work *work, uint32_t max_nonce,
         return 0;
 }
 
-int64_t yescrypt_get_max64()
-{
-  return 0x1ffLL;
-}
-
-int64_t yescryptr16_get_max64()
-{
-  return 0xfffLL;
-}
-
 void yescrypt_gate_base(algo_gate_t *gate )
 {
    gate->optimizations = SSE2_OPT | SHA_OPT;
@@ -437,7 +427,6 @@ void yescrypt_gate_base(algo_gate_t *gate )
 bool register_yescrypt_algo( algo_gate_t* gate )
 {
    yescrypt_gate_base( gate );
-   gate->get_max64  = (void*)&yescrypt_get_max64;
 
    if ( opt_param_n )  YESCRYPT_N = opt_param_n;
    else                YESCRYPT_N = 2048;
@@ -469,7 +458,6 @@ bool register_yescrypt_algo( algo_gate_t* gate )
 bool register_yescryptr8_algo( algo_gate_t* gate )
 {
    yescrypt_gate_base( gate );
-   gate->get_max64  = (void*)&yescrypt_get_max64;
    yescrypt_client_key = "Client Key";
    yescrypt_client_key_len = 10;
    YESCRYPT_N = 2048;
@@ -481,7 +469,6 @@ bool register_yescryptr8_algo( algo_gate_t* gate )
 bool register_yescryptr16_algo( algo_gate_t* gate )
 {
    yescrypt_gate_base( gate );
-   gate->get_max64  = (void*)&yescryptr16_get_max64;
    yescrypt_client_key = "Client Key";
    yescrypt_client_key_len = 10;
    YESCRYPT_N = 4096;   
@@ -493,7 +480,6 @@ bool register_yescryptr16_algo( algo_gate_t* gate )
 bool register_yescryptr32_algo( algo_gate_t* gate )
 {
    yescrypt_gate_base( gate );
-   gate->get_max64  = (void*)&yescryptr16_get_max64;
    yescrypt_client_key = "WaviBanana";
    yescrypt_client_key_len = 10;
    YESCRYPT_N = 4096;
diff --git a/algo/yespower/yespower-gate.c b/algo/yespower/yespower-gate.c
index 3d5845a..7384869 100644
--- a/algo/yespower/yespower-gate.c
+++ b/algo/yespower/yespower-gate.c
@@ -109,11 +109,6 @@ int scanhash_yespower_b2b( struct work *work, uint32_t max_nonce,
         return 0;
 }
 
-int64_t yespower_get_max64()
-{
-  return 0xfffLL;
-}
-
 bool register_yespower_algo( algo_gate_t* gate )
 {
   yespower_params.version = YESPOWER_1_0;
@@ -141,7 +136,6 @@ bool register_yespower_algo( algo_gate_t* gate )
      applog( LOG_NOTICE,"Key= \"%s\"\n", yespower_params.pers );
 
   gate->optimizations = SSE2_OPT;
-  gate->get_max64     = (void*)&yespower_get_max64;
   gate->scanhash      = (void*)&scanhash_yespower;
   gate->hash          = (void*)&yespower_hash;
   opt_target_factor = 65536.0;
@@ -156,7 +150,6 @@ bool register_yespowerr16_algo( algo_gate_t* gate )
   yespower_params.pers    = NULL;
   yespower_params.perslen = 0;
   gate->optimizations = SSE2_OPT;
-  gate->get_max64     = (void*)&yespower_get_max64;
   gate->scanhash      = (void*)&scanhash_yespower;
   gate->hash          = (void*)&yespower_hash;
   opt_target_factor = 65536.0;
@@ -164,21 +157,10 @@ bool register_yespowerr16_algo( algo_gate_t* gate )
  };
 
 
-int64_t yescrypt_05_get_max64()
-{
-  return 0x1ffLL;
-}
-
-int64_t yescryptr16_05_get_max64()
-{
-  return 0xfffLL;
-}
-
 bool register_yescrypt_05_algo( algo_gate_t* gate )
 {
    gate->optimizations = SSE2_OPT | SHA_OPT;
    gate->scanhash   = (void*)&scanhash_yespower;
-   gate->get_max64  = (void*)&yescrypt_05_get_max64;
    yespower_params.version = YESPOWER_0_5;
    yespower_params.N       = 2048;
    yespower_params.r       = 8;
@@ -192,7 +174,6 @@ bool register_yescryptr8_05_algo( algo_gate_t* gate )
 {
    gate->optimizations = SSE2_OPT | SHA_OPT;
    gate->scanhash   = (void*)&scanhash_yespower;
-   gate->get_max64  = (void*)&yescrypt_05_get_max64;
    yespower_params.version = YESPOWER_0_5;
    yespower_params.N       = 2048;
    yespower_params.r       = 8;
@@ -206,7 +187,6 @@ bool register_yescryptr16_05_algo( algo_gate_t* gate )
 {
    gate->optimizations = SSE2_OPT | SHA_OPT;
    gate->scanhash   = (void*)&scanhash_yespower;
-   gate->get_max64  = (void*)&yescryptr16_05_get_max64;
    yespower_params.version = YESPOWER_0_5;
    yespower_params.N       = 4096;
    yespower_params.r       = 16;
@@ -220,7 +200,6 @@ bool register_yescryptr32_05_algo( algo_gate_t* gate )
 {
    gate->optimizations = SSE2_OPT | SHA_OPT;
    gate->scanhash   = (void*)&scanhash_yespower;
-   gate->get_max64  = (void*)&yescryptr16_05_get_max64;
    yespower_params.version = YESPOWER_0_5;
    yespower_params.N       = 4096;
    yespower_params.r       = 32;
@@ -245,7 +224,6 @@ bool register_power2b_algo( algo_gate_t* gate )
   applog( LOG_NOTICE,"Key length= %d\n", yespower_params.perslen );
 
   gate->optimizations = SSE2_OPT;
-  gate->get_max64     = (void*)&yespower_get_max64;
   gate->scanhash      = (void*)&scanhash_yespower_b2b;
   gate->hash          = (void*)&yespower_b2b_hash;
   opt_target_factor = 65536.0;
@@ -286,7 +264,6 @@ bool register_yespower_b2b_algo( algo_gate_t* gate )
   }  
 
   gate->optimizations = SSE2_OPT;
-  gate->get_max64     = (void*)&yespower_get_max64;
   gate->scanhash      = (void*)&scanhash_yespower_b2b;
   gate->hash          = (void*)&yespower_b2b_hash;
   opt_target_factor = 65536.0;
diff --git a/api.c b/api.c
index 8999397..60855b4 100644
--- a/api.c
+++ b/api.c
@@ -32,7 +32,7 @@
 #include <sys/types.h>
 
 #include "miner.h"
-
+#include "sysinfos.c"
 #ifndef WIN32
 # include <errno.h>
 # include <sys/socket.h>
@@ -105,7 +105,7 @@ extern double global_hashrate;
 #define USE_MONITORING
 extern float cpu_temp(int);
 extern uint32_t cpu_clock(int);
-extern int cpu_fanpercent(void);
+//extern int cpu_fanpercent(void);
 
 /***************************************************************/
 
diff --git a/configure b/configure
index 50f9a5e..84d13ec 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.9.9.
+# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.9.9.1.
 #
 #
 # Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
@@ -577,8 +577,8 @@ MAKEFLAGS=
 # Identity of this package.
 PACKAGE_NAME='cpuminer-opt'
 PACKAGE_TARNAME='cpuminer-opt'
-PACKAGE_VERSION='3.9.9'
-PACKAGE_STRING='cpuminer-opt 3.9.9'
+PACKAGE_VERSION='3.9.9.1'
+PACKAGE_STRING='cpuminer-opt 3.9.9.1'
 PACKAGE_BUGREPORT=''
 PACKAGE_URL=''
 
@@ -1332,7 +1332,7 @@ if test "$ac_init_help" = "long"; then
   # Omit some internal or obsolete options to make the list less imposing.
   # This message is too long to be a string in the A/UX 3.1 sh.
   cat <<_ACEOF
-\`configure' configures cpuminer-opt 3.9.9 to adapt to many kinds of systems.
+\`configure' configures cpuminer-opt 3.9.9.1 to adapt to many kinds of systems.
 
 Usage: $0 [OPTION]... [VAR=VALUE]...
 
@@ -1404,7 +1404,7 @@ fi
 
 if test -n "$ac_init_help"; then
   case $ac_init_help in
-     short | recursive ) echo "Configuration of cpuminer-opt 3.9.9:";;
+     short | recursive ) echo "Configuration of cpuminer-opt 3.9.9.1:";;
    esac
   cat <<\_ACEOF
 
@@ -1509,7 +1509,7 @@ fi
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
   cat <<\_ACEOF
-cpuminer-opt configure 3.9.9
+cpuminer-opt configure 3.9.9.1
 generated by GNU Autoconf 2.69
 
 Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2012,7 +2012,7 @@ cat >config.log <<_ACEOF
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.
 
-It was created by cpuminer-opt $as_me 3.9.9, which was
+It was created by cpuminer-opt $as_me 3.9.9.1, which was
 generated by GNU Autoconf 2.69.  Invocation command line was
 
   $ $0 $@
@@ -2993,7 +2993,7 @@ fi
 
 # Define the identity of the package.
  PACKAGE='cpuminer-opt'
- VERSION='3.9.9'
+ VERSION='3.9.9.1'
 
 
 cat >>confdefs.h <<_ACEOF
@@ -6690,7 +6690,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by cpuminer-opt $as_me 3.9.9, which was
+This file was extended by cpuminer-opt $as_me 3.9.9.1, which was
 generated by GNU Autoconf 2.69.  Invocation command line was
 
   CONFIG_FILES    = $CONFIG_FILES
@@ -6756,7 +6756,7 @@ _ACEOF
 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
 ac_cs_version="\\
-cpuminer-opt config.status 3.9.9
+cpuminer-opt config.status 3.9.9.1
 configured by $0, generated by GNU Autoconf 2.69,
   with options \\"\$ac_cs_config\\"
 
diff --git a/configure.ac b/configure.ac
index f947f4f..4a049db 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,4 +1,4 @@
-AC_INIT([cpuminer-opt], [3.9.9])
+AC_INIT([cpuminer-opt], [3.9.9.1])
 
 AC_PREREQ([2.59c])
 AC_CANONICAL_SYSTEM
diff --git a/cpu-miner.c b/cpu-miner.c
index fe11840..5bad7af 100644
--- a/cpu-miner.c
+++ b/cpu-miner.c
@@ -37,6 +37,7 @@
 #include <curl/curl.h>
 #include <jansson.h>
 #include <openssl/sha.h>
+#include "sysinfos.c"
 
 #ifdef WIN32
 #include <winsock2.h>
@@ -81,7 +82,6 @@ bool opt_debug_diff = false;
 bool opt_protocol = false;
 bool opt_benchmark = false;
 bool opt_redirect = true;
-bool opt_showdiff = true;
 bool opt_extranonce = true;
 bool want_longpoll = true;
 bool have_longpoll = false;
@@ -110,12 +110,16 @@ int opt_n_threads = 0;
 bool opt_reset_on_stale = false;
 
 // Windows doesn't support 128 bit affinity mask.
+// Need compile time and run time test.
 #if defined(__linux) && defined(GCC_INT128)  
 #define AFFINITY_USES_UINT128 1
-uint128_t opt_affinity = -1LL;
+uint128_t opt_affinity = -1;
+static bool affinity_uses_uint128 = true;
 #else
-uint64_t opt_affinity = -1LL;
+uint64_t opt_affinity = -1;
+static bool affinity_uses_uint128 = false;
 #endif
+
 int opt_priority = 0;
 int num_cpus = 1;
 int num_cpugroups = 1;
@@ -148,9 +152,10 @@ double opt_target_factor = 1.0;
 uint32_t zr5_pok = 0;
 bool opt_stratum_stats = false;
 bool opt_hash_meter = false;
-uint32_t accepted_share_count = 0ULL;
-uint32_t rejected_share_count = 0ULL;
-uint32_t solved_block_count = 0ULL;
+uint32_t submitted_share_count= 0;
+uint32_t accepted_share_count = 0;
+uint32_t rejected_share_count = 0;
+uint32_t solved_block_count = 0;
 double *thr_hashrates;
 double *thr_hashcount;
 double global_hashcount = 0;
@@ -212,9 +217,9 @@ static inline void drop_policy(void)
 
 // Linux affinity can use int128.
 #if AFFINITY_USES_UINT128
-static void affine_to_cpu_mask( int id, unsigned __int128 mask )
+static void affine_to_cpu_mask( int id, uint128_t mask )
 #else
-static void affine_to_cpu_mask( int id, unsigned long long mask )
+static void affine_to_cpu_mask( int id, uint64_t mask )
 #endif
 {
    cpu_set_t set;
@@ -225,9 +230,9 @@ static void affine_to_cpu_mask( int id, unsigned long long mask )
    {
       // cpu mask
 #if AFFINITY_USES_UINT128
-      if( ( mask & ( (unsigned __int128)1ULL << i ) ) )  CPU_SET( i, &set );
+      if( ( mask & ( (uint128_t)1 << i ) ) )  CPU_SET( i, &set );
 #else
-      if( (ncpus > 64) || ( mask & (1ULL << i) ) )  CPU_SET( i, &set );
+      if( (ncpus > 64) || ( mask & ( (uint64_t)1 << i ) ) )  CPU_SET( i, &set );
 #endif
    }
    if ( id == -1 )
@@ -246,7 +251,7 @@ static void affine_to_cpu_mask( int id, unsigned long long mask )
 static inline void drop_policy(void) { }
 
 // Windows CPU groups to manage more than 64 CPUs.
-static void affine_to_cpu_mask( int id, unsigned long mask )
+static void affine_to_cpu_mask( int id, uint64_t mask )
 {
    bool success;
    unsigned long last_error;    
@@ -254,45 +259,44 @@ static void affine_to_cpu_mask( int id, unsigned long mask )
 //   DWORD last_error;
 
    if ( id == -1 )
-	success = SetProcessAffinityMask( GetCurrentProcess(), mask );
+      success = SetProcessAffinityMask( GetCurrentProcess(), mask );
 
 // Are Windows CPU Groups supported?
 #if _WIN32_WINNT==0x0601
    else if ( num_cpugroups == 1 )
-	success = SetThreadAffinityMask( GetCurrentThread(), mask );
+	   success = SetThreadAffinityMask( GetCurrentThread(), mask );
    else
    {
-	// Find the correct cpu group
-	int cpu = id % num_cpus;
-	int group;
-	for( group = 0; group < num_cpugroups; group++ )
-	{
-	   int cpus = GetActiveProcessorCount( group );
- 	   if ( cpu < cpus )
-	      break;
+	   // Find the correct cpu group
+	   int cpu = id % num_cpus;
+	   int group;
+	   for( group = 0; group < num_cpugroups; group++ )
+	   {
+	      int cpus = GetActiveProcessorCount( group );
+ 	      if ( cpu < cpus )  break;
+  	      cpu -= cpus;
+      }
 
-  	   cpu -= cpus;
-   }
+	   if (opt_debug)
+         applog(LOG_DEBUG, "Binding thread %d to cpu %d on cpu group %d (mask %x)",
+               id, cpu, group, (1ULL << cpu));
 
-	if (opt_debug)
-	applog(LOG_DEBUG, "Binding thread %d to cpu %d on cpu group %d (mask %x)", id, cpu, group, (1ULL << cpu));
-
-	GROUP_AFFINITY affinity;
-	affinity.Group = group;
-	affinity.Mask = 1ULL << cpu;
-	success = SetThreadGroupAffinity( GetCurrentThread(), &affinity, NULL );
+	   GROUP_AFFINITY affinity;
+	   affinity.Group = group;
+	   affinity.Mask = 1ULL << cpu;
+	   success = SetThreadGroupAffinity( GetCurrentThread(), &affinity, NULL );
    }
 #else
    else 
-        success = SetThreadAffinityMask( GetCurrentThread(), mask );
+      success = SetThreadAffinityMask( GetCurrentThread(), mask );
 #endif
 
    if (!success)
    {
-	last_error = GetLastError();
-	applog(LOG_WARNING, "affine_to_cpu_mask for %u returned %x", id, last_error);
+	   last_error = GetLastError();
+	   applog(LOG_WARNING, "affine_to_cpu_mask for %u returned %x",
+               id, last_error);
    }
-
 }
 
 #else
@@ -831,21 +835,22 @@ void scale_hash_for_display ( double* hashrate, char* prefix )
      {  *prefix = 'E';  *hashrate /= 1e18;  }
 }
 
-static inline void sprintf_et( char *str, uint64_t seconds )
+static inline void sprintf_et( char *str, int seconds )
 {
-   uint64_t min = seconds / 60;
-   uint64_t sec = seconds % 60;
-   uint64_t hrs = min / 60;
+   // sprintf doesn't like uint64_t, Linux thinks it's long, Windows long long.
+   unsigned int min = seconds / 60;
+   unsigned int sec = seconds % 60;
+   unsigned int hrs = min / 60;
    if ( hrs )   
    {
-      uint64_t days = hrs / 24;
+      unsigned int days = hrs / 24;
       if ( days )  //0d00h
-         sprintf( str, "%llud%02lluh", days, hrs % 24 );
+         sprintf( str, "%ud%02uh", days, hrs % 24 );
       else         // 0h00m  
-         sprintf( str, "%lluh%02llum", hrs, min % 60 );
+         sprintf( str, "%uh%02um", hrs, min % 60 );
    }
    else         // 0m00s
-      sprintf( str, "%llum%02llus", min, sec );
+      sprintf( str, "%um%02us", min, sec );
 }
    
 // Bitcoin formula for converting difficulty to an equivalent
@@ -859,23 +864,33 @@ static inline void sprintf_et( char *str, uint64_t seconds )
 
 const double diff_to_hash = 4294967296.;
 
+static struct   timeval session_start;
 static struct   timeval five_min_start;
-static double   time_sum    = 0.;
 static double   latency_sum = 0.;
 static uint64_t submit_sum  = 0;
+static uint64_t accept_sum  = 0;
 static uint64_t reject_sum  = 0;
+static double   norm_diff_sum = 0.;
 static uint32_t last_block_height = 0;
 static double   last_targetdiff = 0.;
+static double   ref_rate_hi = 0.;
+static double   ref_rate_lo = 1e100;
+#if !(defined(__WINDOWS__) || defined(__WIN64))
+static uint32_t hi_temp = 0;
+#endif
+//static uint32_t stratum_errors = 0;
 
 struct share_stats_t
 {
    struct timeval submit_time;
    double net_diff;
    double share_diff;
+   double stratum_diff;
+   double target_diff;
 };
 
 #define s_stats_size 8
-static struct share_stats_t share_stats[ s_stats_size ];
+static struct share_stats_t share_stats[ s_stats_size ] = {0};
 static int s_get_ptr = 0, s_put_ptr = 0;
 static struct timeval last_submit_time = {0};
 
@@ -886,77 +901,102 @@ static inline int stats_ptr_incr( int p )
 
 void report_summary_log( bool force )
 {
-   struct timeval now, et;
+   struct timeval now, et, uptime, start_time;
 
    pthread_mutex_lock( &stats_lock );
 
    gettimeofday( &now, NULL );
    timeval_subtract( &et, &now, &five_min_start );
 
-   if ( !force && et.tv_sec < 300 )
+   if ( !( force && ( submit_sum || ( et.tv_sec > 5 ) ) )
+        && ( et.tv_sec < 300 ) )
    {
       pthread_mutex_unlock( &stats_lock );
       return;
    }
    
-   // collect and reset global counters
-   double   time     = time_sum;    time_sum    = 0.;
-   uint64_t submits  = submit_sum;  submit_sum  = 0;
-   uint64_t rejects  = reject_sum;  reject_sum  = 0;
-   int      latency  = latency_sum; latency_sum = 0;
+   // collect and reset periodic counters
+   uint64_t submits = submit_sum;  submit_sum = 0;
+   uint64_t accepts = accept_sum;  accept_sum = 0;
+   uint64_t rejects = reject_sum;  reject_sum = 0;
+//   int      latency  = latency_sum; latency_sum = 0;
+   memcpy( &start_time, &five_min_start, sizeof start_time );
    memcpy( &five_min_start, &now, sizeof now );
 
    pthread_mutex_unlock( &stats_lock );
 
-   double   ghrate = global_hashrate;
-   double   scaled_ghrate = ghrate;
-   double   shrate = time == 0. ? 0. : diff_to_hash * last_targetdiff
-                                       * (double)(submits - rejects)  / time;
-   double   scaled_shrate = shrate;
-   int      avg_latency = 0;
-   double   latency_pc  = 0.;
-   double   submit_rate = 0.;
+   timeval_subtract( &et, &now, &start_time );
+   timeval_subtract( &uptime, &now, &session_start );
+   
+   double share_time = (double)et.tv_sec + (double)et.tv_usec / 1e6;
+   double ghrate = global_hashrate;
+   double scaled_ghrate = ghrate;
+   double shrate = share_time == 0. ? 0. : diff_to_hash * last_targetdiff
+                                           * (double)(accepts) / share_time;
+   double sess_hrate = uptime.tv_sec == 0. ? 0. : diff_to_hash * norm_diff_sum
+                                                   / (double)uptime.tv_sec;
+   double scaled_shrate = shrate;
+//   int    avg_latency = 0;
+//   double latency_pc  = 0.;
+   double submit_rate = 0.;
    char shr_units[4] = {0};
    char ghr_units[4] = {0};
+   char sess_hr_units[4] = {0};
    char et_str[24];
+   char upt_str[24];
 
-   if ( submits )
-      avg_latency = latency / submits;
+//   if ( submits )  avg_latency = latency / submits;
 
-   if ( time != 0. )
+   if ( share_time != 0. )
    {
-      submit_rate = (double)submits*60. / time;
-      latency_pc =  (double)latency / (time * 10.);
+      submit_rate = (double)submits*60. / share_time;
+//      latency_pc =  (double)latency / (share_time * 10.);
    }
 
+   if ( ghrate > ref_rate_hi )  ref_rate_hi = ghrate;
+   if ( ghrate < ref_rate_lo )  ref_rate_lo = ghrate;
+
    scale_hash_for_display( &scaled_shrate, shr_units );
    scale_hash_for_display( &scaled_ghrate, ghr_units );
+   scale_hash_for_display( &sess_hrate, sess_hr_units );
+
    sprintf_et( et_str, et.tv_sec );
+   sprintf_et( upt_str, uptime.tv_sec );
 
-   applog( LOG_NOTICE, "Submitted %d shares in %s, %.2f /min, %ld rejected",
-                        submits, et_str, submit_rate, rejects );
-   applog2( LOG_INFO, "Share eqv: %.2f %sh/s, miner ref: %.2f %sh/s",
-           scaled_shrate, shr_units, scaled_ghrate, ghr_units );
+   applog( LOG_NOTICE, "Periodic Report     %s        %s", et_str, upt_str );
+   applog2( LOG_INFO, "Share rate        %.2f/min     %.2f/min",
+                      submit_rate, (double)submitted_share_count*60. /
+                    ( (double)uptime.tv_sec + (double)uptime.tv_usec / 1e6 ) );
+   applog2( LOG_INFO, "Hash rate       %7.2f%sh/s   %7.2f%sh/s   (%.2f%sh/s)",
+                     scaled_shrate, shr_units, sess_hrate, sess_hr_units, 
+                     scaled_ghrate, ghr_units );
+   applog2( LOG_INFO,"Submitted        %6d       %6d",
+                       (int)submits, (int)submitted_share_count );
+   applog2( LOG_INFO,"Accepted         %6d       %6d",
+                       (int)accepts, (int)accepted_share_count );
+   applog2( LOG_INFO,"Rejected         %6d       %6d",
+                       (int)rejects, (int)rejected_share_count );
+//   applog2( LOG_INFO,"Blocks solved                  %6d",
+//                              solved_block_count );
+
+#if !(defined(__WINDOWS__) || defined(__WIN64))
 
-#if ((defined(_WIN64) || defined(__WINDOWS__)))
-   applog2( LOG_INFO, "Network latency %d ms (%.2f%%)",
-                       avg_latency, latency_pc );
-#else
    int temp = cpu_temp(0);
    char tempstr[32];
+   if ( temp > (int)hi_temp ) hi_temp = temp;
 
    if ( use_colors && ( temp >= 70 ) )
    {
       if ( temp >= 80 )
-         sprintf( tempstr, "%sCPU temp %d C%s", CL_WHT CL_RED, temp, CL_N );
+         sprintf( tempstr, "%s%dC%s", CL_WHT CL_RED, temp, CL_N );
       else
-         sprintf( tempstr, "%sCPU temp %d C%s", CL_WHT CL_YLW, temp, CL_N );
+         sprintf( tempstr, "%s%dC%s", CL_WHT CL_YLW, temp, CL_N );
    }
    else
-      sprintf( tempstr, "CPU temp %d C", temp );
+      sprintf( tempstr, "%dC", temp );
+
+   applog2(LOG_INFO,"CPU temp             %s      max %dC", tempstr, hi_temp );
 
-   applog2( LOG_INFO, "Network latency %d ms (%.2f%%), %s",
-                      avg_latency, latency_pc, tempstr );
 #endif
 }
 
 
@@ -983,6 +1023,7 @@ static int share_result( int result, struct work *null_work,
    }
    else
    {
+      // empty queue, it must have overflowed and stats were lost for a share.
       pthread_mutex_unlock( &stats_lock );
       applog(LOG_WARNING,"Pending shares overflow, stats for share are lost.");
    }
@@ -992,9 +1033,9 @@ static int share_result( int result, struct work *null_work,
    {
       gettimeofday( &ack_time, NULL );
       timeval_subtract( &latency_tv, &ack_time, &my_stats.submit_time );
-      latency = ( latency_tv.tv_sec * 1000  + latency_tv.tv_usec / 1000 );
+      latency = ( latency_tv.tv_sec * 1e3  + latency_tv.tv_usec / 1e3 );
       timeval_subtract( &et, &my_stats.submit_time, &last_submit_time );
-      share_time = (double)et.tv_sec + ( (double)et.tv_usec / 1000000. );
+      share_time = (double)et.tv_sec + ( (double)et.tv_usec / 1e6 );
       memcpy( &last_submit_time, &my_stats.submit_time,
               sizeof last_submit_time );
    }
@@ -1003,11 +1044,23 @@ static int share_result( int result, struct work *null_work,
                                                 my_stats.net_diff * 100.;
 
    // check result
+   if ( result )
+   {
+      accepted_share_count++;
+      if ( ( my_stats.net_diff > 0. ) && ( my_stats.share_diff >= net_diff ) )
+      {
+         solved = true;
+         solved_block_count++;
+      }
+   }
+   else
+      rejected_share_count++;
+/*
    result ? accepted_share_count++ : rejected_share_count++;
    solved = result && (my_stats.net_diff > 0.0 )
             && ( my_stats.share_diff >= net_diff );
    solved_block_count += solved ? 1 : 0 ;
-
+*/
    // update global counters for summary report
    pthread_mutex_lock( &stats_lock );
 
@@ -1019,9 +1072,14 @@ static int share_result( int result, struct work *null_work,
    global_hashcount = hashcount;
    global_hashrate = hashrate;
    
-   time_sum    += share_time;
-   submit_sum  ++;
-   reject_sum  += (uint64_t)!result;
+   if ( result ) 
+   {
+      accept_sum++;
+      norm_diff_sum += my_stats.target_diff;
+   }
+   else
+      reject_sum++;
+   submit_sum++;
    latency_sum += latency;
 
    pthread_mutex_unlock( &stats_lock );
@@ -1057,7 +1115,7 @@ static int share_result( int result, struct work *null_work,
          bin2hex( str3, (unsigned char*)str2, 12 );
          applog2( LOG_INFO, "Hash:   %s...", str3 );
 
-         diff_to_target( str1, last_targetdiff );
+         diff_to_target( str1, my_stats.target_diff );
          for ( int i = 0; i < 8; i++ )
             be32enc( str2 + i, str1[7 - i] );
          bin2hex( str3, (unsigned char*)str2, 12 );
@@ -1569,42 +1627,44 @@ static void *workio_thread(void *userdata)
 	bool ok = true;
 
 	curl = curl_easy_init();
-	if (unlikely(!curl))
-        {
+	if (unlikely( !curl ) )
+   {
 		applog(LOG_ERR, "CURL initialization failed");
 		return NULL;
 	}
-	if(jsonrpc_2 && !have_stratum)
-		ok = rpc2_workio_login(curl);
-	while (ok)
-        {
+	if ( jsonrpc_2 && !have_stratum )
+		ok = rpc2_workio_login( curl );
+
+   while (ok)
+   {
 		struct workio_cmd *wc;
 
 		/* wait for workio_cmd sent to us, on our queue */
 		wc = (struct workio_cmd *) tq_pop(mythr->q, NULL);
 		if (!wc)
-                {
+      {
 			ok = false;
 			break;
 		}
 
 		/* process workio_cmd */
 		switch (wc->cmd)
-                {
-		case WC_GET_WORK:
-			ok = workio_get_work(wc, curl);
-			break;
-		case WC_SUBMIT_WORK:
-			ok = workio_submit_work(wc, curl);
-			break;
+      {
+		   case WC_GET_WORK:
+			   ok = workio_get_work(wc, curl);
+			   break;
+		   case WC_SUBMIT_WORK:
+			   ok = workio_submit_work(wc, curl);
+			   break;
 
-		default:		/* should never happen */
-			ok = false;
-			break;
+		   default:		/* should never happen */
+			   ok = false;
+			   break;
 		}
 		workio_cmd_free(wc);
 	}
-	tq_freeze(mythr->q);
+
+   tq_freeze(mythr->q);
 	curl_easy_cleanup(curl);
 	return NULL;
 }
@@ -1695,17 +1755,18 @@ void work_set_target_ratio( struct work* work, uint32_t* hash )
       work->sharediff = 0.;
 
    // collect some share stats
+   // Frequent share submission combined with high latency can cause
+   // shares to be submitted faster than they are acked. If severe enough
+   // it can overflow the queue and overwrite stats for a share.
    pthread_mutex_lock( &stats_lock );
 
-   // if buffer full discard the stats and don't increment pointer.
-   // We're on the clock so let share_result report it.
-   if ( share_stats[ s_put_ptr ].submit_time.tv_sec == 0 )
-   {
-      gettimeofday( &share_stats[ s_put_ptr ].submit_time, NULL );
-      share_stats[ s_put_ptr ].share_diff = work->sharediff;
-      share_stats[ s_put_ptr ].net_diff = net_diff;
-      s_put_ptr = stats_ptr_incr( s_put_ptr );
-   }
+   gettimeofday( &share_stats[ s_put_ptr ].submit_time, NULL );
+   share_stats[ s_put_ptr ].share_diff = work->sharediff;
+   share_stats[ s_put_ptr ].net_diff = net_diff;
+   share_stats[ s_put_ptr ].stratum_diff = stratum_diff;
+   share_stats[ s_put_ptr ].target_diff = work->targetdiff;
+
+   s_put_ptr = stats_ptr_incr( s_put_ptr );
 
    pthread_mutex_unlock( &stats_lock );
 }
@@ -1715,10 +1776,11 @@ bool submit_solution( struct work *work, void *hash,
 {
   if ( submit_work( thr, work ) )
   {
+     submitted_share_count++;
      work_set_target_ratio( work, hash );
      if ( !opt_quiet )
         applog( LOG_BLUE, "Share %d submitted by thread %d",
-            accepted_share_count + rejected_share_count + 1, thr->id );
+            submitted_share_count, thr->id );
      return true;
   }
   else
@@ -1731,10 +1793,11 @@ bool submit_lane_solution( struct work *work, void *hash,
 {
   if ( submit_work( thr, work ) )
   {
+     submitted_share_count++;
      work_set_target_ratio( work, hash );
      if ( !opt_quiet )
         applog( LOG_BLUE, "Share %d submitted by thread %d, lane %d",
-            accepted_share_count + rejected_share_count + 1, thr->id, lane );
+            submitted_share_count, thr->id, lane );
      return true;
   }
   else
@@ -1793,22 +1856,8 @@ static bool wanna_mine(int thr_id)
 	return state;
 }
 
-void std_wait_for_diff()
-{
-   while ( time(NULL) >= g_work_time + 120 )
-     sleep(1);
-}
-
 // Common target functions, default usually listed first.
 
-// pick your favorite or define your own
-int64_t get_max64_0x1fffffLL() { return 0x1fffffLL; } // default
-int64_t get_max64_0x40LL()     { return 0x40LL;     }
-int64_t get_max64_0x3ffff()    { return 0x3ffff;    }
-int64_t get_max64_0x3fffffLL() { return 0x3fffffLL; }
-int64_t get_max64_0x1ffff()    { return 0x1ffff;    }
-int64_t get_max64_0xffffLL()   { return 0xffffLL;   };
-
 // default
 void sha256d_gen_merkle_root( char* merkle_root, struct stratum_ctx* sctx )
 {
@@ -1943,9 +1992,11 @@ static void *miner_thread( void *userdata )
    // what is an appropriate value that is completely neutral?
    // zero seems to work. No, it breaks benchmark.
 //   uint32_t end_nonce = 0;
-   uint32_t end_nonce = opt_benchmark
-                      ? ( 0xffffffffU / opt_n_threads ) * (thr_id + 1) - 0x20
-                      : 0;
+//   uint32_t end_nonce = opt_benchmark
+//                      ? ( 0xffffffffU / opt_n_threads ) * (thr_id + 1) - 0x20
+//                      : 0;
+   uint32_t end_nonce = 0xffffffffU / opt_n_threads  * (thr_id + 1) - 0x20;
+
    time_t   firstwork_time = 0;
    int  i;
    memset( &work, 0, sizeof(work) );
@@ -1996,37 +2047,42 @@ static void *miner_thread( void *userdata )
       // Default affinity
       if ( (opt_affinity == (uint128_t)(-1) ) && opt_n_threads > 1 )
       {  
+         affine_to_cpu_mask( thr_id, (uint128_t)1 << (thr_id % num_cpus) );
          if ( opt_debug )
             applog( LOG_DEBUG, "Binding thread %d to cpu %d.",
                     thr_id, thr_id % num_cpus,
 	                 u128_hi64( (uint128_t)1 << (thr_id % num_cpus) ),
 		              u128_lo64( (uint128_t)1 << (thr_id % num_cpus) ) );
-         affine_to_cpu_mask( thr_id, (uint128_t)1 << (thr_id % num_cpus) );
       }
 #else
-      if ( (opt_affinity == -1LL) && opt_n_threads > 1 ) 
+      if ( ( opt_affinity == -1 ) && ( opt_n_threads > 1 ) ) 
       {
+         affine_to_cpu_mask( thr_id, 1ULL << (thr_id % num_cpus) );
          if (opt_debug)
             applog( LOG_DEBUG, "Binding thread %d to cpu %d.",
-                thr_id, thr_id % num_cpus, 1LL << (thr_id % num_cpus)) ;
-         affine_to_cpu_mask( thr_id, 1ULL << (thr_id % num_cpus) );
+                thr_id, thr_id % num_cpus, 1 << (thr_id % num_cpus)) ;
       }
 #endif
       else   // Custom affinity
       {
+         affine_to_cpu_mask( thr_id, opt_affinity );
+         if ( opt_debug )
+         {
 #if AFFINITY_USES_UINT128
-         if (opt_debug)
-             applog( LOG_DEBUG, "Binding thread %d to mask %016llx %016llx",
+            if ( num_cpus > 64 )
+               applog( LOG_DEBUG, "Binding thread %d to mask %016llx %016llx",
                                 thr_id, u128_hi64( opt_affinity ), 
                                         u128_lo64( opt_affinity ) );
+            else
+               applog( LOG_DEBUG, "Binding thread %d to mask %016llx",
+                                 thr_id, u128_lo64( opt_affinity ) );
 #else
-         if (opt_debug)
-             applog( LOG_DEBUG, "Binding thread %d to mask %016llx",
+            applog( LOG_DEBUG, "Binding thread %d to mask %016llx",
                                  thr_id, opt_affinity );
 #endif
-      affine_to_cpu_mask( thr_id, opt_affinity );
+         }
       }
-   }
+   }  // num_cpus > 1
 
    if ( !algo_gate.miner_thread_init( thr_id ) )
    {
@@ -2034,18 +2090,20 @@ static void *miner_thread( void *userdata )
       exit (1);
    }
 
+   // wait for stratum to send first job
+   if ( have_stratum ) while ( !stratum.job.job_id ) sleep(1);
+
    while (1)
    {
        uint64_t hashes_done;
        struct timeval tv_start, tv_end, diff;
-       int64_t max64;
+       int64_t max64 = 1000;
        int nonce_found = 0;
 
        if ( algo_gate.do_this_thread( thr_id ) )
        {
           if ( have_stratum )
           {
-              algo_gate.wait_for_diff( &stratum );
       	     pthread_mutex_lock( &g_work_lock );
               if ( *algo_gate.get_nonceptr( work.data ) >= end_nonce )
                  algo_gate.stratum_gen_work( &stratum, &g_work );
@@ -2117,11 +2175,13 @@ static void *miner_thread( void *userdata )
           }
           if ( remain < max64 ) max64 = remain;
        }
-       // max64
+       // Select nonce range for approx 1 min duration based
+       // on hashrate, initial value arbitrarily set to 1000 just to get
+       // a sample hashrate for the next time.
        uint32_t work_nonce = *( algo_gate.get_nonceptr( work.data ) );
-       max64 *= thr_hashrates[thr_id];
+       max64 = 60 * thr_hashrates[thr_id];
        if ( max64 <= 0)
-          max64 = (int64_t)algo_gate.get_max64();
+          max64 = 1000;
        if ( work_nonce + max64 > end_nonce )
           max_nonce = end_nonce;
        else
@@ -2171,8 +2231,6 @@ static void *miner_thread( void *userdata )
              pthread_mutex_unlock( &g_work_lock );
           }
        }
-       // Check if time for summary report
-       report_summary_log( false );
        // display hashrate
        if ( !opt_quiet )
        {
@@ -2199,28 +2257,6 @@ static void *miner_thread( void *userdata )
                                   thr_id, hc, hc_units, hr, hr_units );
              }
           }
-/*
-          if ( thr_id == 0 && !opt_benchmark )
-          {
-             hashcount = 0.;
-             hashrate = 0.;
-             for ( i = 0; i < opt_n_threads; i++ )
-             {
-                 hashrate  += thr_hashrates[i];
-                 hashcount += thr_hashcount[i];
-             }
-             if ( hashcount != 0. )
-             {
-                scale_hash_for_display( &hashcount, hc_units );
-                scale_hash_for_display( &hashrate,  hr_units );
-                if ( hc_units[0] )
-                   sprintf( hc, "%.2f", hashcount );
-                else  // no fractions of a hash
-                   sprintf( hc, "%.0f", hashcount );
-                sprintf( hr, "%.2f", hashrate );
-             }
-          }
-*/
        }
 
        // Display benchmark total
@@ -2260,8 +2296,8 @@ static void *miner_thread( void *userdata )
                          hc, hc_units, hr, hr_units, (uint32_t)cpu_temp(0) );
 #endif
              }
-	  }
-       }
+	       }
+       }  // benchmark
    }  // miner_thread loop
 
 out:
@@ -2582,11 +2618,7 @@ void std_stratum_gen_work( struct stratum_ctx *sctx, struct work *g_work )
        char share_ttf[32];
   
        if ( stratum_diff != sctx->job.diff )
-       {
-          // If diff is changing report summary from old diff first.       
-          report_summary_log( stratum_diff != 0. );
           applog( LOG_BLUE, "New stratum difficulty" );
-       }
        if ( last_block_height != sctx->block_height )
           applog( LOG_BLUE, "New block" );
 
@@ -2614,6 +2646,8 @@ void jr2_stratum_gen_work( struct stratum_ctx *sctx, struct work *g_work )
    work_free( g_work );
    work_copy( g_work, &sctx->work );
    pthread_mutex_unlock( &sctx->work_lock );
+   if ( last_block_height != stratum.block_height )
+       last_block_height = stratum.block_height;
 }
 
 static void *stratum_thread(void *userdata )
@@ -2642,6 +2676,8 @@ static void *stratum_thread(void *userdata )
           }
           else // if ( !opt_quiet )
 	          applog(LOG_WARNING, "Stratum connection reset");
+          // reset stats queue as well
+          s_get_ptr = s_put_ptr = 0;
       }
 
       while ( !stratum.curl )
@@ -2673,6 +2709,9 @@ static void *stratum_thread(void *userdata )
          }
       }
 
+      report_summary_log( ( stratum_diff != stratum.job.diff )
+                       && ( stratum_diff != 0. ) );
+      
       if ( stratum.job.job_id
           && ( !g_work_time || strcmp( stratum.job.job_id, g_work.job_id ) ) )
       {
@@ -2682,40 +2721,53 @@ static void *stratum_thread(void *userdata )
          pthread_mutex_unlock(&g_work_lock);
          restart_threads();
 
+/*
          if ( stratum.job.clean || jsonrpc_2 )
          {
             static uint32_t last_block_height;
             if ( last_block_height != stratum.block_height )
             {
                last_block_height = stratum.block_height;
-/*
-               if ( !opt_quiet )
-               {
-                  if ( net_diff > 0. )
-                     applog( LOG_BLUE,
-                             "%s block %d, job %s, network diff %.4f",
-                             algo_names[opt_algo], stratum.bloc_height,
-                             g_work.job_id, net_diff);
-                  else
-	                  applog( LOG_BLUE, "%s %s block %d, job %s",
-                             short_url, algo_names[opt_algo],
-                             stratum.bloc_height, g_work.job_id );
-	             }
-*/
             }
-//            else if ( !opt_quiet )
-//               applog( LOG_BLUE,"New job %s.", g_work.job_id );
 
          }
-         else if (opt_debug && !opt_quiet)
+         else
+*/
+         if (opt_debug && !opt_quiet)
          {
             applog( LOG_BLUE, "%s asks job %d for block %d", short_url,
                 strtoul( stratum.job.job_id, NULL, 16 ), stratum.block_height );
          }
       }  // stratum.job.job_id
 
+     if ( stratum_socket_full( &stratum, opt_timeout ) )
+     {
+        s = stratum_recv_line(&stratum);
+        if ( !s )
+           applog(LOG_WARNING, "Stratum connection interrupted");
+     }
+     else
+     {
+        s = NULL;
+        applog(LOG_ERR, "Stratum connection timeout");
+     }
+
+     if ( s )
+     {
+        if ( !stratum_handle_method( &stratum, s ) )
+           stratum_handle_response( s );
+        free( s );
+     }
+     else
+     {
+        // stratum_errors++;
+        // check if this is redundant
+        stratum_disconnect( &stratum );
+     }   
+/*
      if ( !stratum_socket_full( &stratum, opt_timeout ) )
      {
+        stratum_errors++;
         applog(LOG_ERR, "Stratum connection timeout");
         s = NULL;
      }
@@ -2724,13 +2776,14 @@ static void *stratum_thread(void *userdata )
      if ( !s )
      {
         stratum_disconnect(&stratum);
-//	  applog(LOG_WARNING, "Stratum connection interrupted");
+        applog(LOG_WARNING, "Stratum connection interrupted");
         continue;
      }
      if (!stratum_handle_method(&stratum, s))
           stratum_handle_response(s);
      free(s);
-  }  // loop
+*/
+   }  // loop
 out:
   return NULL;
 }
@@ -3074,10 +3127,6 @@ void parse_arg(int key, char *arg )
 	case 1012:
 		opt_extranonce = false;
 		break;
-	case 1013:
-      applog( LOG_WARNING, "hide-diff option is deprecated and has no effect.\n                      It will be removed in a future release. Stop using it.");
-		opt_showdiff = false;
-		break;
    case 1014:   // hash-meter
       opt_hash_meter = true;
       break;
@@ -3124,7 +3173,7 @@ void parse_arg(int key, char *arg )
 // than 64 CPUs, otherwise zero extend the upper half.
                 opt_affinity = (uint128_t)ul;
                 if ( num_cpus > 64 )
-                   opt_affinity = (opt_affinity << 64 ) | (uint128_t)ul;
+                   opt_affinity = (opt_affinity << 64 ) | opt_affinity;
 #else
                    opt_affinity = ul;
 #endif
@@ -3303,10 +3352,10 @@ bool check_cpu_capability ()
      bool cpu_has_sse2   = has_sse2();
      bool cpu_has_aes    = has_aes_ni();
      bool cpu_has_sse42  = has_sse42();
-     bool cpu_has_avx    = has_avx1();
+     bool cpu_has_avx    = has_avx();
      bool cpu_has_avx2   = has_avx2();
      bool cpu_has_sha    = has_sha();
-     bool cpu_has_avx512 = has_avx512f();
+     bool cpu_has_avx512 = has_avx512();
      bool sw_has_aes    = false;
      bool sw_has_sse42  = false;
      bool sw_has_avx    = false;
@@ -3340,16 +3389,16 @@ bool check_cpu_capability ()
      #ifdef __AVX2__
          sw_has_avx2 = true;
      #endif
-     #ifdef __AVX512F__
+     #if (defined(__AVX512F__) && defined(__AVX512DQ__) && defined(__AVX512BW__) && defined(__AVX512VL__))
          sw_has_avx512 = true;
      #endif
      #ifdef __SHA__
          sw_has_sha = true;
      #endif
 
-     #if !((__AES__) || (__SSE2__))
-         printf("Neither __AES__ nor __SSE2__ defined.\n");
-     #endif
+//     #if !((__AES__) || (__SSE2__))
+//         printf("Neither __AES__ nor __SSE2__ defined.\n");
+//     #endif
 
      cpu_brand_string( cpu_brand );
      printf( "CPU: %s.\n", cpu_brand );
@@ -3465,7 +3514,7 @@ int main(int argc, char *argv[])
 	rpc_pass = strdup("");
 	opt_api_allow = strdup("127.0.0.1"); /* 0.0.0.0 for all ips */
 
-        parse_cmdline(argc, argv);
+   parse_cmdline(argc, argv);
 
 #if defined(WIN32)
 //	SYSTEM_INFO sysinfo;
@@ -3486,9 +3535,9 @@ int main(int argc, char *argv[])
 		applog(LOG_DEBUG, "Found %d cpus on cpu group %d", cpus, i);
 	}
 #else
-      SYSTEM_INFO sysinfo;
-      GetSystemInfo(&sysinfo);
-      num_cpus = sysinfo.dwNumberOfProcessors;
+   SYSTEM_INFO sysinfo;
+   GetSystemInfo(&sysinfo);
+   num_cpus = sysinfo.dwNumberOfProcessors;
 #endif
 
 #elif defined(_SC_NPROCESSORS_CONF)
@@ -3504,21 +3553,21 @@ int main(int argc, char *argv[])
 		num_cpus = 1;
 
 
-        if (!opt_n_threads)
-                opt_n_threads = num_cpus;
+   if (!opt_n_threads)
+      opt_n_threads = num_cpus;
 
-        if ( opt_algo == ALGO_NULL )
-        {
-            fprintf(stderr, "%s: no algo supplied\n", argv[0]);
-            show_usage_and_exit(1);
-        }
+   if ( opt_algo == ALGO_NULL )
+   {
+      fprintf(stderr, "%s: no algo supplied\n", argv[0]);
+      show_usage_and_exit(1);
+   }
 	if ( !opt_benchmark )
-        {
-            if ( !short_url )
-            {
-               fprintf(stderr, "%s: no URL supplied\n", argv[0]);
-               show_usage_and_exit(1);
-            }
+   {
+      if ( !short_url )
+      {
+         fprintf(stderr, "%s: no URL supplied\n", argv[0]);
+         show_usage_and_exit(1);
+      }
 /*
             if ( !rpc_url )
             {
@@ -3557,6 +3606,7 @@ int main(int argc, char *argv[])
    memset( share_stats, 0, 2 *  sizeof (struct share_stats_t) );
    gettimeofday( &last_submit_time, NULL );
    memcpy( &five_min_start, &last_submit_time, sizeof (struct timeval) );
+   memcpy( &session_start, &last_submit_time, sizeof (struct timeval) );
 
    if ( !check_cpu_capability() ) exit(1);
 
@@ -3578,7 +3628,7 @@ int main(int argc, char *argv[])
 
 #ifndef WIN32
 	if (opt_background)
-        {
+   {
 		i = fork();
 		if (i < 0) exit(1);
 		if (i > 0) exit(0);
@@ -3596,7 +3646,7 @@ int main(int argc, char *argv[])
 #else
 	SetConsoleCtrlHandler((PHANDLER_ROUTINE)ConsoleHandler, TRUE);
 	if (opt_background)
-        {
+   {
 		HWND hcon = GetConsoleWindow();
 		if (hcon) {
 			// this method also hide parent command line window
@@ -3608,7 +3658,7 @@ int main(int argc, char *argv[])
 		}
 	}
 	if (opt_priority > 0)
-        {
+   {
 		DWORD prio = NORMAL_PRIORITY_CLASS;
 		switch (opt_priority) {
 		case 1:
@@ -3631,20 +3681,32 @@ int main(int argc, char *argv[])
      applog( LOG_INFO,"%u CPU cores available, %u miner threads selected.",
              num_cpus, opt_n_threads );
 
-// To be reviewed
+// To be confirmed with more than 64 cpus
    if ( opt_affinity != -1 )
    {
-      if ( num_cpus > 64 )
+      if ( !affinity_uses_uint128 && num_cpus > 64 )
       {
-          applog(LOG_WARNING,"--cpu-affinity argument is not supported with more");
-          applog(LOG_WARNING," than 64 CPUs, using default affinity.");
+          applog(LOG_WARNING,"Setting CPU affinity with more than 64 CPUs is only");
+          applog(LOG_WARNING,"available on Linux. Using default affinity.");
           opt_affinity = -1;
       }
       else	
       {
-         if (!opt_quiet)
-            applog(LOG_DEBUG, "Binding process to cpu mask %x", opt_affinity);
-         affine_to_cpu_mask( -1, (unsigned long)opt_affinity );
+         affine_to_cpu_mask( -1, opt_affinity );
+         if ( !opt_quiet )
+         {
+#if AFFINITY_USES_UINT128
+            if ( num_cpus > 64 )
+               applog(LOG_DEBUG, "Binding process to cpu mask %016llx %016llx",
+                      u128_hi64( opt_affinity ), u128_lo64( opt_affinity ) );
+            else 
+               applog(LOG_DEBUG, "Binding process to cpu mask %016llx",
+                      u128_lo64( opt_affinity ) );
+#else
+               applog(LOG_DEBUG, "Binding process to cpu mask %016llx",
+                      opt_affinity );
+#endif
+         }
       }
    }
 
@@ -3687,7 +3749,7 @@ int main(int argc, char *argv[])
 
 	/* ESET-NOD32 Detects these 2 thread_create... */
 	if (want_longpoll && !have_stratum)
-        {
+   {
 		/* init longpoll thread info */
 		longpoll_thr_id = opt_n_threads + 1;
 		thr = &thr_info[longpoll_thr_id];
@@ -3703,7 +3765,7 @@ int main(int argc, char *argv[])
 		}
 	}
 	if (want_stratum)
-        {
+   {
 		/* init stratum thread info */
 		stratum_thr_id = opt_n_threads + 2;
 		thr = &thr_info[stratum_thr_id];
@@ -3723,7 +3785,7 @@ int main(int argc, char *argv[])
 	}
 
 	if (opt_api_listen)
-        {
+   {
 		/* api thread */
 		api_thr_id = opt_n_threads + 3;
 		thr = &thr_info[api_thr_id];
@@ -3740,7 +3802,7 @@ int main(int argc, char *argv[])
 
 	/* start mining threads */
 	for (i = 0; i < opt_n_threads; i++)
-        {
+   {
 		thr = &thr_info[i];
 		thr->id = i;
 		thr->q = tq_new();
diff --git a/miner.h b/miner.h
index 5a85dda..73face1 100644
--- a/miner.h
+++ b/miner.h
@@ -352,6 +352,7 @@ bool   submit_lane_solution( struct work *work, void *hash,
 
 
 void   get_currentalgo( char* buf, int sz );
+/*
 bool   has_sha();
 bool   has_aes_ni();
 bool   has_avx1();
@@ -368,6 +369,7 @@ void   cpu_getmodelid(char *outbuf, size_t maxsz);
 void   cpu_brand_string( char* s );
 
 float cpu_temp( int core );
+*/
 
 struct work {
 	uint32_t data[48] __attribute__ ((aligned (64)));
@@ -724,7 +726,6 @@ extern bool opt_debug;
 extern bool opt_debug_diff;
 extern bool opt_benchmark;
 extern bool opt_protocol;
-extern bool opt_showdiff;
 extern bool opt_extranonce;
 extern bool opt_quiet;
 extern bool opt_redirect;
@@ -867,7 +868,7 @@ Options:\n\
                           x16rv2        Ravencoin (RVN)\n\
                           x16rt         Gincoin (GIN)\n\
                           x16rt-veil    Veil (VEIL)\n\
-                          x16s          Pigeoncoin (PGN)\n\
+                          x16s\n\
                           x17\n\
                           x21s\n\
                           xevan         Bitsend (BSD)\n\
@@ -901,7 +902,6 @@ Options:\n\
       -f, --diff-factor     Divide req. difficulty by this factor (std is 1.0)\n\
   -m, --diff-multiplier Multiply difficulty by this factor (std is 1.0)\n\
       --hash-meter      Display thread hash rates\n\
-      --hide-diff       Do not display changes in difficulty\n\
       --coinbase-addr=ADDR  payout address for solo mining\n\
       --coinbase-sig=TEXT  data to insert in the coinbase when possible\n\
       --no-longpoll     disable long polling support\n\
@@ -965,7 +965,6 @@ static struct option const options[] = {
         { "diff", 1, NULL, 'f' }, // deprecated (alias)
         { "diff-multiplier", 1, NULL, 'm' },
         { "hash-meter", 0, NULL, 1014 },
-        { "hide-diff", 0, NULL, 1013 },
         { "help", 0, NULL, 'h' },
         { "key", 1, NULL, 'K' },
         { "no-gbt", 0, NULL, 1011 },
diff --git a/simd-utils/simd-128.h b/simd-utils/simd-128.h
index a507f49..1a81efa 100644
--- a/simd-utils/simd-128.h
+++ b/simd-utils/simd-128.h
@@ -298,30 +298,38 @@ static inline void memcpy_128( __m128i *dst, const __m128i *src, const int n )
 // 64 and 32 bit elements.
 
 // compiler doesn't like when a variable is used for the last arg of
-// _mm_rol_epi32, must be "8 bit immediate".
+// _mm_rol_epi32, must be "8 bit immediate". Therefore use rol_var where
+// necessary.
 // sm3-hash-4way.c fails to compile.
+
+#define mm128_ror_var_64( v, c ) \
+   _mm_or_si128( _mm_srli_epi64( v, c ), _mm_slli_epi64( v, 64-(c) ) )
+
+#define mm128_rol_var_64( v, c ) \
+   _mm_or_si128( _mm_slli_epi64( v, c ), _mm_srli_epi64( v, 64-(c) ) )
+
+#define mm128_ror_var_32( v, c ) \
+   _mm_or_si128( _mm_srli_epi32( v, c ), _mm_slli_epi32( v, 32-(c) ) )
+
+#define mm128_rol_var_32( v, c ) \
+   _mm_or_si128( _mm_slli_epi32( v, c ), _mm_srli_epi32( v, 32-(c) ) )
+
+
 /*
 #if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
 
-#define mm128_ror_64( v, c )    _mm_ror_epi64( v, c )
-#define mm128_rol_64( v, c )    _mm_rol_epi64( v, c )
-#define mm128_ror_32( v, c )    _mm_ror_epi32( v, c )
-#define mm128_rol_32( v, c )    _mm_rol_epi32( v, c )
+#define mm128_ror_64    _mm_ror_epi64
+#define mm128_rol_64    _mm_rol_epi64
+#define mm128_ror_32    _mm_ror_epi32
+#define mm128_rol_32    _mm_rol_epi32
 
 #else
 */
 
-#define mm128_ror_64( v, c ) \
-   _mm_or_si128( _mm_srli_epi64( v, c ), _mm_slli_epi64( v, 64-(c) ) )
-
-#define mm128_rol_64( v, c ) \
-   _mm_or_si128( _mm_slli_epi64( v, c ), _mm_srli_epi64( v, 64-(c) ) )
-
-#define mm128_ror_32( v, c ) \
-   _mm_or_si128( _mm_srli_epi32( v, c ), _mm_slli_epi32( v, 32-(c) ) )
-
-#define mm128_rol_32( v, c ) \
-   _mm_or_si128( _mm_slli_epi32( v, c ), _mm_srli_epi32( v, 32-(c) ) )
+#define mm128_ror_64   mm128_ror_var_64
+#define mm128_rol_64   mm128_rol_var_64
+#define mm128_ror_32   mm128_ror_var_32
+#define mm128_rol_32   mm128_rol_var_32
 
 //#endif   // AVX512 else
 
diff --git a/simd-utils/simd-256.h b/simd-utils/simd-256.h
index 185cd37..e850697 100644
--- a/simd-utils/simd-256.h
+++ b/simd-utils/simd-256.h
@@ -367,38 +367,49 @@ static inline void memcpy_256( __m256i *dst, const __m256i *src, const int n )
 //
 // AVX512 has bit rotate for 256 bit vectors with 64 or 32 bit elements
 
+
 // compiler doesn't like when a variable is used for the last arg of
-// _mm_rol_epi32, must be "8 bit immediate".
+// _mm_rol_epi32, must be "8 bit immediate". Therefore use rol_var where
+// necessary. 
+
+#define mm256_ror_var_64( v, c ) \
+   _mm256_or_si256( _mm256_srli_epi64( v, c ), \
+                    _mm256_slli_epi64( v, 64-(c) ) )
+
+#define mm256_rol_var_64( v, c ) \
+   _mm256_or_si256( _mm256_slli_epi64( v, c ), \
+                    _mm256_srli_epi64( v, 64-(c) ) )
+
+#define mm256_ror_var_32( v, c ) \
+   _mm256_or_si256( _mm256_srli_epi32( v, c ), \
+                    _mm256_slli_epi32( v, 32-(c) ) )
+
+#define mm256_rol_var_32( v, c ) \
+   _mm256_or_si256( _mm256_slli_epi32( v, c ), \
+                    _mm256_srli_epi32( v, 32-(c) ) )
+
 /*
 #if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
 
-#define mm256_ror_64( v, c )    _mm256_ror_epi64( v, c )
-#define mm256_rol_64( v, c )    _mm256_rol_epi64( v, c )
-#define mm256_ror_32( v, c )    _mm256_ror_epi32( v, c )
-#define mm256_rol_32( v, c )    _mm256_rol_epi32( v, c )
+// AVX512, control must be 8 bit immediate.
+
+#define mm256_ror_64    _mm256_ror_epi64
+#define mm256_rol_64    _mm256_rol_epi64
+#define mm256_ror_32    _mm256_ror_epi32
+#define mm256_rol_32    _mm256_rol_epi32
 
 #else
 */
 
-#define mm256_ror_64( v, c ) \
-   _mm256_or_si256( _mm256_srli_epi64( v, c ), \
-                    _mm256_slli_epi64( v, 64-(c) ) )
+// No AVX512, use fallback.
 
-#define mm256_rol_64( v, c ) \
-   _mm256_or_si256( _mm256_slli_epi64( v, c ), \
-                    _mm256_srli_epi64( v, 64-(c) ) )
-
-#define mm256_ror_32( v, c ) \
-   _mm256_or_si256( _mm256_srli_epi32( v, c ), \
-                    _mm256_slli_epi32( v, 32-(c) ) )
-
-#define mm256_rol_32( v, c ) \
-   _mm256_or_si256( _mm256_slli_epi32( v, c ), \
-                    _mm256_srli_epi32( v, 32-(c) ) )
+#define mm256_ror_64    mm256_ror_var_64 
+#define mm256_rol_64    mm256_rol_var_64
+#define mm256_ror_32    mm256_ror_var_32
+#define mm256_rol_32    mm256_rol_var_32
 
 // #endif     // AVX512 else
 
-
 #define  mm256_ror_16( v, c ) \
    _mm256_or_si256( _mm256_srli_epi16( v, c ), \
                     _mm256_slli_epi16( v, 16-(c) ) )
diff --git a/simd-utils/simd-512.h b/simd-utils/simd-512.h
index d5ed059..cd8d514 100644
--- a/simd-utils/simd-512.h
+++ b/simd-utils/simd-512.h
@@ -278,7 +278,7 @@ static inline __m512i mm512_neg1_fn()
 // Horizontal vector testing
 
 #define mm512_allbits0( a )    _mm512_cmpeq_epi64_mask( a, m512_zero )
-#define mm256_allbits1( a )    _mm512_cmpeq_epi64_mask( a, m512_neg1 )
+#define mm512_allbits1( a )    _mm512_cmpeq_epi64_mask( a, m512_neg1 )
 #define mm512_anybits0( a )    _mm512_cmpneq_epi64_mask( a, m512_neg1 )
 #define mm512_anybits1( a )    _mm512_cmpneq_epi64_mask( a, m512_zero )
 
@@ -287,11 +287,30 @@ static inline __m512i mm512_neg1_fn()
 // Bit rotations.
 
 // AVX512F has built-in fixed and variable bit rotation for 64 & 32 bit
-// elements and can be called directly.
+// elements and can be called directly. But the fixed rotates only accept
+// an 8 bit immediate for the control arg.
 //
 // _mm512_rol_epi64,  _mm512_ror_epi64,  _mm512_rol_epi32,  _mm512_ror_epi32
 // _mm512_rolv_epi64, _mm512_rorv_epi64, _mm512_rolv_epi32, _mm512_rorv_epi32
 //
+
+#define mm512_ror_var_64( v, c ) \
+   _mm512_or_si512( _mm512_srli_epi64( v, c ), \
+                    _mm512_slli_epi64( v, 64-(c) ) )
+
+#define mm512_rol_var_64( v, c ) \
+   _mm512_or_si512( _mm512_slli_epi64( v, c ), \
+                    _mm512_srli_epi64( v, 64-(c) ) )
+
+#define mm512_ror_var_32( v, c ) \
+   _mm512_or_si512( _mm512_srli_epi32( v, c ), \
+                    _mm512_slli_epi32( v, 32-(c) ) )
+
+#define mm512_rol_var_32( v, c ) \
+   _mm512_or_si512( _mm512_slli_epi32( v, c ), \
+                    _mm512_srli_epi32( v, 32-(c) ) )
+
+
 // Here is a fixed bit rotate for 16 bit elements:
 #define mm512_ror_16( v, c ) \
     _mm512_or_si512( _mm512_srli_epi16( v, c ), \
@@ -300,6 +319,8 @@ static inline __m512i mm512_neg1_fn()
     _mm512_or_si512( _mm512_slli_epi16( v, c ), \
                      _mm512_srli_epi16( v, 16-(c) )
 
+
+
 // Rotations using a vector control index are very slow due to overhead
 // to generate the index vector. Repeated rotations using the same index
 // are better handled by the calling function where the index only needs
diff --git a/sysinfos.c b/sysinfos.c
index cf8fb8f..76f9815 100644
--- a/sysinfos.c
+++ b/sysinfos.c
@@ -1,8 +1,13 @@
+#if !defined(SYSINFOS_C__)
+#define SYSINFOS_C__
+
 /**
  * Unit to read cpu informations
  *
  * tpruvot 2014
- */
+ * JayDDee 2019
+ * 
+*/
 
 #include <stdio.h>
 #include <ctype.h>
@@ -28,7 +33,7 @@
 #define HWMON_ALT5 \
 "/sys/class/hwmon/hwmon0/device/temp1_input"
 
-static float linux_cputemp(int core)
+static inline float linux_cputemp(int core)
 {
 	float tc = 0.0;
 	FILE *fd = fopen(HWMON_PATH, "r");
@@ -60,7 +65,7 @@ static float linux_cputemp(int core)
 
 #define CPUFREQ_PATH \
  "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_cur_freq"
-static uint32_t linux_cpufreq(int core)
+static inline uint32_t linux_cpufreq(int core)
 {
 	FILE *fd = fopen(CPUFREQ_PATH, "r");
 	uint32_t freq = 0;
@@ -76,7 +81,7 @@ static uint32_t linux_cpufreq(int core)
 
 #else /* WIN32 */
 
-static float win32_cputemp(int core)
+static inline float win32_cputemp(int core)
 {
 	// todo
 	return 0.0;
@@ -88,7 +93,7 @@ static float win32_cputemp(int core)
 /* exports */
 
 
-float cpu_temp(int core)
+static inline float cpu_temp(int core)
 {
 #ifdef WIN32
 	return win32_cputemp(core);
@@ -97,7 +102,7 @@ float cpu_temp(int core)
 #endif
 }
 
-uint32_t cpu_clock(int core)
+static inline uint32_t cpu_clock(int core)
 {
 #ifdef WIN32
 	return 0;
@@ -106,7 +111,7 @@ uint32_t cpu_clock(int core)
 #endif
 }
 
-int cpu_fanpercent()
+static inline int cpu_fanpercent()
 {
 	return 0;
 }
@@ -142,7 +147,7 @@ static inline void cpuid(int functionnumber, int output[4]) {
 #define cpuid(fn, out) out[0] = 0;
 #endif
 
-void cpu_getname(char *outbuf, size_t maxsz)
+static inline void cpu_getname(char *outbuf, size_t maxsz)
 {
    memset(outbuf, 0, maxsz);
 #ifdef WIN32
@@ -190,7 +195,7 @@ void cpu_getname(char *outbuf, size_t maxsz)
 #endif
 }
 
-void cpu_getmodelid(char *outbuf, size_t maxsz)
+static inline void cpu_getmodelid(char *outbuf, size_t maxsz)
 {
    memset(outbuf, 0, maxsz);
 #ifdef WIN32
@@ -259,32 +264,47 @@ void cpu_getmodelid(char *outbuf, size_t maxsz)
 #define CPU_BRAND_2          (0x80000003)
 #define CPU_BRAND_3          (0x80000004)
 
+// Registers
 #define EAX_Reg  (0)
 #define EBX_Reg  (1)
 #define ECX_Reg  (2)
 #define EDX_Reg  (3)
 
-#define XSAVE_Flag    (1<<26) // ECX
+// Feature flags
+
+// CPU_INFO ECX
+#define XSAVE_Flag    (1<<26) 
 #define OSXSAVE_Flag  (1<<27)
-#define AVX1_Flag     (1<<28)
+#define AVX_Flag     (1<<28)
 #define XOP_Flag      (1<<11)
 #define FMA3_Flag     (1<<12)
 #define AES_Flag      (1<<25)
 #define SSE42_Flag    (1<<20)
 
+// CPU_INFO EDX
 #define SSE_Flag      (1<<25) // EDX
 #define SSE2_Flag     (1<<26) 
 
-#define AVX2_Flag     (1<< 5) // ADV EBX
+// EXTENDED_FEATURES EBX
+#define AVX2_Flag     (1<< 5)
 #define AVX512F_Flag  (1<<16)
+#define AVX512DQ_Flag (1<<17)
 #define SHA_Flag      (1<<29)
+#define AVX512BW_Flag (1<<30)
+#define AVX512VL_Flag (1U<<31) // unsigned: 1<<31 overflows a signed int
+
+// EXTENDED_FEATURES ECX
+#define AVX512VBMI_Flag  (1<<1) 
+#define AVX512VBMI2_Flag (1<<6)
+#define AVX512VAES_Flag  (1<<9)
+
 
 // Use this to detect presence of feature
-#define AVX1_mask     (AVX1_Flag|XSAVE_Flag|OSXSAVE_Flag)
-#define FMA3_mask     (FMA3_Flag|AVX1_mask)
+#define AVX_mask     (AVX_Flag|XSAVE_Flag|OSXSAVE_Flag)
+#define FMA3_mask     (FMA3_Flag|AVX_mask)
+#define AVX512_mask   (AVX512VL_Flag|AVX512BW_Flag|AVX512DQ_Flag|AVX512F_Flag)
 
-
-static inline bool has_sha_()
+static inline bool has_sha()
 {
 #ifdef __arm__
     return false;
@@ -295,10 +315,7 @@ static inline bool has_sha_()
 #endif
 }
 
-bool has_sha() { return has_sha_(); }
-
-
-static inline bool has_sse2_()
+static inline bool has_sse2()
 {
 #ifdef __arm__
     return false;
@@ -309,10 +326,8 @@ static inline bool has_sse2_()
 #endif
 }
 
-bool has_sse2() { return has_sse2_(); } 
-
-// nehalem and above, no AVX1 on nehalem
-static inline bool has_aes_ni_()
+// nehalem and above, no AVX on nehalem
+static inline bool has_aes_ni()
 {
 #ifdef __arm__
 	return false;
@@ -323,24 +338,20 @@ static inline bool has_aes_ni_()
 #endif
 }
 
-bool has_aes_ni() { return has_aes_ni_(); }
-
 // westmere and above
-static inline bool has_avx1_()
+static inline bool has_avx()
 {
 #ifdef __arm__
         return false;
 #else
         int cpu_info[4] = { 0 };
         cpuid( CPU_INFO, cpu_info );
-        return ( ( cpu_info[ ECX_Reg ] & AVX1_mask ) == AVX1_mask );
+        return ( ( cpu_info[ ECX_Reg ] & AVX_mask ) == AVX_mask );
 #endif
 }
 
-bool has_avx1() { return has_avx1_(); }
-
 // haswell and above
-static inline bool has_avx2_()
+static inline bool has_avx2()
 {
 #ifdef __arm__
     return false;
@@ -351,9 +362,7 @@ static inline bool has_avx2_()
 #endif
 }
 
-bool has_avx2() { return has_avx2_(); }
-
-static inline bool has_avx512f_()
+static inline bool has_avx512f()
 {
 #ifdef __arm__
     return false;
@@ -364,24 +373,75 @@ static inline bool has_avx512f_()
 #endif
 }
 
-bool has_avx512f() { return has_avx512f_(); }
+static inline bool has_avx512dq()
+{
+#ifdef __arm__
+    return false;
+#else
+    int cpu_info[4] = { 0 };
+    cpuid( EXTENDED_FEATURES, cpu_info );
+    return cpu_info[ EBX_Reg ] & AVX512DQ_Flag;
+#endif
+}
 
+static inline bool has_avx512bw()
+{
+#ifdef __arm__
+    return false;
+#else
+    int cpu_info[4] = { 0 };
+    cpuid( EXTENDED_FEATURES, cpu_info );
+    return cpu_info[ EBX_Reg ] & AVX512BW_Flag;
+#endif
+}
+
+static inline bool has_avx512vl()
+{
+#ifdef __arm__
+    return false;
+#else
+    int cpu_info[4] = { 0 };
+    cpuid( EXTENDED_FEATURES, cpu_info );
+    return cpu_info[ EBX_Reg ] & AVX512VL_Flag;
+#endif
+}
+
+// True only when AVX512 F, DQ, BW and VL are all present: the minimum to be useful
+static inline bool has_avx512()
+{
+#ifdef __arm__
+    return false;
+#else
+    int cpu_info[4] = { 0 };
+    cpuid( EXTENDED_FEATURES, cpu_info );
+    return ( ( cpu_info[ EBX_Reg ] & AVX512_mask ) == AVX512_mask );
+#endif
+}
+
+static inline bool has_avx512vaes()
+{
+#ifdef __arm__
+    return false;
+#else
+    int cpu_info[4] = { 0 };
+    cpuid( EXTENDED_FEATURES, cpu_info );
+    return cpu_info[ ECX_Reg ] & AVX512VAES_Flag;
+#endif
+}
 
 // AMD only
-static inline bool has_xop_()
+static inline bool has_xop()
 {
 #ifdef __arm__
         return false;
 #else
         int cpu_info[4] = { 0 };
-        cpuid( CPU_INFO, cpu_info );
+        cpuid( EXTENDED_CPU_INFO, cpu_info );
         return cpu_info[ ECX_Reg ] & XOP_Flag;
 #endif
 }
 
-bool has_xop() { return has_xop_(); }
-
-static inline bool has_fma3_()
+static inline bool has_fma3()
 {
 #ifdef __arm__
         return false;
@@ -392,9 +452,7 @@ static inline bool has_fma3_()
 #endif
 }
 
-bool has_fma3() { return has_fma3_(); }
-
-static inline bool has_sse42_()
+static inline bool has_sse42()
 {
 #ifdef __arm__
         return false;
@@ -405,9 +463,7 @@ static inline bool has_sse42_()
 #endif
 }
 
-bool has_sse42() { return has_sse42_(); }
-
-static inline bool has_sse_()
+static inline bool has_sse()
 {
 #ifdef __arm__
         return false;
@@ -418,16 +474,14 @@ static inline bool has_sse_()
 #endif
 }
 
-bool has_sse() { return has_sse_(); }
-
-uint32_t cpuid_get_highest_function_number()
+static inline uint32_t cpuid_get_highest_function_number()
 {
   uint32_t cpu_info[4] = {0};
   cpuid( VENDOR_ID, cpu_info);
   return cpu_info[ EAX_Reg ];
 }
 
-void cpuid_get_highest_function( char* s )
+static inline void cpuid_get_highest_function( char* s )
 {
   uint32_t fn = cpuid_get_highest_function_number();
   switch (fn)
@@ -449,7 +503,7 @@ void cpuid_get_highest_function( char* s )
   }
 }
 
-void cpu_bestfeature(char *outbuf, size_t maxsz)
+static inline void cpu_bestfeature(char *outbuf, size_t maxsz)
 {
 #ifdef __arm__
 	sprintf(outbuf, "ARM");
@@ -459,19 +513,19 @@ void cpu_bestfeature(char *outbuf, size_t maxsz)
 	cpuid( CPU_INFO, cpu_info );
 	cpuid( EXTENDED_FEATURES, cpu_info_adv );
 
-        if ( has_avx1_() && has_avx2_() )
+        if ( has_avx() && has_avx2() )
               sprintf(outbuf, "AVX2");
-        else if ( has_avx1_() )
-              sprintf(outbuf, "AVX1");
-        else if ( has_fma3_() )
+        else if ( has_avx() )
+              sprintf(outbuf, "AVX");
+        else if ( has_fma3() )
               sprintf(outbuf, "FMA3");
-        else if ( has_xop_() )
+        else if ( has_xop() )
               sprintf(outbuf, "XOP");
-        else if ( has_sse42_() )
+        else if ( has_sse42() )
               sprintf(outbuf, "SSE42");
-        else if ( has_sse2_() )
+        else if ( has_sse2() )
               sprintf(outbuf, "SSE2");
-        else if ( has_sse_() )
+        else if ( has_sse() )
               sprintf(outbuf, "SSE");
         else
               *outbuf = '\0';
@@ -479,7 +533,7 @@ void cpu_bestfeature(char *outbuf, size_t maxsz)
 #endif
 }
 
-void cpu_brand_string( char* s )
+static inline void cpu_brand_string( char* s )
 {
 #ifdef __arm__
         sprintf( s, "ARM" );
@@ -498,3 +552,5 @@ void cpu_brand_string( char* s )
 #endif
 }    
 
+#endif  // SYSINFOS_C__
+
diff --git a/util.c b/util.c
index 7e07ae2..cc181ff 100644
--- a/util.c
+++ b/util.c
@@ -24,6 +24,7 @@
 #include <unistd.h>
 #include <jansson.h>
 #include <curl/curl.h>
+#include "sysinfos.c"
 #include <time.h>
 #include <sys/stat.h>
 #include <math.h>