v3.12.8.1

v3.12.8
v3.12.7
2025-09-17 23:44:27 +00:00 · 2020-04-17 16:12:45 -04:00 · 2020-04-09 12:56:18 -04:00 · 2020-03-20 16:30:12 -04:00
89 changed files with 2281 additions and 851 deletions
--- a/.RELEASE_NOTES.swp
+++ b/.RELEASE_NOTES.swp
--- a/Makefile.am
+++ b/Makefile.am
@@ -163,6 +163,7 @@ cpuminer_SOURCES = \
  algo/sha/sha256-hash-4way.c \
  algo/sha/sha512-hash-4way.c \
  algo/sha/hmac-sha256-hash.c \
+  algo/sha/hmac-sha256-hash-4way.c \
  algo/sha/sha2.c \
  algo/sha/sha256t-gate.c \
  algo/sha/sha256t-4way.c \
--- a/23
+++ b/23
@@ -65,6 +65,27 @@ If not what makes it happen or not happen?
 Change Log
 ----------

+v3.12.8.1
+
+Issue #261: Fixed yescryptr8g invalid shares.
+
+v3.12.8
+
+Yespower sha256 prehash made thread safe.
+
+Rewrote diff conversion functions from scratch to be simpler and use 
+long double (float80) and int128 arithmetic for improved accuracy and
+precision.
+
+Some code cleanup and assorted small changes.
+
+v3.12.7
+
+Issue #257: fixed a file descriptor leak which caused the CPU temperature
+and frequency query to report zeros after mining for a couple of hours.
+
+Issue #253: stale share reduction for yescrypt, sonoa.
+
 v3.12.6.1

 Issue #252: Fixed SSL mining (stratum+tcps://)
@@ -106,7 +127,7 @@ a specific algo name.
 v3.12.4.6

 Issue #246: fixed getwork repeated new block logs with same height. New work
-for the same block is now reported as "New work" instead of New block".
+for the same block is now reported as "New work" instead of "New block".
 Also added a check that work is new before generating "New work" log.

 Added target diff to getwork new block log.
--- a/algo-gate-api.c
+++ b/algo-gate-api.c
@@ -102,23 +102,16 @@ int null_hash()
   applog(LOG_WARNING,"SWERR: null_hash unsafe null function");
   return 0;
 };
-/*
-void null_hash_suw()
-{
-  applog(LOG_WARNING,"SWERR: null_hash_suw unsafe null function");
-};
-*/
+
 void init_algo_gate( algo_gate_t* gate )
 {
   gate->miner_thread_init       = (void*)&return_true;
   gate->scanhash                = (void*)&null_scanhash;
   gate->hash                    = (void*)&null_hash;
-//   gate->hash_suw                = (void*)&null_hash_suw;
   gate->get_new_work            = (void*)&std_get_new_work;
   gate->work_decode             = (void*)&std_le_work_decode;
   gate->decode_extra_data       = (void*)&do_nothing;
   gate->gen_merkle_root         = (void*)&sha256d_gen_merkle_root;
-   gate->stratum_gen_work        = (void*)&std_stratum_gen_work;
   gate->build_stratum_request   = (void*)&std_le_build_stratum_request;
   gate->malloc_txs_request      = (void*)&std_malloc_txs_request;
   gate->submit_getwork_result   = (void*)&std_le_submit_getwork_result;
@@ -232,11 +225,6 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
    case ALGO_X22I:          register_x22i_algo          ( gate ); break;
    case ALGO_X25X:          register_x25x_algo          ( gate ); break;
    case ALGO_XEVAN:         register_xevan_algo         ( gate ); break;
-/*    case ALGO_YESCRYPT:     register_yescrypt_05_algo     ( gate ); break;
-     case ALGO_YESCRYPTR8:   register_yescryptr8_05_algo   ( gate ); break;
-     case ALGO_YESCRYPTR16:  register_yescryptr16_05_algo  ( gate ); break;
-     case ALGO_YESCRYPTR32:  register_yescryptr32_05_algo  ( gate ); break;
-*/
    case ALGO_YESCRYPT:      register_yescrypt_algo      ( gate ); break;
    case ALGO_YESCRYPTR8:    register_yescryptr8_algo    ( gate ); break;
    case ALGO_YESCRYPTR8G:   register_yescryptr8g_algo   ( gate ); break;
--- a/algo-gate-api.h
+++ b/algo-gate-api.h
@@ -75,7 +75,7 @@

 // my hack at creating a set data type using bit masks. Set inclusion,
 // exclusion union and intersection operations are provided for convenience. In // some cases it may be desireable to use boolean algebra directly on the
-// data to perfomr set operations. Sets can be represented as single
+// data to perform set operations. Sets can be represented as single
 // elements, a bitwise OR of multiple elements, a bitwise OR of multiple
 // set variables or constants, or combinations of the above.
 // Examples:
@@ -110,13 +110,11 @@ inline bool set_excl ( set_t a, set_t b ) { return (a & b) == 0; }

 typedef struct
 {
-// mandatory functions, must be overwritten
+// mandatory function, must be overwritten
 int ( *scanhash ) ( struct work*, uint32_t, uint64_t*, struct thr_info* );

-// not used anywhere
-// optional unsafe, must be overwritten if algo uses function
+// Deprecated, will be removed
 int ( *hash )     ( void*, const void*, uint32_t ) ;
-//void ( *hash_suw ) ( void*, const void* );

 //optional, safe to use default in most cases

@@ -124,9 +122,6 @@ int ( *hash )     ( void*, const void*, uint32_t ) ;
 // threads.
 bool ( *miner_thread_init )     ( int );

-// Generate global blockheader from stratum data.
-void ( *stratum_gen_work )      ( struct stratum_ctx*, struct work* );
-
 // Get thread local copy of blockheader with unique nonce.
 void ( *get_new_work )          ( struct work*, struct work*, int, uint32_t* );

@@ -166,7 +161,9 @@ bool ( *do_this_thread )        ( int );
 // After do_this_thread
 void ( *resync_threads )        ( struct work* );

+// No longer needed
 json_t* (*longpoll_rpc_call)      ( CURL*, int*, char* );
+
 set_t optimizations;
 int  ( *get_work_data_size )     ();
 int  ntime_index;
@@ -215,15 +212,12 @@ int null_scanhash();

 // displays warning
 int null_hash    ();
-//void null_hash_suw();

 // optional safe targets, default listed first unless noted.

 void std_get_new_work( struct work *work, struct work *g_work, int thr_id,
                       uint32_t* end_nonce_ptr );

-void std_stratum_gen_work( struct stratum_ctx *sctx, struct work *work );
-
 void sha256d_gen_merkle_root( char *merkle_root, struct stratum_ctx *sctx );
 void SHA256_gen_merkle_root ( char *merkle_root, struct stratum_ctx *sctx );

@@ -251,10 +245,6 @@ void std_build_block_header( struct work* g_work, uint32_t version,
 void std_build_extraheader( struct work *work, struct stratum_ctx *sctx );

 json_t* std_longpoll_rpc_call( CURL *curl, int *err, char *lp_url );
-//json_t* jr2_longpoll_rpc_call( CURL *curl, int *err );
-
-//bool std_stratum_handle_response( json_t *val );
-//bool jr2_stratum_handle_response( json_t *val );

 bool std_ready_to_mine( struct work* work, struct stratum_ctx* stratum,
                        int thr_id );
@@ -273,11 +263,6 @@ bool register_algo_gate( int algo, algo_gate_t *gate );
 // compiler warnings but that's just more work for devs adding new algos.
 bool register_algo( algo_gate_t *gate );

-// Overrides a common set of functions used by RPC2 and other RPC2-specific
-// init. Called by algo's register function before initializing algo-specific
-// functions and data.
-//bool register_json_rpc2( algo_gate_t *gate );
-
 // use this to call the hash function of an algo directly, ie util.c test.
 void exec_hash_function( int algo, void *output, const void *pdata );

--- a/algo/blake/blake-4way.c
+++ b/algo/blake/blake-4way.c
@@ -48,7 +48,7 @@ int scanhash_blake_4way( struct work *work, uint32_t max_nonce,
      if ( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
      {
          pdata[19] = n+i;
-          submit_lane_solution( work, hash+(i<<3), mythr, i );
+          submit_solution( work, hash+(i<<3), mythr );
      }
      n += 4;

@@ -107,7 +107,7 @@ int scanhash_blake_8way( struct work *work, uint32_t max_nonce,
      if ( (hash+i)[7] <= HTarget && fulltest( hash+i, ptarget ) )
      {
          pdata[19] = n+i;
-          submit_lane_solution( work, hash+(i<<3), mythr, i );
+          submit_solution( work, hash+(i<<3), mythr );
      }
      n += 8;

--- a/algo/blake/blake2b-4way.c
+++ b/algo/blake/blake2b-4way.c
@@ -45,7 +45,7 @@ int scanhash_blake2b_8way( struct work *work, uint32_t max_nonce,
          if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
          {
              pdata[19] = n + lane;
-              submit_lane_solution( work, lane_hash, mythr, lane );
+              submit_solution( work, lane_hash, mythr );
          }
      }
      n += 8;
@@ -100,7 +100,7 @@ int scanhash_blake2b_4way( struct work *work, uint32_t max_nonce,
          if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
          {
              pdata[19] = n + lane;
-              submit_lane_solution( work, lane_hash, mythr, lane );
+              submit_solution( work, lane_hash, mythr );
          }
      }
      n += 4;
--- a/algo/blake/blake2s-4way.c
+++ b/algo/blake/blake2s-4way.c
@@ -49,7 +49,7 @@ int scanhash_blake2s_16way( struct work *work, uint32_t max_nonce,
         if ( likely( fulltest( lane_hash, ptarget ) && !opt_benchmark ) )
         {
              pdata[19] = n + lane;
-              submit_lane_solution( work, lane_hash, mythr, lane );
+              submit_solution( work, lane_hash, mythr );
         }
      }
      n += 16;
@@ -104,7 +104,7 @@ int scanhash_blake2s_8way( struct work *work, uint32_t max_nonce,
         if ( likely( fulltest( lane_hash, ptarget ) && !opt_benchmark ) )
         {
              pdata[19] = n + lane;
-              submit_lane_solution( work, lane_hash, mythr, lane );
+              submit_solution( work, lane_hash, mythr );
         }
      }
      n += 8;
@@ -157,7 +157,7 @@ int scanhash_blake2s_4way( struct work *work, uint32_t max_nonce,
         if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
         {
              pdata[19] = n + lane;
-              submit_lane_solution( work, lane_hash, mythr, lane );
+              submit_solution( work, lane_hash, mythr );
              }
      }
      n += 4;
--- a/algo/blake/blakecoin-4way.c
+++ b/algo/blake/blakecoin-4way.c
@@ -49,7 +49,7 @@ int scanhash_blakecoin_4way( struct work *work, uint32_t max_nonce,
           && !opt_benchmark )
      {
          pdata[19] = n+i;
-          submit_lane_solution( work, hash+(i<<3), mythr, i );
+          submit_solution( work, hash+(i<<3), mythr );
      }
      n += 4;

@@ -108,7 +108,7 @@ int scanhash_blakecoin_8way( struct work *work, uint32_t max_nonce,
          && !opt_benchmark )
      {
          pdata[19] = n+i;
-          submit_lane_solution( work, hash+(i<<3), mythr, i );
+          submit_solution( work, hash+(i<<3), mythr );
      }
      n += 8;
   } while ( (n < max_nonce) && !work_restart[thr_id].restart );
--- a/algo/blake/decred-4way.c
+++ b/algo/blake/decred-4way.c
@@ -62,7 +62,7 @@ int scanhash_decred_4way( struct work *work, uint32_t max_nonce,
      if ( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
      {
          pdata[DECRED_NONCE_INDEX] = n+i;
-          submit_lane_solution( work, hash+(i<<3), mythr, i );
+          submit_solution( work, hash+(i<<3), mythr );
      }
      n += 4;
  } while ( (n < max_nonce) && !work_restart[thr_id].restart );
--- a/algo/blake/pentablake-4way.c
+++ b/algo/blake/pentablake-4way.c
@@ -105,7 +105,7 @@ int scanhash_pentablake_4way( struct work *work,
                  && fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
              {
                 pdata[19] = n + i;
-                 submit_lane_solution( work, hash+(i<<3), mythr, i );
+                 submit_solution( work, hash+(i<<3), mythr );
              }
              n += 4;

--- a/algo/bmw/bmw512-4way.c
+++ b/algo/bmw/bmw512-4way.c
@@ -46,7 +46,7 @@ int scanhash_bmw512_8way( struct work *work, uint32_t max_nonce,
          if ( fulltest( lane_hash, ptarget ) )
          {
              pdata[19] = n + lane;
-              submit_lane_solution( work, lane_hash, mythr, lane );
+              submit_solution( work, lane_hash, mythr );
          }
      }
      n += 8;
@@ -99,7 +99,7 @@ int scanhash_bmw512_4way( struct work *work, uint32_t max_nonce,
          if ( fulltest( lane_hash, ptarget ) )
          {
              pdata[19] = n + lane;
-              submit_lane_solution( work, lane_hash, mythr, lane );
+              submit_solution( work, lane_hash, mythr );
          }
      }
      n += 4;
--- a/algo/groestl/groestl-4way.c
+++ b/algo/groestl/groestl-4way.c
@@ -53,7 +53,7 @@ int scanhash_groestl_4way( struct work *work, uint32_t max_nonce,
        if ( fulltest( hash+(lane<<3), ptarget) && !opt_benchmark )
        {
           pdata[19] = n + lane;
-           submit_lane_solution( work, hash+(lane<<3), mythr, lane );
+           submit_solution( work, hash+(lane<<3), mythr );
        }
        n += 4;
     } while ( ( n < last_nonce ) && !work_restart[thr_id].restart );
--- a/algo/groestl/myrgr-4way.c
+++ b/algo/groestl/myrgr-4way.c
@@ -143,7 +143,7 @@ int scanhash_myriad_8way( struct work *work, uint32_t max_nonce,
         if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
         {
            pdata[19] = n + lane;
-            submit_lane_solution( work, lane_hash, mythr, lane );
+            submit_solution( work, lane_hash, mythr );
         }
      }
      n += 8;
@@ -226,7 +226,7 @@ int scanhash_myriad_4way( struct work *work, uint32_t max_nonce,
         if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
         {
            pdata[19] = n + lane;
-            submit_lane_solution( work, lane_hash, mythr, lane );
+            submit_solution( work, lane_hash, mythr );
         }
      }
      n += 4;
--- a/algo/jh/jha-4way.c
+++ b/algo/jh/jha-4way.c
@@ -129,7 +129,7 @@ int scanhash_jha_4way( struct work *work, uint32_t max_nonce,
                 if ( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
                 {
                    pdata[19] = n+i;
-                    submit_lane_solution( work, lane_hash, mythr, i );
+                    submit_solution( work, lane_hash, mythr );
                 }
              }
              n += 4;
--- a/algo/keccak/keccak-4way.c
+++ b/algo/keccak/keccak-4way.c
@@ -45,7 +45,7 @@ int scanhash_keccak_8way( struct work *work, uint32_t max_nonce,
          if ( valid_hash( lane_hash, ptarget ) )
          {
              pdata[19] = bswap_32( n + lane );
-              submit_lane_solution( work, lane_hash, mythr, lane );
+              submit_solution( work, lane_hash, mythr );
          }
      }
      *noncev = _mm512_add_epi32( *noncev,
@@ -97,7 +97,7 @@ int scanhash_keccak_4way( struct work *work, uint32_t max_nonce,
          if ( valid_hash( lane_hash, ptarget ))
          {
              pdata[19] = bswap_32( n + lane );
-              submit_lane_solution( work, lane_hash, mythr, lane );
+              submit_solution( work, lane_hash, mythr );
          }
      }
      *noncev = _mm256_add_epi32( *noncev,
--- a/algo/keccak/sha3d-4way.c
+++ b/algo/keccak/sha3d-4way.c
@@ -52,7 +52,7 @@ int scanhash_sha3d_8way( struct work *work, uint32_t max_nonce,
          if ( valid_hash( lane_hash, ptarget ) )
          {
              pdata[19] = bswap_32( n + lane );
-              submit_lane_solution( work, lane_hash, mythr, lane );
+              submit_solution( work, lane_hash, mythr );
          }
      }
      *noncev = _mm512_add_epi32( *noncev,
@@ -111,7 +111,7 @@ int scanhash_sha3d_4way( struct work *work, uint32_t max_nonce,
          if ( valid_hash( lane_hash, ptarget ) )
          {
              pdata[19] = bswap_32( n + lane );
-              submit_lane_solution( work, lane_hash, mythr, lane );
+              submit_solution( work, lane_hash, mythr );
          }
      }
      *noncev = _mm256_add_epi32( *noncev,
--- a/algo/lyra2/allium-4way.c
+++ b/algo/lyra2/allium-4way.c
@@ -245,7 +245,7 @@ int scanhash_allium_16way( struct work *work, uint32_t max_nonce,
     if ( unlikely( valid_hash( hash+(lane<<3), ptarget ) && !bench ) )
     {
         pdata[19] = bswap_32( n + lane );
-         submit_lane_solution( work, hash+(lane<<3), mythr, lane );
+         submit_solution( work, hash+(lane<<3), mythr );
     }
     *noncev = _mm512_add_epi32( *noncev, m512_const1_32( 16 ) );
     n += 16;
@@ -394,7 +394,7 @@ int scanhash_allium_8way( struct work *work, uint32_t max_nonce,
        if ( unlikely( valid_hash( lane_hash, ptarget ) && !bench ) )
        {
           pdata[19] = bswap_32( n + lane );
-           submit_lane_solution( work, lane_hash, mythr, lane );
+           submit_solution( work, lane_hash, mythr );
        }
     }
     n += 8;
--- a/algo/lyra2/lyra2h-4way.c
+++ b/algo/lyra2/lyra2h-4way.c
@@ -76,7 +76,7 @@ int scanhash_lyra2h_4way( struct work *work, uint32_t max_nonce,
           && !opt_benchmark )
      {
          pdata[19] = n+i;         
-          submit_lane_solution( work, hash+(i<<3), mythr, i );
+          submit_solution( work, hash+(i<<3), mythr );
      }
      n += 4;
   } while (  (n < max_nonce-4) && !work_restart[thr_id].restart);
--- a/algo/lyra2/lyra2rev2-4way.c
+++ b/algo/lyra2/lyra2rev2-4way.c
@@ -200,7 +200,7 @@ int scanhash_lyra2rev2_16way( struct work *work, const uint32_t max_nonce,
         if ( likely( valid_hash( lane_hash, ptarget ) && !bench ) )
         {
             pdata[19] = bswap_32( n + lane );
-             submit_lane_solution( work, lane_hash, mythr, lane );
+             submit_solution( work, lane_hash, mythr );
         }
      }
      *noncev = _mm512_add_epi32( *noncev, m512_const1_32( 16 ) );
@@ -342,7 +342,7 @@ int scanhash_lyra2rev2_8way( struct work *work, const uint32_t max_nonce,
         if ( likely( valid_hash( lane_hash, ptarget ) && !bench ) )
         {
             pdata[19] = bswap_32( n + lane );
-             submit_lane_solution( work, lane_hash, mythr, lane );
+             submit_solution( work, lane_hash, mythr );
         }
      }
      *noncev = _mm256_add_epi32( *noncev, m256_const1_32( 8 ) );
@@ -469,7 +469,7 @@ int scanhash_lyra2rev2_4way( struct work *work, uint32_t max_nonce,
         if ( valid_hash( lane_hash, ptarget ) && !opt_benchmark )
         {
            pdata[19] = n + lane;         
-            submit_lane_solution( work, lane_hash, mythr, lane );
+            submit_solution( work, lane_hash, mythr );
         }
      }
      n += 4;
--- a/algo/lyra2/lyra2rev3-4way.c
+++ b/algo/lyra2/lyra2rev3-4way.c
@@ -165,7 +165,7 @@ int scanhash_lyra2rev3_16way( struct work *work, const uint32_t max_nonce,
         if ( likely( valid_hash( lane_hash, ptarget ) && !opt_benchmark ) )
         {
             pdata[19] = n + lane;
-             submit_lane_solution( work, lane_hash, mythr, lane );
+             submit_solution( work, lane_hash, mythr );
         }
      }
      n += 16;
@@ -284,7 +284,7 @@ int scanhash_lyra2rev3_8way( struct work *work, const uint32_t max_nonce,
         if ( likely( valid_hash( lane_hash, ptarget ) && !bench ) )
         {
             pdata[19] = bswap_32( n + lane );
-             submit_lane_solution( work, lane_hash, mythr, lane );
+             submit_solution( work, lane_hash, mythr );
         }
      }
      *noncev = _mm256_add_epi32( *noncev, m256_const1_32( 8 ) );
@@ -386,7 +386,7 @@ int scanhash_lyra2rev3_4way( struct work *work, const uint32_t max_nonce,
         if ( valid_hash( lane_hash, ptarget ) && !opt_benchmark ) 
         {
              pdata[19] = bswap_32( n + lane );    
-              submit_lane_solution( work, lane_hash, mythr, lane );
+              submit_solution( work, lane_hash, mythr );
 	      }
      }
      *noncev = _mm_add_epi32( *noncev, m128_const1_32( 4 ) );
--- a/algo/lyra2/lyra2z-4way.c
+++ b/algo/lyra2/lyra2z-4way.c
@@ -124,7 +124,7 @@ int scanhash_lyra2z_16way( struct work *work, uint32_t max_nonce,
        if ( unlikely( valid_hash( lane_hash, ptarget ) && !bench ) )
        {
           pdata[19] = bswap_32( n + lane );
-           submit_lane_solution( work, lane_hash, mythr, lane );
+           submit_solution( work, lane_hash, mythr );
        }
      }
      *noncev = _mm512_add_epi32( *noncev, m512_const1_32( 16 ) );
@@ -222,7 +222,7 @@ int scanhash_lyra2z_8way( struct work *work, uint32_t max_nonce,
        if ( unlikely( valid_hash( lane_hash, ptarget ) && !bench ) )
        {
           pdata[19] = bswap_32( n + lane );
-           submit_lane_solution( work, lane_hash, mythr, lane );
+           submit_solution( work, lane_hash, mythr );
        }
      }
      *noncev = _mm256_add_epi32( *noncev, m256_const1_32( 8 ) );
@@ -301,7 +301,7 @@ int scanhash_lyra2z_4way( struct work *work, uint32_t max_nonce,
        if ( unlikely( valid_hash( lane_hash, ptarget ) && !bench ) )
        {
           pdata[19] = bswap_32( n + lane );
-           submit_lane_solution( work, lane_hash, mythr, lane );
+           submit_solution( work, lane_hash, mythr );
        }
      }
      *noncev = _mm_add_epi32( *noncev, m128_const1_32( 4 ) );
--- a/algo/lyra2/lyra2z330.c
+++ b/algo/lyra2/lyra2z330.c
@@ -68,7 +68,7 @@ bool lyra2z330_thread_init()

 bool register_lyra2z330_algo( algo_gate_t* gate )
 {
-  gate->optimizations = SSE42_OPT | AVX2_OPT;
+  gate->optimizations = SSE2_OPT | AVX2_OPT;
  gate->miner_thread_init = (void*)&lyra2z330_thread_init;
  gate->scanhash   = (void*)&scanhash_lyra2z330;
  gate->hash       = (void*)&lyra2z330_hash;
--- a/algo/lyra2/phi2-4way.c
+++ b/algo/lyra2/phi2-4way.c
@@ -302,7 +302,7 @@ int scanhash_phi2_8way( struct work *work, uint32_t max_nonce,
         if ( valid_hash( lane_hash, ptarget ) )
         {
            be32enc( pdata + 19, n + lane );
-            submit_lane_solution( work, lane_hash, mythr, lane );
+            submit_solution( work, lane_hash, mythr );
         }
      }
      n += 8;
@@ -483,7 +483,7 @@ int scanhash_phi2_4way( struct work *work, uint32_t max_nonce,
         if ( valid_hash( lane_hash, ptarget ) )
         {
            be32enc( pdata + 19, n + lane );
-            submit_lane_solution( work, lane_hash, mythr, lane );
+            submit_solution( work, lane_hash, mythr );
         }
      }
      edata[        19 ] += 4;
--- a/algo/nist5/nist5-4way.c
+++ b/algo/nist5/nist5-4way.c
@@ -108,7 +108,7 @@ int scanhash_nist5_8way( struct work *work, uint32_t max_nonce,
           if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
           {
              pdata[19] = n + lane;
-              submit_lane_solution( work, lane_hash, mythr, lane );
+              submit_solution( work, lane_hash, mythr );
           }
        }
        n += 8;
@@ -196,7 +196,7 @@ int scanhash_nist5_4way( struct work *work, uint32_t max_nonce,
           if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
           {
              pdata[19] = n + lane;
-              submit_lane_solution( work, lane_hash, mythr, lane );
+              submit_solution( work, lane_hash, mythr );
           }
        }
        n += 4;
--- a/algo/quark/anime-4way.c
+++ b/algo/quark/anime-4way.c
@@ -223,7 +223,7 @@ int scanhash_anime_8way( struct work *work, uint32_t max_nonce,
          if ( valid_hash( lane_hash, ptarget ) )
          {
             pdata[19] = bswap_32( n + lane );
-             submit_lane_solution( work, lane_hash, mythr, lane );
+             submit_solution( work, lane_hash, mythr );
          }
       }
       *noncev = _mm512_add_epi32( *noncev,
@@ -383,7 +383,7 @@ int scanhash_anime_4way( struct work *work, uint32_t max_nonce,
          if ( valid_hash( lane_hash, ptarget ) )
          {
             pdata[19] = bswap_32( n + lane );
-             submit_lane_solution( work, lane_hash, mythr, lane );
+             submit_solution( work, lane_hash, mythr );
          }
       }
       *noncev = _mm256_add_epi32( *noncev,
--- a/algo/quark/hmq1725-4way.c
+++ b/algo/quark/hmq1725-4way.c
@@ -596,7 +596,7 @@ int scanhash_hmq1725_8way( struct work *work, uint32_t max_nonce,
          if ( valid_hash( lane_hash, ptarget ) )
          {
             pdata[19] = bswap_32( n + lane );
-             submit_lane_solution( work, lane_hash, mythr, lane );
+             submit_solution( work, lane_hash, mythr );
          }
       }
       *noncev = _mm512_add_epi32( *noncev,
@@ -1018,7 +1018,7 @@ int scanhash_hmq1725_4way( struct work *work, uint32_t max_nonce,
          if ( valid_hash( lane_hash, ptarget ) )
          {
             pdata[19] = bswap_32( n + lane );
-             submit_lane_solution( work, lane_hash, mythr, lane );
+             submit_solution( work, lane_hash, mythr );
          }
       }
       *noncev = _mm256_add_epi32( *noncev,
--- a/algo/quark/quark-4way.c
+++ b/algo/quark/quark-4way.c
@@ -235,7 +235,7 @@ int scanhash_quark_8way( struct work *work, uint32_t max_nonce,
          if ( valid_hash( lane_hash, ptarget ) )
          {
             pdata[19] = bswap_32( n + lane );
-             submit_lane_solution( work, lane_hash, mythr, lane );
+             submit_solution( work, lane_hash, mythr );
          }
       }
       *noncev = _mm512_add_epi32( *noncev,
@@ -408,7 +408,7 @@ int scanhash_quark_4way( struct work *work, uint32_t max_nonce,
          if ( valid_hash( lane_hash, ptarget ) )
          {
             pdata[19] = bswap_32( n + lane );
-             submit_lane_solution( work, lane_hash, mythr, lane );
+             submit_solution( work, lane_hash, mythr );
          }
       }
       *noncev = _mm256_add_epi32( *noncev,
--- a/algo/qubit/deep-2way.c
+++ b/algo/qubit/deep-2way.c
@@ -106,13 +106,13 @@ int scanhash_deep_2way( struct work *work,uint32_t max_nonce,
            if ( fulltest( hash, ptarget) && !opt_benchmark )
            {
                pdata[19] = n;
-                submit_lane_solution( work, hash, mythr, 0 );
+                submit_solution( work, hash, mythr );
            }
            if ( !( (hash+8)[7] & mask ) )
            if ( fulltest( hash+8, ptarget) && !opt_benchmark )
            {
               pdata[19] = n+1;
-               submit_lane_solution( work, hash+8, mythr, 1 );
+               submit_solution( work, hash+8, mythr );
            }
            n += 2;
         } while ( ( n < max_nonce ) && !work_restart[thr_id].restart );
--- a/algo/qubit/qubit-2way.c
+++ b/algo/qubit/qubit-2way.c
@@ -153,7 +153,7 @@ int scanhash_qubit_4way( struct work *work,uint32_t max_nonce,
        if ( likely( fulltest( hash+(lane<<3), ptarget) && !opt_benchmark ) )
        {
           pdata[19] = n + lane;
-           submit_lane_solution( work, hash+(lane<<3), mythr, lane );
+           submit_solution( work, hash+(lane<<3), mythr );
        }
        n += 4;
     } while ( ( n < max_nonce-4 ) && !work_restart[thr_id].restart );
@@ -255,13 +255,13 @@ int scanhash_qubit_2way( struct work *work,uint32_t max_nonce,
         if ( likely( fulltest( hash, ptarget) && !opt_benchmark ) )
         {
            pdata[19] = n;
-            submit_lane_solution( work, hash, mythr, 0 );
+            submit_solution( work, hash, mythr );
         }
         if ( unlikely( ( (hash+8))[7] <= Htarg ) )
         if ( likely( fulltest( hash+8, ptarget) && !opt_benchmark ) )
         {
            pdata[19] = n+1;
-            submit_lane_solution( work, hash+8, mythr, 1 );
+            submit_solution( work, hash+8, mythr );
         }
         n += 2;
     } while ( ( n < max_nonce ) && !work_restart[thr_id].restart );
--- a/algo/ripemd/lbry-4way.c
+++ b/algo/ripemd/lbry-4way.c
@@ -132,7 +132,7 @@ int scanhash_lbry_16way( struct work *work, uint32_t max_nonce,
         if ( likely( fulltest( lane_hash, ptarget ) && !opt_benchmark ) )
         {
            pdata[27] = n + i;
-            submit_lane_solution( work, lane_hash, mythr, i );
+            submit_solution( work, lane_hash, mythr );
         }
      }
      n += 16;
@@ -251,7 +251,7 @@ int scanhash_lbry_8way( struct work *work, uint32_t max_nonce,
         if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
         {
            pdata[27] = n + i;
-            submit_lane_solution( work, lane_hash, mythr, i );
+            submit_solution( work, lane_hash, mythr );
         }
      }
      n += 8;
--- a/algo/sha/hmac-sha256-hash-4way.c
+++ b/algo/sha/hmac-sha256-hash-4way.c
@@ -0,0 +1,440 @@
+/*-
+ * Copyright 2005,2007,2009 Colin Percival
+ * Copyright 2020 JayDDee246@gmail.com
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include <stdint.h>
+#include <string.h>
+#include "hmac-sha256-hash-4way.h"
+#include "compat.h"
+
+// HMAC 4-way SSE2
+
+/**
+ * hmac_sha256_4way_full(digest, K, Klen, in, len):
+ * Compute the HMAC-SHA256 of ${len} bytes from ${in} using the key ${K} of
+ * length ${Klen}, and write the result to ${digest}.
+ */
+void
+hmac_sha256_4way_full( void *digest, const void *K, size_t Klen,
+                       const void *in, size_t len )
+{
+   hmac_sha256_4way_context ctx;   // local context, discarded on return
+   hmac_sha256_4way_init( &ctx, K, Klen );
+   hmac_sha256_4way_update( &ctx, in, len );
+   hmac_sha256_4way_close( &ctx, digest );
+}
+
+/* Initialize an HMAC-SHA256 operation (ctx->ictx and ctx->octx) with the given key. */
+void
+hmac_sha256_4way_init( hmac_sha256_4way_context *ctx, const void *_K,
+                       size_t Klen )
+{
+	unsigned char pad[64*4] __attribute__ ((aligned (64)));
+	unsigned char khash[32*4] __attribute__ ((aligned (64)));
+	const unsigned char * K = _K;
+	size_t i;
+
+	/* If Klen > 64, the key is really SHA256(K). */
+	if ( Klen > 64 )
+   {
+		sha256_4way_init( &ctx->ictx );
+		sha256_4way_update( &ctx->ictx, K, Klen );
+		sha256_4way_close( &ctx->ictx, khash );
+		K = khash;
+		Klen = 32;
+	}
+
+	/* Inner SHA256 operation is SHA256(K xor [block of 0x36] || data). */
+   sha256_4way_init( &ctx->ictx );
+	memset( pad, 0x36, 64*4 );
+   /* pad holds 16 __m128i; XOR Klen/4 of them (as below), never Klen. */
+   for ( i = 0; i < Klen/4; i++ )
+		casti_m128i( pad, i ) = _mm_xor_si128( casti_m128i( pad, i ),
+                                             casti_m128i( K, i ) );
+
+   sha256_4way_update( &ctx->ictx, pad, 64 );
+
+	/* Outer SHA256 operation is SHA256(K xor [block of 0x5c] || hash). */
+	sha256_4way_init( &ctx->octx );
+	memset( pad, 0x5c, 64*4 );
+	for ( i = 0; i < Klen/4; i++ )
+		casti_m128i( pad, i ) = _mm_xor_si128( casti_m128i( pad, i ),
+                                             casti_m128i( K, i ) );
+	sha256_4way_update( &ctx->octx, pad, 64 );
+}
+
+/* Add ${len} bytes from ${in} to the HMAC-SHA256 operation. */
+void
+hmac_sha256_4way_update( hmac_sha256_4way_context *ctx, const void *in,
+                         size_t len )
+{
+	/* Feed data to the inner SHA256 operation; ctx->octx is not touched here. */
+	sha256_4way_update( &ctx->ictx, in, len );
+}
+
+/* Finish an HMAC-SHA256 operation and write the result to ${digest}. */
+void
+hmac_sha256_4way_close( hmac_sha256_4way_context *ctx, void *digest )
+{
+	unsigned char ihash[32*4] __attribute__ ((aligned (64)));
+
+	/* Finish the inner SHA256 operation; its output lands in ihash. */
+	sha256_4way_close( &ctx->ictx, ihash );
+
+	/* Feed the inner hash to the outer SHA256 operation. */
+	sha256_4way_update( &ctx->octx, ihash, 32 );
+
+	/* Finish the outer SHA256 operation. */
+	sha256_4way_close( &ctx->octx, digest );
+}
+
+/**
+ * pbkdf2_sha256_4way(buf, dkLen, passwd, passwdlen, salt, saltlen, c):
+ * Compute PBKDF2(passwd, salt, c, dkLen) using HMAC-SHA256 as the PRF, and
+ * write the output to buf.  The value dkLen must be at most 32 * (2^32 - 1).
+ */
+void
+pbkdf2_sha256_4way( uint8_t *buf, size_t dkLen,
+                    const uint8_t *passwd, size_t passwdlen,
+                    const uint8_t *salt, size_t saltlen, uint64_t c )
+{
+	hmac_sha256_4way_context PShctx, hctx;
+	uint8_t _ALIGN(128) T[32*4];
+	uint8_t _ALIGN(128) U[32*4];
+   __m128i ivec;
+   size_t i, clen;
+	uint64_t j;
+	int k;
+
+	/* Compute HMAC state after processing P and S. */
+	hmac_sha256_4way_init( &PShctx, passwd, passwdlen );
+	hmac_sha256_4way_update( &PShctx, salt, saltlen );
+
+	/* Iterate through the blocks. */
+	for ( i = 0; i * 32 < dkLen; i++ )
+   {
+		/* Generate INT(i + 1): byte-swapped, copied to every 32-bit element. */
+      ivec = _mm_set1_epi32( bswap_32( i+1 ) ); 
+
+		/* Compute U_1 = PRF(P, S || INT(i + 1)), starting from the saved state. */
+		memcpy( &hctx, &PShctx, sizeof(hmac_sha256_4way_context) );
+		hmac_sha256_4way_update( &hctx, &ivec, 4 );
+		hmac_sha256_4way_close( &hctx, U );
+
+		/* T_i = U_1 ... */
+		memcpy( T, U, 32*4 );
+
+		for ( j = 2; j <= c; j++ )
+      {
+			/* Compute U_j = PRF(P, U_{j-1}); the key is re-initialized each round. */
+			hmac_sha256_4way_init( &hctx, passwd, passwdlen );
+			hmac_sha256_4way_update( &hctx, U, 32 );
+			hmac_sha256_4way_close( &hctx, U );
+
+			/* ... xor U_j ... */
+			for ( k = 0; k < 8; k++ )
+				casti_m128i( T, k ) = _mm_xor_si128( casti_m128i( T, k ),
+                                                 casti_m128i( U, k ) );
+		}
+
+		/* Copy as many bytes as necessary into buf (clen*4 bytes per block). */
+		clen = dkLen - i * 32;
+		if ( clen > 32 )
+			clen = 32;
+		memcpy( &buf[ i*32*4 ], T, clen*4 );
+	}
+}
+
+#if defined(__AVX2__)
+
+// HMAC 8-way AVX2
+/* One-shot HMAC-SHA256: key K/Klen, message in/len, result written to digest. */
+void
+hmac_sha256_8way_full( void *digest, const void *K, size_t Klen,
+                       const void *in, size_t len )
+{
+   hmac_sha256_8way_context ctx;   // local context, discarded on return
+   hmac_sha256_8way_init( &ctx, K, Klen );
+   hmac_sha256_8way_update( &ctx, in, len );
+   hmac_sha256_8way_close( &ctx, digest );
+}
+
+/* Initialize an HMAC-SHA256 operation (ctx->ictx and ctx->octx) with the given key. */
+void
+hmac_sha256_8way_init( hmac_sha256_8way_context *ctx, const void *_K,
+                       size_t Klen )
+{
+   unsigned char pad[64*8] __attribute__ ((aligned (128)));
+   unsigned char khash[32*8] __attribute__ ((aligned (128)));
+   const unsigned char * K = _K;
+   size_t i;
+
+   /* If Klen > 64, the key is really SHA256(K). */
+   if ( Klen > 64 )
+   {
+      sha256_8way_init( &ctx->ictx );
+      sha256_8way_update( &ctx->ictx, K, Klen );
+      sha256_8way_close( &ctx->ictx, khash );
+      K = khash;
+      Klen = 32;
+   }
+
+   /* Inner SHA256 operation is SHA256(K xor [block of 0x36] || data). */
+   sha256_8way_init( &ctx->ictx );
+   memset( pad, 0x36, 64*8);
+   /* Klen/4 vectors are XORed. NOTE(review): assumes Klen is a multiple of 4. */
+   for ( i = 0; i < Klen/4; i++ )
+      casti_m256i( pad, i ) = _mm256_xor_si256( casti_m256i( pad, i ),
+                                                casti_m256i( K, i ) );
+
+   sha256_8way_update( &ctx->ictx, pad, 64 );
+
+   /* Outer SHA256 operation is SHA256(K xor [block of 0x5c] || hash). */
+   sha256_8way_init( &ctx->octx );
+   memset( pad, 0x5c, 64*8 );
+   for ( i = 0; i < Klen/4; i++ )
+      casti_m256i( pad, i ) = _mm256_xor_si256( casti_m256i( pad, i ),
+                                                casti_m256i( K, i ) );
+   sha256_8way_update( &ctx->octx, pad, 64 );
+}
+/* Add ${len} bytes from ${in} to the HMAC-SHA256 operation. */
+void
+hmac_sha256_8way_update( hmac_sha256_8way_context *ctx, const void *in,
+                         size_t len )
+{
+   /* Feed data to the inner SHA256 operation; ctx->octx is not touched here. */
+   sha256_8way_update( &ctx->ictx, in, len );
+}
+
+/* Finish an HMAC-SHA256 operation and write the result to ${digest}. */
+void
+hmac_sha256_8way_close( hmac_sha256_8way_context *ctx, void *digest )
+{
+   unsigned char ihash[32*8] __attribute__ ((aligned (128)));
+
+   /* Finish the inner SHA256 operation; its output lands in ihash. */
+   sha256_8way_close( &ctx->ictx, ihash );
+
+   /* Feed the inner hash to the outer SHA256 operation. */
+   sha256_8way_update( &ctx->octx, ihash, 32 );
+
+   /* Finish the outer SHA256 operation. */
+   sha256_8way_close( &ctx->octx, digest );
+}
+
+/**
+ * pbkdf2_sha256_8way(buf, dkLen, passwd, passwdlen, salt, saltlen, c):
+ * Compute PBKDF2(passwd, salt, c, dkLen) using HMAC-SHA256 as the PRF, and
+ * write the output to buf.  The value dkLen must be at most 32 * (2^32 - 1).
+ */
+void
+pbkdf2_sha256_8way( uint8_t *buf, size_t dkLen, const uint8_t *passwd,
+                    size_t passwdlen, const uint8_t *salt, size_t saltlen,
+                    uint64_t c )
+{
+   hmac_sha256_8way_context PShctx, hctx;
+   uint8_t _ALIGN(128) T[32*8];
+   uint8_t _ALIGN(128) U[32*8];
+   size_t i, clen;
+   uint64_t j;
+   int k;
+
+   /* Compute HMAC state after processing P and S. */
+   hmac_sha256_8way_init( &PShctx, passwd, passwdlen );
+
+// saltlen can be an odd number of bytes
+   hmac_sha256_8way_update( &PShctx, salt, saltlen );
+
+   /* Iterate through the blocks. */
+   for ( i = 0; i * 32 < dkLen; i++ )
+   {
+      __m256i ivec = _mm256_set1_epi32( bswap_32( i+1 ) );
+
+      /* Compute U_1 = PRF(P, S || INT(i + 1)), starting from the saved state. */
+      memcpy( &hctx, &PShctx, sizeof(hmac_sha256_8way_context) );
+      hmac_sha256_8way_update( &hctx, &ivec, 4 );
+      hmac_sha256_8way_close( &hctx, U );
+
+      /* T_i = U_1 ... */
+      memcpy( T, U, 32*8 );
+
+      for ( j = 2; j <= c; j++ )
+      {
+         /* Compute U_j = PRF(P, U_{j-1}); the key is re-initialized each round. */
+         hmac_sha256_8way_init( &hctx, passwd, passwdlen );
+         hmac_sha256_8way_update( &hctx, U, 32 );
+         hmac_sha256_8way_close( &hctx, U );
+
+         /* ... xor U_j ... */
+         for ( k = 0; k < 8; k++ )
+            casti_m256i( T, k ) = _mm256_xor_si256( casti_m256i( T, k ),
+                                                    casti_m256i( U, k ) );
+      }
+
+      /* Copy as many bytes as necessary into buf (clen*8 bytes per block). */
+      clen = dkLen - i * 32;
+      if ( clen > 32 )
+         clen = 32;
+      memcpy( &buf[ i*32*8 ], T, clen*8 );
+   }
+}
+
+#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
+
+// HMAC 16-way AVX512
+/* One-shot HMAC-SHA256: key K/Klen, message in/len, result written to digest. */
+void
+hmac_sha256_16way_full( void *digest, const void *K, size_t Klen,
+                        const void *in, size_t len )
+{
+   hmac_sha256_16way_context ctx;   // local context, discarded on return
+   hmac_sha256_16way_init( &ctx, K, Klen );
+   hmac_sha256_16way_update( &ctx, in, len );
+   hmac_sha256_16way_close( &ctx, digest );
+}
+/* Initialize an HMAC-SHA256 operation (ctx->ictx and ctx->octx) with the given key. */
+void
+hmac_sha256_16way_init( hmac_sha256_16way_context *ctx, const void *_K,
+                       size_t Klen )
+{
+   unsigned char pad[64*16] __attribute__ ((aligned (128)));
+   unsigned char khash[32*16] __attribute__ ((aligned (128)));
+   const unsigned char * K = _K;
+   size_t i;
+
+   /* If Klen > 64, the key is really SHA256(K). */
+   if ( Klen > 64 )
+   {
+      sha256_16way_init( &ctx->ictx );
+      sha256_16way_update( &ctx->ictx, K, Klen );
+      sha256_16way_close( &ctx->ictx, khash );
+      K = khash;
+      Klen = 32;
+   }
+
+   /* Inner SHA256 operation is SHA256(K xor [block of 0x36] || data). */
+   sha256_16way_init( &ctx->ictx );
+   memset( pad, 0x36, 64*16 );
+   /* pad holds 16 __m512i; XOR Klen/4 of them (as below), never Klen. */
+   for ( i = 0; i < Klen/4; i++ )
+      casti_m512i( pad, i ) = _mm512_xor_si512( casti_m512i( pad, i ),
+                                                casti_m512i( K, i ) );
+   sha256_16way_update( &ctx->ictx, pad, 64 );
+
+   /* Outer SHA256 operation is SHA256(K xor [block of 0x5c] || hash). */
+   sha256_16way_init( &ctx->octx );
+   memset( pad, 0x5c, 64*16 );
+   for ( i = 0; i < Klen/4; i++ )
+      casti_m512i( pad, i ) = _mm512_xor_si512( casti_m512i( pad, i ),
+                                             casti_m512i( K, i ) );
+   sha256_16way_update( &ctx->octx, pad, 64 );
+}
+/* Add ${len} bytes from ${in} to the HMAC-SHA256 operation. */
+void
+hmac_sha256_16way_update( hmac_sha256_16way_context *ctx, const void *in,
+                         size_t len )
+{
+   /* Feed data to the inner SHA256 operation; ctx->octx is not touched here. */
+   sha256_16way_update( &ctx->ictx, in, len );
+}
+
+/* Finish an HMAC-SHA256 operation and write the result to ${digest}. */
+void
+hmac_sha256_16way_close( hmac_sha256_16way_context *ctx, void *digest )
+{
+   unsigned char ihash[32*16] __attribute__ ((aligned (128)));
+
+   /* Finish the inner SHA256 operation; its output lands in ihash. */
+   sha256_16way_close( &ctx->ictx, ihash );
+
+   /* Feed the inner hash to the outer SHA256 operation. */
+   sha256_16way_update( &ctx->octx, ihash, 32 );
+
+   /* Finish the outer SHA256 operation. */
+   sha256_16way_close( &ctx->octx, digest );
+}
+
+/**
+ * pbkdf2_sha256_16way(buf, dkLen, passwd, passwdlen, salt, saltlen, c):
+ * Compute PBKDF2(passwd, salt, c, dkLen) using HMAC-SHA256 as the PRF, and
+ * write the output to buf.  The value dkLen must be at most 32 * (2^32 - 1).
+ */
+void
+pbkdf2_sha256_16way( uint8_t *buf, size_t dkLen,
+                     const uint8_t *passwd, size_t passwdlen,
+                     const uint8_t *salt, size_t saltlen, uint64_t c )
+{
+   hmac_sha256_16way_context PShctx, hctx;
+   uint8_t _ALIGN(128) T[32*16];
+   uint8_t _ALIGN(128) U[32*16];
+   __m512i ivec;
+   size_t i, clen;
+   uint64_t j;
+   int k;
+
+   /* Compute HMAC state after processing P and S. */
+   hmac_sha256_16way_init( &PShctx, passwd, passwdlen );
+   hmac_sha256_16way_update( &PShctx, salt, saltlen );
+
+   /* Iterate through the blocks. */
+   for ( i = 0; i * 32 < dkLen; i++ )
+   {
+      /* Generate INT(i + 1): byte-swapped, copied to every 32-bit element. */
+      ivec = _mm512_set1_epi32( bswap_32( i+1 ) );
+
+      /* Compute U_1 = PRF(P, S || INT(i + 1)), starting from the saved state. */
+      memcpy( &hctx, &PShctx, sizeof(hmac_sha256_16way_context) );
+      hmac_sha256_16way_update( &hctx, &ivec, 4 );
+      hmac_sha256_16way_close( &hctx, U );
+
+      /* T_i = U_1 ... */
+      memcpy( T, U, 32*16 );
+
+      for ( j = 2; j <= c; j++ )
+      {
+         /* Compute U_j = PRF(P, U_{j-1}); the key is re-initialized each round. */
+         hmac_sha256_16way_init( &hctx, passwd, passwdlen );
+         hmac_sha256_16way_update( &hctx, U, 32 );
+         hmac_sha256_16way_close( &hctx, U );
+
+         /* ... xor U_j ... */
+         for ( k = 0; k < 8; k++ )
+            casti_m512i( T, k ) = _mm512_xor_si512( casti_m512i( T, k ),
+                                                    casti_m512i( U, k ) );
+      }
+
+      /* Copy as many bytes as necessary into buf (clen*16 bytes per block). */
+      clen = dkLen - i * 32;
+      if ( clen > 32 )
+         clen = 32;
+      memcpy( &buf[ i*32*16 ], T, clen*16 );
+   }
+}
+
+#endif  // AVX512
+#endif  // AVX2
+
--- a/algo/sha/hmac-sha256-hash-4way.h
+++ b/algo/sha/hmac-sha256-hash-4way.h
@@ -0,0 +1,107 @@
+/*-
+ * Copyright 2005,2007,2009 Colin Percival
+ * Copyright 2020 JayDDee@gmail.com
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: src/lib/libmd/sha256_Y.h,v 1.2 2006/01/17 15:35:56 phk Exp $
+ */
+
+#ifndef HMAC_SHA256_4WAY_H__
+#define HMAC_SHA256_4WAY_H__
+
+
+// Tested only 8-way with null pers
+
+#include <sys/types.h>
+#include <stdint.h>
+#include "simd-utils.h"
+#include "sha-hash-4way.h"
+
+typedef struct _hmac_sha256_4way_context
+{
+   sha256_4way_context ictx;   // inner SHA256 state: key xor 0x36 pad, then data
+   sha256_4way_context octx;   // outer SHA256 state: key xor 0x5c pad, then inner hash
+} hmac_sha256_4way_context;
+
+//void SHA256_Buf( const void *, size_t len, uint8_t digest[32] );
+void hmac_sha256_4way_init( hmac_sha256_4way_context *, const void *, size_t );
+void hmac_sha256_4way_update( hmac_sha256_4way_context *, const void *,
+                              size_t );
+void hmac_sha256_4way_close( hmac_sha256_4way_context *, void* );
+void hmac_sha256_4way_full( void*, const void *, size_t Klen, const void *,
+                            size_t len );
+
+/**
+ * pbkdf2_sha256_4way(buf, dkLen, passwd, passwdlen, salt, saltlen, c):
+ * Compute PBKDF2(passwd, salt, c, dkLen) using HMAC-SHA256 as the PRF, and
+ * write the output to buf.  The value dkLen must be at most 32 * (2^32 - 1).
+ */
+void pbkdf2_sha256_4way( uint8_t *, size_t, const uint8_t *, size_t,
+                         const uint8_t *, size_t, uint64_t );
+
+#if defined(__AVX2__)
+
+typedef struct _hmac_sha256_8way_context
+{
+   sha256_8way_context ictx;   // inner SHA256 state: key xor 0x36 pad, then data
+   sha256_8way_context octx;   // outer SHA256 state: key xor 0x5c pad, then inner hash
+} hmac_sha256_8way_context;
+
+//void SHA256_Buf( const void *, size_t len, uint8_t digest[32] );
+void hmac_sha256_8way_init( hmac_sha256_8way_context *, const void *, size_t );
+void hmac_sha256_8way_update( hmac_sha256_8way_context *, const void *,
+                              size_t );
+void hmac_sha256_8way_close( hmac_sha256_8way_context *, void* );
+void hmac_sha256_8way_full( void*, const void *, size_t Klen, const void *,
+                            size_t len );
+
+void pbkdf2_sha256_8way( uint8_t *, size_t, const uint8_t *, size_t,
+                        const uint8_t *, size_t, uint64_t );
+      
+#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
+
+typedef struct _hmac_sha256_16way_context
+{
+   sha256_16way_context ictx;   // inner SHA256 state: key xor 0x36 pad, then data
+   sha256_16way_context octx;   // outer SHA256 state: key xor 0x5c pad, then inner hash
+} hmac_sha256_16way_context;
+
+//void SHA256_Buf( const void *, size_t len, uint8_t digest[32] );
+void hmac_sha256_16way_init( hmac_sha256_16way_context *,
+                             const void *, size_t );
+void hmac_sha256_16way_update( hmac_sha256_16way_context *, const void *,
+                              size_t );
+void hmac_sha256_16way_close( hmac_sha256_16way_context *, void* );
+void hmac_sha256_16way_full( void*, const void *, size_t Klen, const void *,
+                             size_t len );
+
+void pbkdf2_sha256_16way( uint8_t *, size_t, const uint8_t *, size_t,
+                          const uint8_t *, size_t, uint64_t );
+
+
+
+#endif   // AVX512
+#endif   // AVX2
+
+#endif // HMAC_SHA256_4WAY_H__
--- a/algo/sha/hmac-sha256-hash.c
+++ b/algo/sha/hmac-sha256-hash.c
@@ -81,16 +81,17 @@ HMAC_SHA256_Init( HMAC_SHA256_CTX *ctx, const void *_K, size_t Klen )

 	/* Inner SHA256 operation is SHA256(K xor [block of 0x36] || data). */
   SHA256_Init( &ctx->ictx );
-	memset( pad, 0x36, 64 );
-	for ( i = 0; i < Klen; i++ )
-		pad[i] ^= K[i];
+
+
+   for ( i = 0; i < Klen; i++ )  pad[i] = K[i] ^ 0x36;
+   memset( pad + Klen, 0x36, 64 - Klen );
 	SHA256_Update( &ctx->ictx, pad, 64 );

 	/* Outer SHA256 operation is SHA256(K xor [block of 0x5c] || hash). */
 	SHA256_Init( &ctx->octx );
-	memset(pad, 0x5c, 64);
-	for ( i = 0; i < Klen; i++ )
-		pad[i] ^= K[i];
+
+   for ( i = 0; i < Klen; i++ )  pad[i] = K[i] ^ 0x5c;
+   memset( pad + Klen, 0x5c, 64 - Klen );
 	SHA256_Update( &ctx->octx, pad, 64 );
 }

@@ -161,7 +162,13 @@ PBKDF2_SHA256( const uint8_t *passwd, size_t passwdlen, const uint8_t *salt,
 			HMAC_SHA256_Final( U, &hctx );

 			/* ... xor U_j ... */
-			for ( k = 0; k < 32; k++ )
+//         _mm256_xor_si256( *(__m256i*)T, *(__m256i*)U );
+//         _mm_xor_si128( ((__m128i*)T)[0], ((__m128i*)U)[0] );
+//         _mm_xor_si128( ((__m128i*)T)[1], ((__m128i*)U)[1] );
+
+//         for ( k = 0; k < 4; k++ )  T[k] ^= U[k];
+         
+         for ( k = 0; k < 32; k++ )
 				T[k] ^= U[k];
 		}

--- a/algo/sha/sha-hash-4way.h
+++ b/algo/sha/sha-hash-4way.h
@@ -58,6 +58,7 @@ void sha256_4way_init( sha256_4way_context *sc );
 void sha256_4way_update( sha256_4way_context *sc, const void *data,
                         size_t len );
 void sha256_4way_close( sha256_4way_context *sc, void *dst );
+void sha256_4way_full( void *dst, const void *data, size_t len );

 #endif  // SSE2

@@ -75,6 +76,7 @@ typedef struct {
 void sha256_8way_init( sha256_8way_context *sc );
 void sha256_8way_update( sha256_8way_context *sc, const void *data, size_t len );
 void sha256_8way_close( sha256_8way_context *sc, void *dst );
+void sha256_8way_full( void *dst, const void *data, size_t len );

 #endif  // AVX2

@@ -92,6 +94,7 @@ typedef struct {
 void sha256_16way_init( sha256_16way_context *sc );
 void sha256_16way_update( sha256_16way_context *sc, const void *data, size_t len );
 void sha256_16way_close( sha256_16way_context *sc, void *dst );
+void sha256_16way_full( void *dst, const void *data, size_t len );

 #endif // AVX512

@@ -110,6 +113,7 @@ void sha512_4way_init( sha512_4way_context *sc);
 void sha512_4way_update( sha512_4way_context *sc, const void *data,
                         size_t len );
 void sha512_4way_close( sha512_4way_context *sc, void *dst );
+void sha512_4way_full( void *dst, const void *data, size_t len );

 #endif  // AVX2

@@ -128,6 +132,7 @@ void sha512_8way_init( sha512_8way_context *sc);
 void sha512_8way_update( sha512_8way_context *sc, const void *data, 
                         size_t len );
 void sha512_8way_close( sha512_8way_context *sc, void *dst );
+void sha512_8way_full( void *dst, const void *data, size_t len );

 #endif  // AVX512

--- a/algo/sha/sha256-hash-4way.c
+++ b/algo/sha/sha256-hash-4way.c
@@ -330,6 +330,14 @@ void sha256_4way_close( sha256_4way_context *sc, void *dst )
    mm128_block_bswap_32( dst, sc->val );
 }

+void sha256_4way_full( void *dst, const void *data, size_t len )
+{
+   sha256_4way_context ctx;   // one-shot: init, update with data/len, close into dst
+   sha256_4way_init( &ctx );
+   sha256_4way_update( &ctx, data, len );
+   sha256_4way_close( &ctx, dst );
+}
+
 #if defined(__AVX2__)

 // SHA-256 8 way
@@ -498,6 +506,10 @@ void sha256_8way_init( sha256_8way_context *sc )
 */
 }

+
+// need to handle odd byte length for yespower.
+// Assume only last update is odd.
+
 void sha256_8way_update( sha256_8way_context *sc, const void *data, size_t len )
 {
   __m256i *vdata = (__m256i*)data;
@@ -564,6 +576,13 @@ void sha256_8way_close( sha256_8way_context *sc, void *dst )
    mm256_block_bswap_32( dst, sc->val );
 }

+void sha256_8way_full( void *dst, const void *data, size_t len )
+{
+   sha256_8way_context ctx;   // one-shot: init, update with data/len, close into dst
+   sha256_8way_init( &ctx );
+   sha256_8way_update( &ctx, data, len );
+   sha256_8way_close( &ctx, dst );
+}

 #if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)

@@ -791,6 +810,14 @@ void sha256_16way_close( sha256_16way_context *sc, void *dst )
    mm512_block_bswap_32( dst, sc->val );
 }

+void sha256_16way_full( void *dst, const void *data, size_t len )
+{
+   sha256_16way_context ctx;   // one-shot: init, update with data/len, close into dst
+   sha256_16way_init( &ctx );
+   sha256_16way_update( &ctx, data, len );
+   sha256_16way_close( &ctx, dst );
+}
+
 #endif  // AVX512
 #endif  // __AVX2__
 #endif  // __SSE2__
--- a/algo/sha/sha256q-4way.c
+++ b/algo/sha/sha256q-4way.c
@@ -85,7 +85,7 @@ int scanhash_sha256q_8way( struct work *work, uint32_t max_nonce,
 	         if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
            {
 	           pdata[19] = n + lane;
-              submit_lane_solution( work, lane_hash, mythr, lane );
+              submit_solution( work, lane_hash, mythr );
            }
 	      }
         n += 8;
@@ -173,7 +173,7 @@ int scanhash_sha256q_4way( struct work *work, uint32_t max_nonce,
            if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
            {
              pdata[19] = n + lane;
-              submit_lane_solution( work, lane_hash, mythr, lane );
+              submit_solution( work, lane_hash, mythr );
            }
         }
         n += 4;
--- a/algo/sha/sha256t-4way.c
+++ b/algo/sha/sha256t-4way.c
@@ -78,7 +78,7 @@ int scanhash_sha256t_8way( struct work *work, const uint32_t max_nonce,
            if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
            {
              pdata[19] = n + lane;
-              submit_lane_solution( work, lane_hash, mythr, lane );
+              submit_solution( work, lane_hash, mythr );
 	         }
         }
         n += 8;
@@ -161,7 +161,7 @@ int scanhash_sha256t_4way( struct work *work, const uint32_t max_nonce,
            if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
            {
              pdata[19] = n + lane;
-              submit_lane_solution( work, lane_hash, mythr, lane );
+              submit_solution( work, lane_hash, mythr );
 	         }
         } 
         n += 4;
--- a/algo/skein/skein-4way.c
+++ b/algo/skein/skein-4way.c
@@ -65,7 +65,7 @@ int scanhash_skein_8way( struct work *work, uint32_t max_nonce,
          if ( valid_hash( lane_hash, ptarget ) )
          {
             pdata[19] = bswap_32( n + lane );
-             submit_lane_solution( work, lane_hash, mythr, lane );
+             submit_solution( work, lane_hash, mythr );
          }
       }
       *noncev = _mm512_add_epi32( *noncev,
@@ -162,7 +162,7 @@ int scanhash_skein_4way( struct work *work, uint32_t max_nonce,
          if ( valid_hash( lane_hash, ptarget ) )
          {
             pdata[19] = bswap_32( n + lane );
-             submit_lane_solution( work, lane_hash, mythr, lane );
+             submit_solution( work, lane_hash, mythr );
          }
       }
       *noncev = _mm256_add_epi32( *noncev,
--- a/algo/skein/skein2-4way.c
+++ b/algo/skein/skein2-4way.c
@@ -53,7 +53,7 @@ int scanhash_skein2_8way( struct work *work, uint32_t max_nonce,
          if ( valid_hash( lane_hash, ptarget ) && !bench )
          {
             pdata[19] = bswap_32( n + lane );
-             submit_lane_solution( work, lane_hash, mythr, lane );
+             submit_solution( work, lane_hash, mythr );
          }
       }
       *noncev = _mm512_add_epi32( *noncev,
@@ -115,7 +115,7 @@ int scanhash_skein2_4way( struct work *work, uint32_t max_nonce,
          if ( valid_hash( lane_hash, ptarget ) && !bench )
          {
             pdata[19] = bswap_32( n + lane );
-             submit_lane_solution( work, lane_hash, mythr, lane );
+             submit_solution( work, lane_hash, mythr );
          }
       }
       *noncev = _mm256_add_epi32( *noncev,
--- a/algo/x11/c11-4way.c
+++ b/algo/x11/c11-4way.c
@@ -279,7 +279,7 @@ int scanhash_c11_8way( struct work *work, uint32_t max_nonce,
             && fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
        {
           pdata[19] = n+i;
-           submit_lane_solution( work, hash+(i<<3), mythr, i );
+           submit_solution( work, hash+(i<<3), mythr );
        }
        n += 8;
     } while ( ( n < max_nonce ) && !work_restart[thr_id].restart );
@@ -459,7 +459,7 @@ int scanhash_c11_4way( struct work *work, uint32_t max_nonce,
            && fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
        {
           pdata[19] = n+i;
-           submit_lane_solution( work, hash+(i<<3), mythr, i );
+           submit_solution( work, hash+(i<<3), mythr );
        }
        n += 4;
     } while ( ( n < max_nonce ) && !work_restart[thr_id].restart );
--- a/algo/x11/timetravel-4way.c
+++ b/algo/x11/timetravel-4way.c
@@ -221,7 +221,7 @@ int scanhash_timetravel_4way( struct work *work, uint32_t max_nonce,
          && !opt_benchmark )
      {
          pdata[19] = n+i;
-          submit_lane_solution( work, hash+(i<<3), mythr, i );
+          submit_solution( work, hash+(i<<3), mythr );
      }
      n += 4;
   } while ( ( n < max_nonce ) && !(*restart) );
--- a/algo/x11/timetravel10-4way.c
+++ b/algo/x11/timetravel10-4way.c
@@ -256,7 +256,7 @@ int scanhash_timetravel10_4way( struct work *work,
            && !opt_benchmark )
      {
         pdata[19] = n+i;
-         submit_lane_solution( work, hash+(i<<3), mythr, i );
+         submit_solution( work, hash+(i<<3), mythr );
      }
      n += 4;
   } while ( ( n < max_nonce ) && !(*restart) );
--- a/algo/x11/tribus-4way.c
+++ b/algo/x11/tribus-4way.c
@@ -128,7 +128,7 @@ int scanhash_tribus_8way( struct work *work, uint32_t max_nonce,
     if ( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
     {
          pdata[19] = n+i;
-          submit_lane_solution( work, hash+(i<<3), mythr, i );
+          submit_solution( work, hash+(i<<3), mythr );
     }
     n += 8;
   } while ( ( n < max_nonce-8 )  && !work_restart[thr_id].restart);
@@ -213,7 +213,7 @@ int scanhash_tribus_4way( struct work *work, uint32_t max_nonce,
     if ( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
     {
          pdata[19] = n+i;
-          submit_lane_solution( work, hash+(i<<3), mythr, i );
+          submit_solution( work, hash+(i<<3), mythr );
     }
     n += 4;
   } while ( ( n < max_nonce-4 )  && !work_restart[thr_id].restart);
--- a/algo/x11/x11-4way.c
+++ b/algo/x11/x11-4way.c
@@ -279,7 +279,7 @@ int scanhash_x11_8way( struct work *work, uint32_t max_nonce,
              && fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
         {
             pdata[19] = n+i;
-             submit_lane_solution( work, hash+(i<<3), mythr, i );
+             submit_solution( work, hash+(i<<3), mythr );
         }
         n += 8;
     } while ( ( n < last_nonce ) && !work_restart[thr_id].restart );
@@ -469,7 +469,7 @@ int scanhash_x11_4way( struct work *work, uint32_t max_nonce,
                 && fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
            {
               pdata[19] = n+i;
-               submit_lane_solution( work, hash+(i<<3), mythr, i );
+               submit_solution( work, hash+(i<<3), mythr );
            }
            n += 4;
         } while ( ( n < max_nonce ) && !work_restart[thr_id].restart );
--- a/algo/x11/x11evo-4way.c
+++ b/algo/x11/x11evo-4way.c
@@ -269,7 +269,7 @@ int scanhash_x11evo_4way( struct work* work, uint32_t max_nonce,
                 && fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
         {
            pdata[19] = n+i;
-            submit_lane_solution( work, hash+(i<<3), mythr, i );
+            submit_solution( work, hash+(i<<3), mythr );
         }
         n += 4;
     } while ( ( n < max_nonce ) && !work_restart[thr_id].restart );
--- a/algo/x11/x11gost-4way.c
+++ b/algo/x11/x11gost-4way.c
@@ -312,7 +312,7 @@ int scanhash_x11gost_8way( struct work *work, uint32_t max_nonce,
              && fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
         {
             pdata[19] = n+i;
-             submit_lane_solution( work, hash+(i<<3), mythr, i );
+             submit_solution( work, hash+(i<<3), mythr );
         }
         n += 8;
     } while ( ( n < max_nonce ) && !work_restart[thr_id].restart );
@@ -498,7 +498,7 @@ int scanhash_x11gost_4way( struct work *work, uint32_t max_nonce,
             && fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
        {
           pdata[19] = n+i;
-           submit_lane_solution( work, hash+(i<<3), mythr, i );
+           submit_solution( work, hash+(i<<3), mythr );
        }
        n += 4;
     } while ( ( n < max_nonce ) && !work_restart[thr_id].restart );
--- a/algo/x12/x12-4way.c
+++ b/algo/x12/x12-4way.c
@@ -263,7 +263,7 @@ int scanhash_x12_8way( struct work *work, uint32_t max_nonce,
           if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
           {
              pdata[19] = n + lane;
-              submit_lane_solution( work, lane_hash, mythr, lane );
+              submit_solution( work, lane_hash, mythr );
           }
        }
        n += 8;
@@ -431,7 +431,7 @@ int scanhash_x12_4way( struct work *work, uint32_t max_nonce,
            if ( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
            {
               pdata[19] = n+i;
-               submit_lane_solution( work, hash+(i<<3), mythr, i );
+               submit_solution( work, hash+(i<<3), mythr );
            }
            n += 4;
         } while ( ( n < max_nonce ) && !work_restart[thr_id].restart );
--- a/algo/x13/phi1612-4way.c
+++ b/algo/x13/phi1612-4way.c
@@ -208,7 +208,7 @@ int scanhash_phi1612_8way( struct work *work, uint32_t max_nonce,
        if ( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
        {
           pdata[19] = n+i;
-           submit_lane_solution( work, hash+(i<<3), mythr, i );
+           submit_solution( work, hash+(i<<3), mythr );
        }
        n += 8;
     } while ( ( n < max_nonce-8 ) && !work_restart[thr_id].restart );
@@ -344,7 +344,7 @@ int scanhash_phi1612_4way( struct work *work, uint32_t max_nonce,
        if ( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
        {
           pdata[19] = n+i;
-           submit_lane_solution( work, hash+(i<<3), mythr, i );
+           submit_solution( work, hash+(i<<3), mythr );
        }
        n += 4;
     } while ( ( n < max_nonce ) && !work_restart[thr_id].restart );
--- a/algo/x13/skunk-4way.c
+++ b/algo/x13/skunk-4way.c
@@ -125,7 +125,7 @@ int scanhash_skunk_8way( struct work *work, uint32_t max_nonce,
      if ( unlikely( valid_hash( hash+(i<<3), ptarget ) && !bench ) )
      {
         pdata[19] = bswap_32( n+i );
-         submit_lane_solution( work, hash+(i<<3), mythr, i );
+         submit_solution( work, hash+(i<<3), mythr );
      }
      *noncev = _mm512_add_epi32( *noncev,
                                  m512_const1_64( 0x0000000800000000 ) );
@@ -237,7 +237,7 @@ int scanhash_skunk_4way( struct work *work, uint32_t max_nonce,
      if ( unlikely( valid_hash( hash+(i<<3), ptarget ) && !bench ) )
      {
         pdata[19] = bswap_32( n + i );
-         submit_lane_solution( work, hash+(i<<3), mythr, i );
+         submit_solution( work, hash+(i<<3), mythr );
      }
      *noncev = _mm256_add_epi32( *noncev,
                                  m256_const1_64( 0x0000000400000000 ) );
--- a/algo/x13/x13-4way.c
+++ b/algo/x13/x13-4way.c
@@ -319,7 +319,7 @@ int scanhash_x13_8way( struct work *work, uint32_t max_nonce,
              && fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
         {
             pdata[19] = n+i;
-             submit_lane_solution( work, hash+(i<<3), mythr, i );
+             submit_solution( work, hash+(i<<3), mythr );
         }
         n += 8;
     } while ( ( n < last_nonce ) && !work_restart[thr_id].restart );
@@ -531,7 +531,7 @@ int scanhash_x13_4way( struct work *work, uint32_t max_nonce,
            if ( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
            {
               pdata[19] = n+i;
-               submit_lane_solution( work, hash+(i<<3), mythr, i );
+               submit_solution( work, hash+(i<<3), mythr );
            }
            n += 4;
         } while ( ( n < max_nonce )  && !work_restart[thr_id].restart );
--- a/algo/x13/x13bcd-4way.c
+++ b/algo/x13/x13bcd-4way.c
@@ -321,7 +321,7 @@ int scanhash_x13bcd_8way( struct work *work, uint32_t max_nonce,
        if ( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
        {
              pdata[19] = n+i;
-              submit_lane_solution( work, hash+(i<<3), mythr, i );
+              submit_solution( work, hash+(i<<3), mythr );
        }
        n += 8;
     } while ( ( n < last_nonce ) && !work_restart[thr_id].restart );
@@ -541,7 +541,7 @@ int scanhash_x13bcd_4way( struct work *work, uint32_t max_nonce,
        if ( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
        {
            pdata[19] = n+i;
-            submit_lane_solution( work, hash+(i<<3), mythr, i );
+            submit_solution( work, hash+(i<<3), mythr );
        }
        n += 4;
     } while ( ( n < last_nonce ) && !work_restart[thr_id].restart );
--- a/algo/x13/x13sm3-4way.c
+++ b/algo/x13/x13sm3-4way.c
@@ -246,7 +246,7 @@ int scanhash_x13sm3_4way( struct work *work, uint32_t max_nonce,
            if ( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
            {
               pdata[19] = n+i;
-              submit_lane_solution( work, hash+(i<<3), mythr, i );
+              submit_solution( work, hash+(i<<3), mythr );
            }
            n += 4;
         } while ( ( n < max_nonce ) && !work_restart[thr_id].restart );
--- a/algo/x14/polytimos-4way.c
+++ b/algo/x14/polytimos-4way.c
@@ -129,7 +129,7 @@ int scanhash_polytimos_4way( struct work *work, uint32_t max_nonce,
      if( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
      {
         pdata[19] = n+i;
-         submit_lane_solution( work, hash+(i<<3), mythr, i );
+         submit_solution( work, hash+(i<<3), mythr );
      }
      n += 4;

--- a/algo/x14/veltor-4way.c
+++ b/algo/x14/veltor-4way.c
@@ -108,7 +108,7 @@ int scanhash_veltor_4way( struct work *work, uint32_t max_nonce,
         if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
         {
            pdata[19] = n+i;
-            submit_lane_solution( work, hash+(i<<3), mythr, i );
+            submit_solution( work, hash+(i<<3), mythr );
         }
         n += 4;
     } while ( ( n < max_nonce ) && !(*restart) );
--- a/algo/x14/x14-4way.c
+++ b/algo/x14/x14-4way.c
@@ -324,7 +324,7 @@ int scanhash_x14_8way( struct work *work, uint32_t max_nonce,
            if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
            {
                pdata[19] = n + lane;
-                submit_lane_solution( work, lane_hash, mythr, lane );
+                submit_solution( work, lane_hash, mythr );
            }
         }
         n += 8;
@@ -534,7 +534,7 @@ int scanhash_x14_4way( struct work *work, uint32_t max_nonce,
           if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
           {
               pdata[19] = n + lane;
-               submit_lane_solution( work, lane_hash, mythr, lane );
+               submit_solution( work, lane_hash, mythr );
           }
        }
        n += 4;
--- a/algo/x15/x15-4way.c
+++ b/algo/x15/x15-4way.c
@@ -364,7 +364,7 @@ int scanhash_x15_8way( struct work *work, uint32_t max_nonce,
        if ( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
        {
           pdata[19] = n+i;
-           submit_lane_solution( work, hash, mythr, i );
+           submit_solution( work, hash, mythr );
        }
        n += 8;
     } while ( ( n < last_nonce ) && !work_restart[thr_id].restart );
@@ -592,7 +592,7 @@ int scanhash_x15_4way( struct work *work, uint32_t max_nonce,
         if ( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
         {
            pdata[19] = n+i;
-            submit_lane_solution( work, hash, mythr, i );
+            submit_solution( work, hash, mythr );
         }
         n += 4;
     } while ( ( n < last_nonce ) && !work_restart[thr_id].restart );
--- a/algo/x16/x16r-4way.c
+++ b/algo/x16/x16r-4way.c
@@ -505,7 +505,7 @@ int scanhash_x16r_8way( struct work *work, uint32_t max_nonce,
      if ( unlikely( valid_hash( hash + (i<<3), ptarget ) && !bench ) )
      {
         pdata[19] = bswap_32( n+i );
-         submit_lane_solution( work, hash+(i<<3), mythr, i );
+         submit_solution( work, hash+(i<<3), mythr );
      }
      *noncev = _mm512_add_epi32( *noncev,
                                  m512_const1_64( 0x0000000800000000 ) );
@@ -869,7 +869,7 @@ int scanhash_x16r_4way( struct work *work, uint32_t max_nonce,
      if ( unlikely( valid_hash( hash + (i<<3), ptarget ) && !bench ) )
      {
         pdata[19] = bswap_32( n+i );
-         submit_lane_solution( work, hash+(i<<3), mythr, i );
+         submit_solution( work, hash+(i<<3), mythr );
      }
      *noncev = _mm256_add_epi32( *noncev,
                                  m256_const1_64( 0x0000000400000000 ) );
--- a/algo/x16/x16rt-4way.c
+++ b/algo/x16/x16rt-4way.c
@@ -46,7 +46,7 @@ int scanhash_x16rt_8way( struct work *work, uint32_t max_nonce,
      if ( unlikely( valid_hash( hash + (i<<3), ptarget ) && !bench ) )
      {
         pdata[19] = bswap_32( n+i );
-         submit_lane_solution( work, hash+(i<<3), mythr, i );
+         submit_solution( work, hash+(i<<3), mythr );
      }
      *noncev = _mm512_add_epi32( *noncev,
                                  m512_const1_64( 0x0000000800000000 ) );
@@ -99,7 +99,7 @@ int scanhash_x16rt_4way( struct work *work, uint32_t max_nonce,
      if ( unlikely( valid_hash( hash + (i<<3), ptarget ) && !bench ) )
      {
         pdata[19] = bswap_32( n+i );
-         submit_lane_solution( work, hash+(i<<3), mythr, i );
+         submit_solution( work, hash+(i<<3), mythr );
      }
      *noncev = _mm256_add_epi32( *noncev,
                                  m256_const1_64( 0x0000000400000000 ) );
--- a/algo/x16/x16rv2-4way.c
+++ b/algo/x16/x16rv2-4way.c
@@ -678,7 +678,7 @@ int scanhash_x16rv2_8way( struct work *work, uint32_t max_nonce,
      if ( unlikely( valid_hash( hash + (i<<3), ptarget ) && !bench ) )
      {
         pdata[19] = bswap_32( n+i );
-         submit_lane_solution( work, hash+(i<<3), mythr, i );
+         submit_solution( work, hash+(i<<3), mythr );
      }
      *noncev = _mm512_add_epi32( *noncev,
                                  m512_const1_64( 0x0000000800000000 ) );
@@ -1131,7 +1131,7 @@ int scanhash_x16rv2_4way( struct work *work, uint32_t max_nonce,
      if ( unlikely( valid_hash( hash + (i<<3), ptarget ) && !bench ) )
      {
         pdata[19] = bswap_32( n+i );
-         submit_lane_solution( work, hash+(i<<3), mythr, i );
+         submit_solution( work, hash+(i<<3), mythr );
      }
      *noncev = _mm256_add_epi32( *noncev,
                                  m256_const1_64( 0x0000000400000000 ) );
--- a/algo/x16/x21s-4way.c
+++ b/algo/x16/x21s-4way.c
@@ -177,7 +177,7 @@ int scanhash_x21s_8way( struct work *work, uint32_t max_nonce,
         if ( likely( valid_hash( lane_hash, ptarget ) && !bench ) )
         {
             pdata[19] = bswap_32( n + lane );
-             submit_lane_solution( work, lane_hash, mythr, lane );
+             submit_solution( work, lane_hash, mythr );
         }
      }
      *noncev = _mm512_add_epi32( *noncev,
@@ -347,7 +347,7 @@ int scanhash_x21s_4way( struct work *work, uint32_t max_nonce,
      if ( unlikely( valid_hash( hash + (i<<3), ptarget ) && !bench ) )
      {
         pdata[19] = bswap_32( n+i );
-         submit_lane_solution( work, hash+(i<<3), mythr, i );
+         submit_solution( work, hash+(i<<3), mythr );
      }
      *noncev = _mm256_add_epi32( *noncev,
                                  m256_const1_64( 0x0000000400000000 ) );
--- a/algo/x17/sonoa-4way.c
+++ b/algo/x17/sonoa-4way.c
@@ -58,7 +58,7 @@ union _sonoa_8way_context_overlay

 typedef union _sonoa_8way_context_overlay sonoa_8way_context_overlay;

-void sonoa_8way_hash( void *state, const void *input )
+int sonoa_8way_hash( void *state, const void *input, int thrid )
 {
     uint64_t vhash[8*8] __attribute__ ((aligned (128)));
     uint64_t vhashA[8*8] __attribute__ ((aligned (64)));
@@ -186,6 +186,7 @@ void sonoa_8way_hash( void *state, const void *input )

 #endif

+     if ( work_restart[thrid].restart ) return 0;
 // 2

     bmw512_8way_full( &ctx.bmw, vhash, vhash, 64 );
@@ -301,6 +302,7 @@ void sonoa_8way_hash( void *state, const void *input )
     hamsi512_8way_update( &ctx.hamsi, vhash, 64 );
     hamsi512_8way_close( &ctx.hamsi, vhash );

+     if ( work_restart[thrid].restart ) return 0;
 // 3

     bmw512_8way_full( &ctx.bmw, vhash, vhash, 64 );
@@ -430,6 +432,7 @@ void sonoa_8way_hash( void *state, const void *input )
     sph_fugue512_full( &ctx.fugue, hash6, hash6, 64 );
     sph_fugue512_full( &ctx.fugue, hash7, hash7, 64 );

+     if ( work_restart[thrid].restart ) return 0;
 // 4

     intrlv_8x64_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
@@ -627,6 +630,7 @@ void sonoa_8way_hash( void *state, const void *input )

 #endif

+     if ( work_restart[thrid].restart ) return 0;
 // 5

     bmw512_8way_full( &ctx.bmw, vhash, vhash, 64 );
@@ -779,6 +783,7 @@ void sonoa_8way_hash( void *state, const void *input )
     sph_whirlpool512_full( &ctx.whirlpool, hash6, hash6, 64 );
     sph_whirlpool512_full( &ctx.whirlpool, hash7, hash7, 64 );

+     if ( work_restart[thrid].restart ) return 0;
 // 6

     intrlv_8x64_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
@@ -947,6 +952,7 @@ void sonoa_8way_hash( void *state, const void *input )
     sph_whirlpool512_full( &ctx.whirlpool, hash6, hash6, 64 );
     sph_whirlpool512_full( &ctx.whirlpool, hash7, hash7, 64 );

+     if ( work_restart[thrid].restart ) return 0;
 // 7

     intrlv_8x64_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
@@ -1108,6 +1114,8 @@ void sonoa_8way_hash( void *state, const void *input )
     haval256_5_8way_init( &ctx.haval );
     haval256_5_8way_update( &ctx.haval, vhashA, 64 );
     haval256_5_8way_close( &ctx.haval, state );
+
+     return 1;
 }
     
 int scanhash_sonoa_8way( struct work *work, uint32_t max_nonce,
@@ -1133,8 +1141,7 @@ int scanhash_sonoa_8way( struct work *work, uint32_t max_nonce,

   do
   {
-      sonoa_8way_hash( hash, vdata );
-
+      if ( sonoa_8way_hash( hash, vdata, thr_id ) )
      for ( int lane = 0; lane < 8; lane++ )
      if unlikely( ( hashd7[ lane ] <= targ32 ) )
      {
@@ -1142,7 +1149,7 @@ int scanhash_sonoa_8way( struct work *work, uint32_t max_nonce,
         if ( likely( valid_hash( lane_hash, ptarget ) && !opt_benchmark ) )
         {
            pdata[19] = bswap_32( n + lane );
-            submit_lane_solution( work, lane_hash, mythr, lane );
+            submit_solution( work, lane_hash, mythr );
         }
      }
      *noncev = _mm512_add_epi32( *noncev,
@@ -1179,7 +1186,7 @@ union _sonoa_4way_context_overlay

 typedef union _sonoa_4way_context_overlay sonoa_4way_context_overlay;

-void sonoa_4way_hash( void *state, const void *input )
+int sonoa_4way_hash( void *state, const void *input, int thrid )
 {
     uint64_t hash0[8] __attribute__ ((aligned (64)));
     uint64_t hash1[8] __attribute__ ((aligned (64)));
@@ -1243,6 +1250,7 @@ void sonoa_4way_hash( void *state, const void *input )
     echo_full( &ctx.echo, (BitSequence *)hash3, 512,
                     (const BitSequence *)hash3, 64 );
     
+     if ( work_restart[thrid].restart ) return 0;
 // 2

     intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
@@ -1302,6 +1310,7 @@ void sonoa_4way_hash( void *state, const void *input )
     hamsi512_4way_update( &ctx.hamsi, vhash, 64 );
     hamsi512_4way_close( &ctx.hamsi, vhash );

+     if ( work_restart[thrid].restart ) return 0;
 // 3

     bmw512_4way_init( &ctx.bmw );
@@ -1366,6 +1375,7 @@ void sonoa_4way_hash( void *state, const void *input )
     sph_fugue512_full( &ctx.fugue, hash2, hash2, 64 );
     sph_fugue512_full( &ctx.fugue, hash3, hash3, 64 );

+     if ( work_restart[thrid].restart ) return 0;
 // 4
     intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );

@@ -1462,6 +1472,7 @@ void sonoa_4way_hash( void *state, const void *input )
     shavite512_2way_init( &ctx.shavite );
     shavite512_2way_update_close( &ctx.shavite, vhashB, vhashB, 64 );

+     if ( work_restart[thrid].restart ) return 0;
 // 5
     rintrlv_2x128_4x64( vhash, vhashA, vhashB, 512 );

@@ -1546,6 +1557,7 @@ void sonoa_4way_hash( void *state, const void *input )
     sph_whirlpool512_full( &ctx.whirlpool, hash2, hash2, 64 );
     sph_whirlpool512_full( &ctx.whirlpool, hash3, hash3, 64 );

+     if ( work_restart[thrid].restart ) return 0;
 // 6

     intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
@@ -1638,6 +1650,7 @@ void sonoa_4way_hash( void *state, const void *input )
     sph_whirlpool512_full( &ctx.whirlpool, hash2, hash2, 64 );
     sph_whirlpool512_full( &ctx.whirlpool, hash3, hash3, 64 );

+     if ( work_restart[thrid].restart ) return 0;    
 // 7

     intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
@@ -1728,6 +1741,8 @@ void sonoa_4way_hash( void *state, const void *input )
     haval256_5_4way_init( &ctx.haval );
     haval256_5_4way_update( &ctx.haval, vhashB, 64 );
     haval256_5_4way_close( &ctx.haval, state );
+
+     return 1;
 }

 int scanhash_sonoa_4way( struct work *work, const uint32_t max_nonce,
@@ -1752,8 +1767,7 @@ int scanhash_sonoa_4way( struct work *work, const uint32_t max_nonce,

     do
     {
-        sonoa_4way_hash( hash, vdata );
-
+        if ( sonoa_4way_hash( hash, vdata, thr_id ) )
        for ( int lane = 0; lane < 4; lane++ )
        if ( unlikely( hashd7[ lane ] <= targ32 ) )
        {
@@ -1761,7 +1775,7 @@ int scanhash_sonoa_4way( struct work *work, const uint32_t max_nonce,
           if ( likely( valid_hash( lane_hash, ptarget ) && !opt_benchmark ) )
           {
              pdata[19] = bswap_32( n + lane );
-              submit_lane_solution( work, lane_hash, mythr, lane );
+              submit_solution( work, lane_hash, mythr );
           }
        }
        *noncev = _mm256_add_epi32( *noncev,
--- a/algo/x17/sonoa-gate.c
+++ b/algo/x17/sonoa-gate.c
@@ -4,14 +4,14 @@ bool register_sonoa_algo( algo_gate_t* gate )
 {
 #if defined (SONOA_8WAY)
  gate->scanhash  = (void*)&scanhash_sonoa_8way;
-  gate->hash      = (void*)&sonoa_8way_hash;
+//  gate->hash      = (void*)&sonoa_8way_hash;
 #elif defined (SONOA_4WAY)
  gate->scanhash  = (void*)&scanhash_sonoa_4way;
-  gate->hash      = (void*)&sonoa_4way_hash;
+//  gate->hash      = (void*)&sonoa_4way_hash;
 #else
  init_sonoa_ctx();
  gate->scanhash  = (void*)&scanhash_sonoa;
-  gate->hash      = (void*)&sonoa_hash;
+//  gate->hash      = (void*)&sonoa_hash;
 #endif
  gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT;
  return true;
--- a/algo/x17/sonoa-gate.h
+++ b/algo/x17/sonoa-gate.h
@@ -14,19 +14,19 @@ bool register_sonoa_algo( algo_gate_t* gate );

 #if defined(SONOA_8WAY)

-void sonoa_8way_hash( void *state, const void *input );
+int sonoa_8way_hash( void *state, const void *input, int thrid );
 int scanhash_sonoa_8way( struct work *work, uint32_t max_nonce,
                         uint64_t *hashes_done, struct thr_info *mythr );

 #elif defined(SONOA_4WAY)

-void sonoa_4way_hash( void *state, const void *input );
+int sonoa_4way_hash( void *state, const void *input, int thrid );
 int scanhash_sonoa_4way( struct work *work, uint32_t max_nonce,
                         uint64_t *hashes_done, struct thr_info *mythr );

 #else

-void sonoa_hash( void *state, const void *input );
+int sonoa_hash( void *state, const void *input, int thrid );
 int scanhash_sonoa( struct work *work, uint32_t max_nonce,
                  uint64_t *hashes_done, struct thr_info *mythr );
 void init_sonoa_ctx();
--- a/algo/x17/sonoa.c
+++ b/algo/x17/sonoa.c
@@ -83,27 +83,27 @@ void init_sonoa_ctx()
        sph_haval256_5_init(&sonoa_ctx.haval);
 };

-void sonoa_hash( void *state, const void *input )
+int sonoa_hash( void *state, const void *input, int thrid )
 {
 	uint8_t hash[128] __attribute__ ((aligned (64)));
-        sonoa_ctx_holder ctx __attribute__ ((aligned (64)));
-        memcpy( &ctx, &sonoa_ctx, sizeof(sonoa_ctx) );
+   sonoa_ctx_holder ctx __attribute__ ((aligned (64)));
+   memcpy( &ctx, &sonoa_ctx, sizeof(sonoa_ctx) );

-        sph_blake512(&ctx.blake, input, 80);
+   sph_blake512(&ctx.blake, input, 80);
 	sph_blake512_close(&ctx.blake, hash);

 	sph_bmw512(&ctx.bmw, hash, 64);
 	sph_bmw512_close(&ctx.bmw, hash);

 #if defined(__AES__)
-        update_and_final_groestl( &ctx.groestl, (char*)hash,
-                                  (const char*)hash, 512 );
+   update_and_final_groestl( &ctx.groestl, (char*)hash,
+                                     (const char*)hash, 512 );
 #else
-        sph_groestl512(&ctx.groestl, hash, 64);
-        sph_groestl512_close(&ctx.groestl, hash);
+   sph_groestl512(&ctx.groestl, hash, 64);
+   sph_groestl512_close(&ctx.groestl, hash);
 #endif

-	sph_skein512(&ctx.skein, hash, 64);
+   sph_skein512(&ctx.skein, hash, 64);
 	sph_skein512_close(&ctx.skein, hash);

 	sph_jh512(&ctx.jh, hash, 64);
@@ -112,454 +112,461 @@ void sonoa_hash( void *state, const void *input )
 	sph_keccak512(&ctx.keccak, hash, 64);
 	sph_keccak512_close(&ctx.keccak, hash);

-        update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
-                                (const BitSequence*)hash, 64 );
+   update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
+                                 (const BitSequence*)hash, 64 );

-        cubehashUpdateDigest( &ctx.cubehash, (byte*) hash,
-                              (const byte*)hash, 64 );
+   cubehashUpdateDigest( &ctx.cubehash, (byte*) hash,
+                                   (const byte*)hash, 64 );

 	sph_shavite512(&ctx.shavite, hash, 64);
 	sph_shavite512_close(&ctx.shavite, hash);

-        update_final_sd( &ctx.simd, (BitSequence *)hash,
+   update_final_sd( &ctx.simd, (BitSequence *)hash,
                         (const BitSequence *)hash, 512 );

 #if defined(__AES__)
-        update_final_echo ( &ctx.echo, (BitSequence *)hash,
+   update_final_echo ( &ctx.echo, (BitSequence *)hash,
                            (const BitSequence *)hash, 512 );
 #else
-        sph_echo512(&ctx.echo, hash, 64);
-        sph_echo512_close(&ctx.echo, hash);
+   sph_echo512(&ctx.echo, hash, 64);
+   sph_echo512_close(&ctx.echo, hash);
 #endif

+   if ( work_restart[thrid].restart ) return 0;
 //

-        sph_bmw512_init( &ctx.bmw);
-        sph_bmw512(&ctx.bmw, hash, 64);
-        sph_bmw512_close(&ctx.bmw, hash);
+   sph_bmw512_init( &ctx.bmw);
+   sph_bmw512(&ctx.bmw, hash, 64);
+   sph_bmw512_close(&ctx.bmw, hash);

 #if defined(__AES__)
-        init_groestl( &ctx.groestl, 64 );
-        update_and_final_groestl( &ctx.groestl, (char*)hash,
-                                  (const char*)hash, 512 );
+   init_groestl( &ctx.groestl, 64 );
+   update_and_final_groestl( &ctx.groestl, (char*)hash,
+                                     (const char*)hash, 512 );
 #else
-        sph_groestl512_init(&ctx.groestl );
-        sph_groestl512(&ctx.groestl, hash, 64);
-        sph_groestl512_close(&ctx.groestl, hash);
+   sph_groestl512_init(&ctx.groestl );
+   sph_groestl512(&ctx.groestl, hash, 64);
+   sph_groestl512_close(&ctx.groestl, hash);
 #endif

-        sph_skein512_init( &ctx.skein);
-        sph_skein512(&ctx.skein, hash, 64);
-        sph_skein512_close(&ctx.skein, hash);
+   sph_skein512_init( &ctx.skein);
+   sph_skein512(&ctx.skein, hash, 64);
+   sph_skein512_close(&ctx.skein, hash);

-        sph_jh512_init( &ctx.jh);
-        sph_jh512(&ctx.jh, hash, 64);
-        sph_jh512_close(&ctx.jh, hash);
+   sph_jh512_init( &ctx.jh);
+   sph_jh512(&ctx.jh, hash, 64);
+   sph_jh512_close(&ctx.jh, hash);

-        sph_keccak512_init( &ctx.keccak );
-        sph_keccak512(&ctx.keccak, hash, 64);
-        sph_keccak512_close(&ctx.keccak, hash);
+   sph_keccak512_init( &ctx.keccak );
+   sph_keccak512(&ctx.keccak, hash, 64);
+   sph_keccak512_close(&ctx.keccak, hash);

-        init_luffa( &ctx.luffa, 512 );
-        update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
-                                (const BitSequence*)hash, 64 );
+   init_luffa( &ctx.luffa, 512 );
+   update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
+                                 (const BitSequence*)hash, 64 );

-        cubehashInit( &ctx.cubehash, 512, 16, 32 );
-        cubehashUpdateDigest( &ctx.cubehash, (byte*) hash,
-                              (const byte*)hash, 64 );
+   cubehashInit( &ctx.cubehash, 512, 16, 32 );
+   cubehashUpdateDigest( &ctx.cubehash, (byte*) hash,
+                                   (const byte*)hash, 64 );

-        sph_shavite512_init( &ctx.shavite );
-        sph_shavite512(&ctx.shavite, hash, 64);
-        sph_shavite512_close(&ctx.shavite, hash);
+   sph_shavite512_init( &ctx.shavite );
+   sph_shavite512(&ctx.shavite, hash, 64);
+   sph_shavite512_close(&ctx.shavite, hash);

-        init_sd( &ctx.simd, 512 );
-        update_final_sd( &ctx.simd, (BitSequence *)hash,
+   init_sd( &ctx.simd, 512 );
+   update_final_sd( &ctx.simd, (BitSequence *)hash,
                         (const BitSequence *)hash, 512 );

 #if defined(__AES__)
-        init_echo( &ctx.echo, 512 );
-        update_final_echo ( &ctx.echo, (BitSequence *)hash,
+   init_echo( &ctx.echo, 512 );
+   update_final_echo ( &ctx.echo, (BitSequence *)hash,
                            (const BitSequence *)hash, 512 );
 #else
-        sph_echo512_init( &ctx.echo );
-        sph_echo512(&ctx.echo, hash, 64);
-        sph_echo512_close(&ctx.echo, hash);
+   sph_echo512_init( &ctx.echo );
+   sph_echo512(&ctx.echo, hash, 64);
+   sph_echo512_close(&ctx.echo, hash);
 #endif

-        sph_hamsi512(&ctx.hamsi, hash, 64);
-        sph_hamsi512_close(&ctx.hamsi, hash);
+   sph_hamsi512(&ctx.hamsi, hash, 64);
+   sph_hamsi512_close(&ctx.hamsi, hash);
 	
+   if ( work_restart[thrid].restart ) return 0;
 //

-        sph_bmw512_init( &ctx.bmw);
-	sph_bmw512(&ctx.bmw, hash, 64);
-        sph_bmw512_close(&ctx.bmw, hash);
+   sph_bmw512_init( &ctx.bmw);
+   sph_bmw512(&ctx.bmw, hash, 64);
+   sph_bmw512_close(&ctx.bmw, hash);

 #if defined(__AES__)
-        init_groestl( &ctx.groestl, 64 );
-        update_and_final_groestl( &ctx.groestl, (char*)hash,
-                                  (const char*)hash, 512 );
+   init_groestl( &ctx.groestl, 64 );
+   update_and_final_groestl( &ctx.groestl, (char*)hash,
+                                     (const char*)hash, 512 );
 #else
-        sph_groestl512_init(&ctx.groestl );
-        sph_groestl512(&ctx.groestl, hash, 64);
-        sph_groestl512_close(&ctx.groestl, hash);
+   sph_groestl512_init(&ctx.groestl );
+   sph_groestl512(&ctx.groestl, hash, 64);
+   sph_groestl512_close(&ctx.groestl, hash);
 #endif

-        sph_skein512_init( &ctx.skein);
-        sph_skein512(&ctx.skein, hash, 64);
-        sph_skein512_close(&ctx.skein, hash);
+   sph_skein512_init( &ctx.skein);
+   sph_skein512(&ctx.skein, hash, 64);
+   sph_skein512_close(&ctx.skein, hash);

-        sph_jh512_init( &ctx.jh);
-        sph_jh512(&ctx.jh, hash, 64);
-        sph_jh512_close(&ctx.jh, hash);
+   sph_jh512_init( &ctx.jh);
+   sph_jh512(&ctx.jh, hash, 64);
+   sph_jh512_close(&ctx.jh, hash);

-        sph_keccak512_init( &ctx.keccak );
-        sph_keccak512(&ctx.keccak, hash, 64);
-        sph_keccak512_close(&ctx.keccak, hash);
+   sph_keccak512_init( &ctx.keccak );
+   sph_keccak512(&ctx.keccak, hash, 64);
+   sph_keccak512_close(&ctx.keccak, hash);

-        init_luffa( &ctx.luffa, 512 );
-        update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
-                                (const BitSequence*)hash, 64 );
+   init_luffa( &ctx.luffa, 512 );
+   update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
+                                 (const BitSequence*)hash, 64 );

-        cubehashInit( &ctx.cubehash, 512, 16, 32 );
-        cubehashUpdateDigest( &ctx.cubehash, (byte*) hash,
-                              (const byte*)hash, 64 );
+   cubehashInit( &ctx.cubehash, 512, 16, 32 );
+   cubehashUpdateDigest( &ctx.cubehash, (byte*)hash,
+                                  (const byte*)hash, 64 );

-        sph_shavite512_init( &ctx.shavite );
-        sph_shavite512(&ctx.shavite, hash, 64);
-        sph_shavite512_close(&ctx.shavite, hash);
+   sph_shavite512_init( &ctx.shavite );
+   sph_shavite512(&ctx.shavite, hash, 64);
+   sph_shavite512_close(&ctx.shavite, hash);

-        init_sd( &ctx.simd, 512 );
-        update_final_sd( &ctx.simd, (BitSequence *)hash,
+   init_sd( &ctx.simd, 512 );
+   update_final_sd( &ctx.simd, (BitSequence *)hash,
                         (const BitSequence *)hash, 512 );

 #if defined(__AES__)
-        init_echo( &ctx.echo, 512 );
-        update_final_echo ( &ctx.echo, (BitSequence *)hash,
+   init_echo( &ctx.echo, 512 );
+   update_final_echo ( &ctx.echo, (BitSequence *)hash,
                            (const BitSequence *)hash, 512 );
 #else
-        sph_echo512_init( &ctx.echo );
-        sph_echo512(&ctx.echo, hash, 64);
-        sph_echo512_close(&ctx.echo, hash);
+   sph_echo512_init( &ctx.echo );
+   sph_echo512(&ctx.echo, hash, 64);
+   sph_echo512_close(&ctx.echo, hash);
 #endif

-        sph_hamsi512_init( &ctx.hamsi );
-        sph_hamsi512(&ctx.hamsi, hash, 64);
-        sph_hamsi512_close(&ctx.hamsi, hash);
+   sph_hamsi512_init( &ctx.hamsi );
+   sph_hamsi512(&ctx.hamsi, hash, 64);
+   sph_hamsi512_close(&ctx.hamsi, hash);

-        sph_fugue512(&ctx.fugue, hash, 64);
-        sph_fugue512_close(&ctx.fugue, hash);
+   sph_fugue512(&ctx.fugue, hash, 64);
+   sph_fugue512_close(&ctx.fugue, hash);

+   if ( work_restart[thrid].restart ) return 0;
 //

-        sph_bmw512_init( &ctx.bmw);
-        sph_bmw512(&ctx.bmw, hash, 64);
-        sph_bmw512_close(&ctx.bmw, hash);
+   sph_bmw512_init( &ctx.bmw);
+   sph_bmw512(&ctx.bmw, hash, 64);
+   sph_bmw512_close(&ctx.bmw, hash);

 #if defined(__AES__)
-        init_groestl( &ctx.groestl, 64 );
-        update_and_final_groestl( &ctx.groestl, (char*)hash,
+   init_groestl( &ctx.groestl, 64 );
+   update_and_final_groestl( &ctx.groestl, (char*)hash,
                                  (const char*)hash, 512 );
 #else
-        sph_groestl512_init(&ctx.groestl );
-        sph_groestl512(&ctx.groestl, hash, 64);
-        sph_groestl512_close(&ctx.groestl, hash);
+   sph_groestl512_init(&ctx.groestl );
+   sph_groestl512(&ctx.groestl, hash, 64);
+   sph_groestl512_close(&ctx.groestl, hash);
 #endif

-        sph_skein512_init( &ctx.skein);
-        sph_skein512(&ctx.skein, hash, 64);
-        sph_skein512_close(&ctx.skein, hash);
+   sph_skein512_init( &ctx.skein);
+   sph_skein512(&ctx.skein, hash, 64);
+   sph_skein512_close(&ctx.skein, hash);

-        sph_jh512_init( &ctx.jh);
-        sph_jh512(&ctx.jh, hash, 64);
-        sph_jh512_close(&ctx.jh, hash);
+   sph_jh512_init( &ctx.jh);
+   sph_jh512(&ctx.jh, hash, 64);
+   sph_jh512_close(&ctx.jh, hash);

-        sph_keccak512_init( &ctx.keccak );
-        sph_keccak512(&ctx.keccak, hash, 64);
-        sph_keccak512_close(&ctx.keccak, hash);
+   sph_keccak512_init( &ctx.keccak );
+   sph_keccak512(&ctx.keccak, hash, 64);
+   sph_keccak512_close(&ctx.keccak, hash);

-        init_luffa( &ctx.luffa, 512 );
-        update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
+   init_luffa( &ctx.luffa, 512 );
+   update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
                                (const BitSequence*)hash, 64 );

-        cubehashInit( &ctx.cubehash, 512, 16, 32 );
-        cubehashUpdateDigest( &ctx.cubehash, (byte*) hash,
+   cubehashInit( &ctx.cubehash, 512, 16, 32 );
+   cubehashUpdateDigest( &ctx.cubehash, (byte*) hash,
                              (const byte*)hash, 64 );

-        sph_shavite512_init( &ctx.shavite );
-        sph_shavite512(&ctx.shavite, hash, 64);
-        sph_shavite512_close(&ctx.shavite, hash);
+   sph_shavite512_init( &ctx.shavite );
+   sph_shavite512(&ctx.shavite, hash, 64);
+   sph_shavite512_close(&ctx.shavite, hash);

-        init_sd( &ctx.simd, 512 );
-        update_final_sd( &ctx.simd, (BitSequence *)hash,
+   init_sd( &ctx.simd, 512 );
+   update_final_sd( &ctx.simd, (BitSequence *)hash,
                         (const BitSequence *)hash, 512 );

 #if defined(__AES__)
-        init_echo( &ctx.echo, 512 );
-        update_final_echo ( &ctx.echo, (BitSequence *)hash,
+   init_echo( &ctx.echo, 512 );
+   update_final_echo ( &ctx.echo, (BitSequence *)hash,
                            (const BitSequence *)hash, 512 );
 #else
-        sph_echo512_init( &ctx.echo );
-        sph_echo512(&ctx.echo, hash, 64);
-        sph_echo512_close(&ctx.echo, hash);
+   sph_echo512_init( &ctx.echo );
+   sph_echo512(&ctx.echo, hash, 64);
+   sph_echo512_close(&ctx.echo, hash);
 #endif

-        sph_hamsi512_init( &ctx.hamsi );
-        sph_hamsi512(&ctx.hamsi, hash, 64);
-        sph_hamsi512_close(&ctx.hamsi, hash);
+   sph_hamsi512_init( &ctx.hamsi );
+   sph_hamsi512(&ctx.hamsi, hash, 64);
+   sph_hamsi512_close(&ctx.hamsi, hash);

-        sph_fugue512_init( &ctx.fugue );
-        sph_fugue512(&ctx.fugue, hash, 64);
-        sph_fugue512_close(&ctx.fugue, hash);
+   sph_fugue512_init( &ctx.fugue );
+   sph_fugue512(&ctx.fugue, hash, 64);
+   sph_fugue512_close(&ctx.fugue, hash);

-        sph_shabal512(&ctx.shabal, hash, 64);
-        sph_shabal512_close(&ctx.shabal, hash);
+   sph_shabal512(&ctx.shabal, hash, 64);
+   sph_shabal512_close(&ctx.shabal, hash);

-        sph_hamsi512_init( &ctx.hamsi );
-        sph_hamsi512(&ctx.hamsi, hash, 64);
-        sph_hamsi512_close(&ctx.hamsi, hash);
+   sph_hamsi512_init( &ctx.hamsi );
+   sph_hamsi512(&ctx.hamsi, hash, 64);
+   sph_hamsi512_close(&ctx.hamsi, hash);

 #if defined(__AES__)
-        init_echo( &ctx.echo, 512 );
-        update_final_echo ( &ctx.echo, (BitSequence *)hash,
+   init_echo( &ctx.echo, 512 );
+   update_final_echo ( &ctx.echo, (BitSequence *)hash,
                            (const BitSequence *)hash, 512 );
 #else
-        sph_echo512_init( &ctx.echo );
-        sph_echo512(&ctx.echo, hash, 64);
-        sph_echo512_close(&ctx.echo, hash);
+   sph_echo512_init( &ctx.echo );
+   sph_echo512(&ctx.echo, hash, 64);
+   sph_echo512_close(&ctx.echo, hash);
 #endif

-        sph_shavite512_init( &ctx.shavite );
-        sph_shavite512(&ctx.shavite, hash, 64);
-        sph_shavite512_close(&ctx.shavite, hash);
+   sph_shavite512_init( &ctx.shavite );
+   sph_shavite512(&ctx.shavite, hash, 64);
+   sph_shavite512_close(&ctx.shavite, hash);

+   if ( work_restart[thrid].restart ) return 0;
 //

-        sph_bmw512_init( &ctx.bmw);
-        sph_bmw512(&ctx.bmw, hash, 64);
-        sph_bmw512_close(&ctx.bmw, hash);
+   sph_bmw512_init( &ctx.bmw);
+   sph_bmw512(&ctx.bmw, hash, 64);
+   sph_bmw512_close(&ctx.bmw, hash);

-        sph_shabal512_init( &ctx.shabal );
+   sph_shabal512_init( &ctx.shabal );
 	sph_shabal512(&ctx.shabal, hash, 64);
-        sph_shabal512_close(&ctx.shabal, hash);
+   sph_shabal512_close(&ctx.shabal, hash);

 #if defined(__AES__)
-        init_groestl( &ctx.groestl, 64 );
-        update_and_final_groestl( &ctx.groestl, (char*)hash,
+   init_groestl( &ctx.groestl, 64 );
+   update_and_final_groestl( &ctx.groestl, (char*)hash,
                                  (const char*)hash, 512 );
 #else
-        sph_groestl512_init(&ctx.groestl );
-        sph_groestl512(&ctx.groestl, hash, 64);
-        sph_groestl512_close(&ctx.groestl, hash);
+   sph_groestl512_init(&ctx.groestl );
+   sph_groestl512(&ctx.groestl, hash, 64);
+   sph_groestl512_close(&ctx.groestl, hash);
 #endif

-        sph_skein512_init( &ctx.skein);
-        sph_skein512(&ctx.skein, hash, 64);
-        sph_skein512_close(&ctx.skein, hash);
+   sph_skein512_init( &ctx.skein);
+   sph_skein512(&ctx.skein, hash, 64);
+   sph_skein512_close(&ctx.skein, hash);

-        sph_jh512_init( &ctx.jh);
-        sph_jh512(&ctx.jh, hash, 64);
-        sph_jh512_close(&ctx.jh, hash);
+   sph_jh512_init( &ctx.jh);
+   sph_jh512(&ctx.jh, hash, 64);
+   sph_jh512_close(&ctx.jh, hash);

-        sph_keccak512_init( &ctx.keccak );
-        sph_keccak512(&ctx.keccak, hash, 64);
-        sph_keccak512_close(&ctx.keccak, hash);
+   sph_keccak512_init( &ctx.keccak );
+   sph_keccak512(&ctx.keccak, hash, 64);
+   sph_keccak512_close(&ctx.keccak, hash);

-        init_luffa( &ctx.luffa, 512 );
-        update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
+   init_luffa( &ctx.luffa, 512 );
+   update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
                                (const BitSequence*)hash, 64 );

-        cubehashInit( &ctx.cubehash, 512, 16, 32 );
-        cubehashUpdateDigest( &ctx.cubehash, (byte*) hash,
+   cubehashInit( &ctx.cubehash, 512, 16, 32 );
+   cubehashUpdateDigest( &ctx.cubehash, (byte*) hash,
                              (const byte*)hash, 64 );

-        sph_shavite512_init( &ctx.shavite );
-        sph_shavite512(&ctx.shavite, hash, 64);
-        sph_shavite512_close(&ctx.shavite, hash);
+   sph_shavite512_init( &ctx.shavite );
+   sph_shavite512(&ctx.shavite, hash, 64);
+   sph_shavite512_close(&ctx.shavite, hash);

-        init_sd( &ctx.simd, 512 );
-        update_final_sd( &ctx.simd, (BitSequence *)hash,
+   init_sd( &ctx.simd, 512 );
+   update_final_sd( &ctx.simd, (BitSequence *)hash,
                         (const BitSequence *)hash, 512 );

 #if defined(__AES__)
-        init_echo( &ctx.echo, 512 );
-        update_final_echo ( &ctx.echo, (BitSequence *)hash,
+   init_echo( &ctx.echo, 512 );
+   update_final_echo ( &ctx.echo, (BitSequence *)hash,
                            (const BitSequence *)hash, 512 );
 #else
-        sph_echo512_init( &ctx.echo );
-        sph_echo512(&ctx.echo, hash, 64);
-        sph_echo512_close(&ctx.echo, hash);
+   sph_echo512_init( &ctx.echo );
+   sph_echo512(&ctx.echo, hash, 64);
+   sph_echo512_close(&ctx.echo, hash);
 #endif

-        sph_hamsi512_init( &ctx.hamsi );
-        sph_hamsi512(&ctx.hamsi, hash, 64);
-        sph_hamsi512_close(&ctx.hamsi, hash);
+   sph_hamsi512_init( &ctx.hamsi );
+   sph_hamsi512(&ctx.hamsi, hash, 64);
+   sph_hamsi512_close(&ctx.hamsi, hash);

-        sph_fugue512_init( &ctx.fugue );
-        sph_fugue512(&ctx.fugue, hash, 64);
-        sph_fugue512_close(&ctx.fugue, hash);
+   sph_fugue512_init( &ctx.fugue );
+   sph_fugue512(&ctx.fugue, hash, 64);
+   sph_fugue512_close(&ctx.fugue, hash);

-        sph_shabal512_init( &ctx.shabal );
-        sph_shabal512(&ctx.shabal, hash, 64);
-        sph_shabal512_close(&ctx.shabal, hash);
+   sph_shabal512_init( &ctx.shabal );
+   sph_shabal512(&ctx.shabal, hash, 64);
+   sph_shabal512_close(&ctx.shabal, hash);

-        sph_whirlpool(&ctx.whirlpool, hash, 64);
-        sph_whirlpool_close(&ctx.whirlpool, hash);
+   sph_whirlpool(&ctx.whirlpool, hash, 64);
+   sph_whirlpool_close(&ctx.whirlpool, hash);

+   if ( work_restart[thrid].restart ) return 0;
 //
-        sph_bmw512_init( &ctx.bmw);
-        sph_bmw512(&ctx.bmw, hash, 64);
-        sph_bmw512_close(&ctx.bmw, hash);
+   sph_bmw512_init( &ctx.bmw);
+   sph_bmw512(&ctx.bmw, hash, 64);
+   sph_bmw512_close(&ctx.bmw, hash);

 #if defined(__AES__)
-        init_groestl( &ctx.groestl, 64 );
-        update_and_final_groestl( &ctx.groestl, (char*)hash,
+   init_groestl( &ctx.groestl, 64 );
+   update_and_final_groestl( &ctx.groestl, (char*)hash,
                                  (const char*)hash, 512 );
 #else
-        sph_groestl512_init(&ctx.groestl );
-        sph_groestl512(&ctx.groestl, hash, 64);
-        sph_groestl512_close(&ctx.groestl, hash);
+   sph_groestl512_init(&ctx.groestl );
+   sph_groestl512(&ctx.groestl, hash, 64);
+   sph_groestl512_close(&ctx.groestl, hash);
 #endif

-        sph_skein512_init( &ctx.skein);
-        sph_skein512(&ctx.skein, hash, 64);
-        sph_skein512_close(&ctx.skein, hash);
+   sph_skein512_init( &ctx.skein);
+   sph_skein512(&ctx.skein, hash, 64);
+   sph_skein512_close(&ctx.skein, hash);

-        sph_jh512_init( &ctx.jh);
-        sph_jh512(&ctx.jh, hash, 64);
-        sph_jh512_close(&ctx.jh, hash);
+   sph_jh512_init( &ctx.jh);
+   sph_jh512(&ctx.jh, hash, 64);
+   sph_jh512_close(&ctx.jh, hash);

-        sph_keccak512_init( &ctx.keccak );
-        sph_keccak512(&ctx.keccak, hash, 64);
-        sph_keccak512_close(&ctx.keccak, hash);
+   sph_keccak512_init( &ctx.keccak );
+   sph_keccak512(&ctx.keccak, hash, 64);
+   sph_keccak512_close(&ctx.keccak, hash);

-        init_luffa( &ctx.luffa, 512 );
-        update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
+   init_luffa( &ctx.luffa, 512 );
+   update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
                                (const BitSequence*)hash, 64 );

-        cubehashInit( &ctx.cubehash, 512, 16, 32 );
-        cubehashUpdateDigest( &ctx.cubehash, (byte*) hash,
+   cubehashInit( &ctx.cubehash, 512, 16, 32 );
+   cubehashUpdateDigest( &ctx.cubehash, (byte*) hash,
                              (const byte*)hash, 64 );

-        sph_shavite512_init( &ctx.shavite );
-        sph_shavite512(&ctx.shavite, hash, 64);
-        sph_shavite512_close(&ctx.shavite, hash);
+   sph_shavite512_init( &ctx.shavite );
+   sph_shavite512(&ctx.shavite, hash, 64);
+   sph_shavite512_close(&ctx.shavite, hash);

-        init_sd( &ctx.simd, 512 );
-        update_final_sd( &ctx.simd, (BitSequence *)hash,
+   init_sd( &ctx.simd, 512 );
+   update_final_sd( &ctx.simd, (BitSequence *)hash,
                         (const BitSequence *)hash, 512 );

 #if defined(__AES__)
-        init_echo( &ctx.echo, 512 );
-        update_final_echo ( &ctx.echo, (BitSequence *)hash,
+   init_echo( &ctx.echo, 512 );
+   update_final_echo ( &ctx.echo, (BitSequence *)hash,
                            (const BitSequence *)hash, 512 );
 #else
-        sph_echo512_init( &ctx.echo );
-        sph_echo512(&ctx.echo, hash, 64);
-        sph_echo512_close(&ctx.echo, hash);
+   sph_echo512_init( &ctx.echo );
+   sph_echo512(&ctx.echo, hash, 64);
+   sph_echo512_close(&ctx.echo, hash);
 #endif

-        sph_hamsi512_init( &ctx.hamsi );
-        sph_hamsi512(&ctx.hamsi, hash, 64);
-        sph_hamsi512_close(&ctx.hamsi, hash);
+   sph_hamsi512_init( &ctx.hamsi );
+   sph_hamsi512(&ctx.hamsi, hash, 64);
+   sph_hamsi512_close(&ctx.hamsi, hash);

-        sph_fugue512_init( &ctx.fugue );
-        sph_fugue512(&ctx.fugue, hash, 64);
-        sph_fugue512_close(&ctx.fugue, hash);
+   sph_fugue512_init( &ctx.fugue );
+   sph_fugue512(&ctx.fugue, hash, 64);
+   sph_fugue512_close(&ctx.fugue, hash);

-        sph_shabal512_init( &ctx.shabal );
-        sph_shabal512(&ctx.shabal, hash, 64);
-        sph_shabal512_close(&ctx.shabal, hash);
+   sph_shabal512_init( &ctx.shabal );
+   sph_shabal512(&ctx.shabal, hash, 64);
+   sph_shabal512_close(&ctx.shabal, hash);

-        sph_whirlpool_init( &ctx.whirlpool );
-        sph_whirlpool(&ctx.whirlpool, hash, 64);
-        sph_whirlpool_close(&ctx.whirlpool, hash);
+   sph_whirlpool_init( &ctx.whirlpool );
+   sph_whirlpool(&ctx.whirlpool, hash, 64);
+   sph_whirlpool_close(&ctx.whirlpool, hash);

-        SHA512_Update( &ctx.sha512, hash, 64 );
-        SHA512_Final( (unsigned char*) hash, &ctx.sha512 );
+   SHA512_Update( &ctx.sha512, hash, 64 );
+   SHA512_Final( (unsigned char*) hash, &ctx.sha512 );

-        sph_whirlpool_init( &ctx.whirlpool );
-        sph_whirlpool(&ctx.whirlpool, hash, 64);
-        sph_whirlpool_close(&ctx.whirlpool, hash);
+   sph_whirlpool_init( &ctx.whirlpool );
+   sph_whirlpool(&ctx.whirlpool, hash, 64);
+   sph_whirlpool_close(&ctx.whirlpool, hash);

+   if ( work_restart[thrid].restart ) return 0;
 //

-        sph_bmw512_init( &ctx.bmw);
-        sph_bmw512(&ctx.bmw, hash, 64);
-        sph_bmw512_close(&ctx.bmw, hash);
+   sph_bmw512_init( &ctx.bmw);
+   sph_bmw512(&ctx.bmw, hash, 64);
+   sph_bmw512_close(&ctx.bmw, hash);

 #if defined(__AES__)
-        init_groestl( &ctx.groestl, 64 );
-        update_and_final_groestl( &ctx.groestl, (char*)hash,
+   init_groestl( &ctx.groestl, 64 );
+   update_and_final_groestl( &ctx.groestl, (char*)hash,
                                  (const char*)hash, 512 );
 #else
-        sph_groestl512_init(&ctx.groestl );
-        sph_groestl512(&ctx.groestl, hash, 64);
-        sph_groestl512_close(&ctx.groestl, hash);
+   sph_groestl512_init(&ctx.groestl );
+   sph_groestl512(&ctx.groestl, hash, 64);
+   sph_groestl512_close(&ctx.groestl, hash);
 #endif

-        sph_skein512_init( &ctx.skein);
-        sph_skein512(&ctx.skein, hash, 64);
-        sph_skein512_close(&ctx.skein, hash);
+   sph_skein512_init( &ctx.skein);
+   sph_skein512(&ctx.skein, hash, 64);
+   sph_skein512_close(&ctx.skein, hash);

-        sph_jh512_init( &ctx.jh);
-        sph_jh512(&ctx.jh, hash, 64);
-        sph_jh512_close(&ctx.jh, hash);
+   sph_jh512_init( &ctx.jh);
+   sph_jh512(&ctx.jh, hash, 64);
+   sph_jh512_close(&ctx.jh, hash);

-        sph_keccak512_init( &ctx.keccak );
-        sph_keccak512(&ctx.keccak, hash, 64);
-        sph_keccak512_close(&ctx.keccak, hash);
+   sph_keccak512_init( &ctx.keccak );
+   sph_keccak512(&ctx.keccak, hash, 64);
+   sph_keccak512_close(&ctx.keccak, hash);

-        init_luffa( &ctx.luffa, 512 );
-        update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
+   init_luffa( &ctx.luffa, 512 );
+   update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
                                (const BitSequence*)hash, 64 );

-        cubehashInit( &ctx.cubehash, 512, 16, 32 );
-        cubehashUpdateDigest( &ctx.cubehash, (byte*) hash,
+   cubehashInit( &ctx.cubehash, 512, 16, 32 );
+   cubehashUpdateDigest( &ctx.cubehash, (byte*) hash,
                              (const byte*)hash, 64 );

-        sph_shavite512_init( &ctx.shavite );
-        sph_shavite512(&ctx.shavite, hash, 64);
-        sph_shavite512_close(&ctx.shavite, hash);
+   sph_shavite512_init( &ctx.shavite );
+   sph_shavite512(&ctx.shavite, hash, 64);
+   sph_shavite512_close(&ctx.shavite, hash);

-        init_sd( &ctx.simd, 512 );
-        update_final_sd( &ctx.simd, (BitSequence *)hash,
+   init_sd( &ctx.simd, 512 );
+   update_final_sd( &ctx.simd, (BitSequence *)hash,
                         (const BitSequence *)hash, 512 );

 #if defined(__AES__)
-        init_echo( &ctx.echo, 512 );
-        update_final_echo ( &ctx.echo, (BitSequence *)hash,
+   init_echo( &ctx.echo, 512 );
+   update_final_echo ( &ctx.echo, (BitSequence *)hash,
                            (const BitSequence *)hash, 512 );
 #else
-        sph_echo512_init( &ctx.echo );
-        sph_echo512(&ctx.echo, hash, 64);
-        sph_echo512_close(&ctx.echo, hash);
+   sph_echo512_init( &ctx.echo );
+   sph_echo512(&ctx.echo, hash, 64);
+   sph_echo512_close(&ctx.echo, hash);
 #endif

-        sph_hamsi512_init( &ctx.hamsi );
-        sph_hamsi512(&ctx.hamsi, hash, 64);
-        sph_hamsi512_close(&ctx.hamsi, hash);
+   sph_hamsi512_init( &ctx.hamsi );
+   sph_hamsi512(&ctx.hamsi, hash, 64);
+   sph_hamsi512_close(&ctx.hamsi, hash);

-        sph_fugue512_init( &ctx.fugue );
-        sph_fugue512(&ctx.fugue, hash, 64);
-        sph_fugue512_close(&ctx.fugue, hash);
+   sph_fugue512_init( &ctx.fugue );
+   sph_fugue512(&ctx.fugue, hash, 64);
+   sph_fugue512_close(&ctx.fugue, hash);

-        sph_shabal512_init( &ctx.shabal );
-        sph_shabal512(&ctx.shabal, hash, 64);
-        sph_shabal512_close(&ctx.shabal, hash);
+   sph_shabal512_init( &ctx.shabal );
+   sph_shabal512(&ctx.shabal, hash, 64);
+   sph_shabal512_close(&ctx.shabal, hash);

-        sph_whirlpool_init( &ctx.whirlpool );
-        sph_whirlpool(&ctx.whirlpool, hash, 64);
-        sph_whirlpool_close(&ctx.whirlpool, hash);
+   sph_whirlpool_init( &ctx.whirlpool );
+   sph_whirlpool(&ctx.whirlpool, hash, 64);
+   sph_whirlpool_close(&ctx.whirlpool, hash);

-        SHA512_Init( &ctx.sha512 );
-        SHA512_Update( &ctx.sha512, hash, 64 );
-        SHA512_Final( (unsigned char*) hash, &ctx.sha512 );
+   SHA512_Init( &ctx.sha512 );
+   SHA512_Update( &ctx.sha512, hash, 64 );
+   SHA512_Final( (unsigned char*) hash, &ctx.sha512 );

-        sph_haval256_5(&ctx.haval,(const void*) hash, 64);
-        sph_haval256_5_close(&ctx.haval, hash);
+   sph_haval256_5(&ctx.haval,(const void*) hash, 64);
+   sph_haval256_5_close(&ctx.haval, hash);

   memcpy(state, hash, 32);
+   return 1;
 }

 int scanhash_sonoa( struct work *work, uint32_t max_nonce,
@@ -579,7 +586,7 @@ int scanhash_sonoa( struct work *work, uint32_t max_nonce,
   do
   {
      edata[19] = n;
-      sonoa_hash( hash64, edata );
+      if ( sonoa_hash( hash64, edata, thr_id ) )
      if ( unlikely( valid_hash( hash64, ptarget ) && !bench ) )
      {
         pdata[19] = bswap_32( n );
--- a/algo/x17/x17-4way.c
+++ b/algo/x17/x17-4way.c
@@ -264,7 +264,7 @@ int scanhash_x17_8way( struct work *work, uint32_t max_nonce,
         if ( likely( valid_hash( lane_hash, ptarget ) ) )
         {
            pdata[19] = bswap_32( n + lane );
-            submit_lane_solution( work, lane_hash, mythr, lane );
+            submit_solution( work, lane_hash, mythr );
         }
      }
      *noncev = _mm512_add_epi32( *noncev,
@@ -432,7 +432,7 @@ int scanhash_x17_4way( struct work *work, uint32_t max_nonce,
         if ( valid_hash( lane_hash, ptarget ) )
         {
            pdata[19] = bswap_32( n + lane );
-            submit_lane_solution( work, lane_hash, mythr, lane );
+            submit_solution( work, lane_hash, mythr );
         }            
      }
      *noncev = _mm256_add_epi32( *noncev,
--- a/algo/x17/xevan-4way.c
+++ b/algo/x17/xevan-4way.c
@@ -429,7 +429,7 @@ int scanhash_xevan_8way( struct work *work, uint32_t max_nonce,
         if ( likely( valid_hash( lane_hash, ptarget ) ) )
         {
            pdata[19] = bswap_32( n + lane );
-            submit_lane_solution( work, lane_hash, mythr, lane );
+            submit_solution( work, lane_hash, mythr );
         }
      }
      *noncev = _mm512_add_epi32( *noncev,
@@ -699,7 +699,7 @@ int scanhash_xevan_4way( struct work *work, uint32_t max_nonce,
 	      if ( valid_hash( lane_hash, ptarget ) )
         {
             pdata[19] = bswap_32( n + lane );
-             submit_lane_solution( work, lane_hash, mythr, lane );
+             submit_solution( work, lane_hash, mythr );
         }
      }
      *noncev = _mm256_add_epi32( *noncev,
--- a/algo/x22/x22i-4way.c
+++ b/algo/x22/x22i-4way.c
@@ -493,7 +493,7 @@ int scanhash_x22i_8way( struct work* work, uint32_t max_nonce,
         if ( likely( fulltest( lane_hash, ptarget ) && !opt_benchmark ) )
         {
            pdata[19] = n + lane;
-            submit_lane_solution( work, lane_hash, mythr, lane );
+            submit_solution( work, lane_hash, mythr );
         }
      }
      n += 8;
--- a/algo/x22/x25x-4way.c
+++ b/algo/x22/x25x-4way.c
@@ -625,7 +625,7 @@ int scanhash_x25x_8way( struct work* work, uint32_t max_nonce,
         if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
         {
              pdata[19] = n + lane;
-              submit_lane_solution( work, lane_hash, mythr, lane );
+              submit_solution( work, lane_hash, mythr );
         }
      }
      n += 8;
--- a/algo/yescrypt/yescrypt-platform.h
+++ b/algo/yescrypt/yescrypt-platform.h
@@ -31,6 +31,7 @@
 #undef HUGEPAGE_SIZE
 #endif

+/*
 static __inline uint32_t
 le32dec(const void *pp)
 {
@@ -50,6 +51,7 @@ le32enc(void *pp, uint32_t x)
 	p[2] = (x >> 16) & 0xff;
 	p[3] = (x >> 24) & 0xff;
 }
+*/

 static void *
 alloc_region(yescrypt_region_t * region, size_t size)
@@ -154,7 +156,7 @@ int yescrypt_init_shared(yescrypt_shared_t * shared, const uint8_t * param, size
 	if (yescrypt_kdf(&dummy, shared1,
 	    param, paramlen, NULL, 0, N, r, p, 0,
 	    YESCRYPT_RW | YESCRYPT_PARALLEL_SMIX | __YESCRYPT_INIT_SHARED_1,
-	    salt, sizeof(salt)))
+	    salt, sizeof(salt), 0 ) != 1 ) /* kdf: 1 = ok, 0 = restart, -1 = error */
 		goto out;

 	half1 = half2 = *shared;
@@ -166,19 +168,19 @@ int yescrypt_init_shared(yescrypt_shared_t * shared, const uint8_t * param, size
 	if (p > 1 && yescrypt_kdf(&half1, &half2.shared1,
 	    param, paramlen, salt, sizeof(salt), N, r, p, 0,
 	    YESCRYPT_RW | YESCRYPT_PARALLEL_SMIX | __YESCRYPT_INIT_SHARED_2,
-	    salt, sizeof(salt)))
+	    salt, sizeof(salt), 0 ) != 1 ) /* anything but 1 leaves the output incomplete */
 		goto out;

 	if (yescrypt_kdf(&half2, &half1.shared1,
 	    param, paramlen, salt, sizeof(salt), N, r, p, 0,
 	    YESCRYPT_RW | YESCRYPT_PARALLEL_SMIX | __YESCRYPT_INIT_SHARED_1,
-	    salt, sizeof(salt)))
+	    salt, sizeof(salt), 0 ) != 1 )
 		goto out;

 	if (yescrypt_kdf(&half1, &half2.shared1,
 	    param, paramlen, salt, sizeof(salt), N, r, p, 0,
 	    YESCRYPT_RW | YESCRYPT_PARALLEL_SMIX | __YESCRYPT_INIT_SHARED_1,
-	    buf, buflen))
+	    buf, buflen, 0 ) != 1 )
 		goto out;

 	shared->mask1 = mask;
--- a/algo/yescrypt/yescrypt-simd.c
+++ b/algo/yescrypt/yescrypt-simd.c
@@ -1149,7 +1149,7 @@ yescrypt_kdf(const yescrypt_shared_t * shared, yescrypt_local_t * local,
    const uint8_t * passwd, size_t passwdlen,
    const uint8_t * salt, size_t saltlen,
    uint64_t N, uint32_t r, uint32_t p, uint32_t t, yescrypt_flags_t flags,
-    uint8_t * buf, size_t buflen)
+    uint8_t * buf, size_t buflen, int thrid )
 {
 	uint8_t _ALIGN(128) sha256[32];
 	yescrypt_region_t tmp;
@@ -1157,6 +1157,7 @@ yescrypt_kdf(const yescrypt_shared_t * shared, yescrypt_local_t * local,
 	size_t B_size, V_size, XY_size, need;
 	uint8_t * B, * S;
 	salsa20_blk_t * V, * XY;
+   int retval = 1;

 	/*
 	 * YESCRYPT_PARALLEL_SMIX is a no-op at p = 1 for its intended purpose,
@@ -1312,6 +1313,12 @@ yescrypt_kdf(const yescrypt_shared_t * shared, yescrypt_local_t * local,
 	/* 1: (B_0 ... B_{p-1}) <-- PBKDF2(P, S, 1, p * MFLen) */
 	PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, 1, B, B_size);

+   if ( work_restart[thrid].restart ) 
+   { 
+     retval = 0; 
+     goto out;
+   }
+   
 	if (t || flags)
 		memcpy(sha256, B, sizeof(sha256));

@@ -1339,9 +1346,21 @@ yescrypt_kdf(const yescrypt_shared_t * shared, yescrypt_local_t * local,
 		}
 	}

+   if ( work_restart[thrid].restart )
+   {
+     retval = 0;
+     goto out;
+   }
+
 	/* 5: DK <-- PBKDF2(P, B, 1, dkLen) */
 	PBKDF2_SHA256(passwd, passwdlen, B, B_size, 1, buf, buflen);

+   if ( work_restart[thrid].restart ) 
+   { 
+     retval = 0; 
+     goto out;
+   }
+   
 	/*
 	 * Except when computing classic scrypt, allow all computation so far
 	 * to be performed on the client.  The final steps below match those of
@@ -1370,9 +1389,10 @@ yescrypt_kdf(const yescrypt_shared_t * shared, yescrypt_local_t * local,
 	   }
 	}

+out:   
 	if (free_region(&tmp))
 		return -1;

 	/* Success! */
-	return 0;
+	return retval;
 }
--- a/algo/yescrypt/yescrypt.c
+++ b/algo/yescrypt/yescrypt.c
@@ -106,7 +106,8 @@ static const uint8_t* decode64_uint32(uint32_t* dst, uint32_t dstbits, const uin
 }

 uint8_t* yescrypt_r(const yescrypt_shared_t* shared, yescrypt_local_t* local,
-    const uint8_t* passwd, size_t passwdlen, const uint8_t* setting, uint8_t* buf, size_t buflen)
+    const uint8_t* passwd, size_t passwdlen, const uint8_t* setting,
+    uint8_t* buf, size_t buflen, int thrid )
 {
 	uint8_t hash[HASH_SIZE];
 	const uint8_t * src, * salt;
@@ -210,7 +211,9 @@ uint8_t* yescrypt_r(const yescrypt_shared_t* shared, yescrypt_local_t* local,
 		return NULL;
 	}

-	if (yescrypt_kdf(shared, local, passwd, passwdlen, salt, saltlen, N, r, p, 0, flags, hash, sizeof(hash))) {
+	if ( yescrypt_kdf( shared, local, passwd, passwdlen, salt, saltlen, N, r, p,
+            0, flags, hash, sizeof(hash), thrid ) == -1 )
+   {
 		printf("died10 ...");
 		fflush(stdout);
 		return NULL;
@@ -237,7 +240,7 @@ uint8_t* yescrypt_r(const yescrypt_shared_t* shared, yescrypt_local_t* local,
 	return buf;
 }

-uint8_t* yescrypt(const uint8_t* passwd, const uint8_t* setting)
+uint8_t* yescrypt(const uint8_t* passwd, const uint8_t* setting, int thrid )
 {
 	static uint8_t buf[4 + 1 + 5 + 5 + BYTES2CHARS(32) + 1 + HASH_LEN + 1];
 	yescrypt_shared_t shared;
@@ -252,7 +255,7 @@ uint8_t* yescrypt(const uint8_t* passwd, const uint8_t* setting)
 		return NULL;
 	}
 	retval = yescrypt_r(&shared, &local,
-	    passwd, 80, setting, buf, sizeof(buf));
+	    passwd, 80, setting, buf, sizeof(buf), thrid );
 	//printf("hashse='%s'\n", (char *)retval);
 	if (yescrypt_free_local(&local)) {
 		yescrypt_free_shared(&shared);
@@ -329,7 +332,7 @@ uint8_t* yescrypt_gensalt(uint32_t N_log2, uint32_t r, uint32_t p, yescrypt_flag

 static int yescrypt_bsty(const uint8_t * passwd, size_t passwdlen,
    const uint8_t * salt, size_t saltlen, uint64_t N, uint32_t r, uint32_t p,
-    uint8_t * buf, size_t buflen)
+    uint8_t * buf, size_t buflen, int thrid )
 {
 	static __thread int initialized = 0;
 	static __thread yescrypt_shared_t shared;
@@ -349,7 +352,7 @@ static int yescrypt_bsty(const uint8_t * passwd, size_t passwdlen,
 	}
 	retval = yescrypt_kdf(&shared, &local,
 	    passwd, passwdlen, salt, saltlen, N, r, p, 0, YESCRYPT_FLAGS,
-	    buf, buflen);
+	    buf, buflen, thrid );
 #if 0
 	if (yescrypt_free_local(&local)) {
 		yescrypt_free_shared(&shared);
@@ -370,16 +373,16 @@ char *yescrypt_client_key = NULL;
 int yescrypt_client_key_len = 0;

 /* main hash 80 bytes input */
-void yescrypt_hash( const char *input, char *output, uint32_t len )
+int yescrypt_hash( const char *input, char *output, uint32_t len, int thrid )
 {
-   yescrypt_bsty( (uint8_t*)input, len, (uint8_t*)input, len, YESCRYPT_N,
-                  YESCRYPT_R, YESCRYPT_P, (uint8_t*)output, 32 );
+   return yescrypt_bsty( (uint8_t*)input, len, (uint8_t*)input, len, YESCRYPT_N,
+                  YESCRYPT_R, YESCRYPT_P, (uint8_t*)output, 32, thrid );
 }

 /* for util.c test */
-void yescrypthash(void *output, const void *input)
+int yescrypthash(void *output, const void *input, int thrid)
 {
-	yescrypt_hash((char*) input, (char*) output, 80);
+	return yescrypt_hash((char*) input, (char*) output, 80, thrid);
 }

 int scanhash_yescrypt( struct work *work, uint32_t max_nonce,
@@ -392,13 +395,13 @@ int scanhash_yescrypt( struct work *work, uint32_t max_nonce,
   const uint32_t first_nonce = pdata[19];
   const uint32_t last_nonce = max_nonce;
   uint32_t n = first_nonce;
-   int thr_id = mythr->id;  // thr_id arg is deprecated
+   int thr_id = mythr->id; 

   for ( int k = 0; k < 19; k++ )
      be32enc( &endiandata[k], pdata[k] );
   endiandata[19] = n;
   do {
-      yescrypt_hash((char*) endiandata, (char*) vhash, 80);
+      if ( yescrypt_hash((char*) endiandata, (char*) vhash, 80, thr_id ) )
      if unlikely( valid_hash( vhash, ptarget ) && !opt_benchmark )
      {
          be32enc( pdata+19, n );
--- a/algo/yescrypt/yescrypt.h
+++ b/algo/yescrypt/yescrypt.h
@@ -38,12 +38,13 @@ extern "C" {
 #include <stdint.h>
 #include <stdlib.h> /* for size_t */
 #include <stdbool.h>
+#include "miner.h"

 //#define  __SSE4_1__

-void yescrypt_hash(const char* input, char* output, uint32_t len);
+int yescrypt_hash(const char* input, char* output, uint32_t len, int thrid );

-void yescrypthash(void *output, const void *input);
+int yescrypthash(void *output, const void *input, int thrid );

 /**
 * crypto_scrypt(passwd, passwdlen, salt, saltlen, N, r, p, buf, buflen):
@@ -301,7 +302,7 @@ extern int yescrypt_kdf(const yescrypt_shared_t * __shared,
    const uint8_t * __salt, size_t __saltlen,
    uint64_t __N, uint32_t __r, uint32_t __p, uint32_t __t,
    yescrypt_flags_t __flags,
-    uint8_t * __buf, size_t __buflen);
+    uint8_t * __buf, size_t __buflen, int thrid);

 /**
 * yescrypt_r(shared, local, passwd, passwdlen, setting, buf, buflen):
@@ -321,7 +322,7 @@ extern uint8_t * yescrypt_r(const yescrypt_shared_t * __shared,
    yescrypt_local_t * __local,
    const uint8_t * __passwd, size_t __passwdlen,
    const uint8_t * __setting,
-    uint8_t * __buf, size_t __buflen);
+    uint8_t * __buf, size_t __buflen, int thrid);

 /**
 * yescrypt(passwd, setting):
@@ -339,7 +340,7 @@ extern uint8_t * yescrypt_r(const yescrypt_shared_t * __shared,
 *
 * MT-unsafe.
 */
-extern uint8_t * yescrypt(const uint8_t * __passwd, const uint8_t * __setting);
+extern uint8_t * yescrypt(const uint8_t * __passwd, const uint8_t * __setting, int thrid );

 /**
 * yescrypt_gensalt_r(N_log2, r, p, flags, src, srclen, buf, buflen):
--- a/algo/yespower/yescrypt-r8g.c
+++ b/algo/yespower/yescrypt-r8g.c
@@ -51,6 +51,10 @@ int scanhash_yespower_r8g( struct work *work, uint32_t max_nonce,
       be32enc( &endiandata[ i], pdata[ i ]);
    endiandata[19] = n;

+// Prehash the first 64 bytes of the header with SHA-256 once, before the nonce loop.
+   SHA256_Init( &sha256_prehash_ctx );
+   SHA256_Update( &sha256_prehash_ctx, endiandata, 64 );
+    
    do {
       yespower_tls( (unsigned char *)endiandata, params.perslen,
                      &params, (yespower_binary_t*)hash, thr_id );
--- a/algo/yespower/yespower-4way.c
+++ b/algo/yespower/yespower-4way.c
@@ -0,0 +1,692 @@
+/*-
+ * Copyright 2009 Colin Percival
+ * Copyright 2013-2018 Alexander Peslyak
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * This file was originally written by Colin Percival as part of the Tarsnap
+ * online backup system.
+ *
+ * This is a proof-of-work focused fork of yescrypt, including reference and
+ * cut-down implementation of the obsolete yescrypt 0.5 (based off its first
+ * submission to PHC back in 2014) and a new proof-of-work specific variation
+ * known as yespower 1.0.  The former is intended as an upgrade for
+ * cryptocurrencies that already use yescrypt 0.5 and the latter may be used
+ * as a further upgrade (hard fork) by those and other cryptocurrencies.  The
+ * version of algorithm to use is requested through parameters, allowing for
+ * both algorithms to co-exist in client and miner implementations (such as in
+ * preparation for a hard-fork).
+ *
+ * This is the reference implementation.  Its purpose is to provide a simple
+ * human- and machine-readable specification that implementations intended
+ * for actual use should be tested against.  It is deliberately mostly not
+ * optimized, and it is not meant to be used in production.  Instead, use
+ * yespower-opt.c.
+ */
+/*
+#warning "This reference implementation is deliberately mostly not optimized. Use yespower-opt.c instead unless you're testing (against) the reference implementation on purpose."
+*/
+#include <errno.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "algo/sha/hmac-sha256-hash-4way.h"
+//#include "sysendian.h"
+
+#include "yespower.h"
+
+
+#if defined(__AVX2__)
+
+
+static void blkcpy_8way( __m256i *dst, const __m256i *src, size_t count )
+{
+   /* Copy `count` vectors; the first copy happens before count is tested. */
+   size_t idx = 0;
+   do { dst[idx] = src[idx]; } while ( ++idx != count );
+}
+
+static void blkxor_8way( __m256i *dst, const __m256i *src, size_t count )
+{
+   /* XOR `count` vectors of src into dst; the body runs before the test. */
+   size_t idx = 0;
+   do { dst[idx] ^= src[idx]; } while ( ++idx != count );
+}
+
+/**
+ * salsa20_8way(B, rounds):
+ * Apply the Salsa20 core to block B in place; each loop pass counts 2 rounds.
+ */
+static void salsa20_8way( __m256i B[16], uint32_t rounds )
+{
+	__m256i x[16];
+	size_t i;
+
+	/* SIMD unshuffle: vector i of B is loaded into slot (i * 5) % 16 of x */
+	for ( i = 0; i < 16; i++ )
+		x[i * 5 % 16] = B[i];
+
+	for ( i = 0; i < rounds; i += 2 )
+   {
+#define R( a, b, c ) mm256_rol_32( _mm256_add_epi32( a, b ), c )
+      /* Operate on columns (R wraps mm256_rol_32, presumably a rotate left) */
+
+      x[ 4] = _mm256_xor_si256( x[ 4], R( x[ 0], x[12],  7 ) );
+      x[ 8] = _mm256_xor_si256( x[ 8], R( x[ 4], x[ 0],  9 ) );
+      x[12] = _mm256_xor_si256( x[12], R( x[ 8], x[ 4], 13 ) );
+      x[ 0] = _mm256_xor_si256( x[ 0], R( x[12], x[ 8], 18 ) );
+
+      x[ 9] = _mm256_xor_si256( x[ 9], R( x[ 5], x[ 1],  7 ) );
+      x[13] = _mm256_xor_si256( x[13], R( x[ 9], x[ 5],  9 ) );
+      x[ 1] = _mm256_xor_si256( x[ 1], R( x[13], x[ 9], 13 ) );
+      x[ 5] = _mm256_xor_si256( x[ 5], R( x[ 1], x[13], 18 ) );
+
+      x[14] = _mm256_xor_si256( x[14], R( x[10], x[ 6],  7 ) );
+      x[ 2] = _mm256_xor_si256( x[ 2], R( x[14], x[10],  9 ) );
+      x[ 6] = _mm256_xor_si256( x[ 6], R( x[ 2], x[14], 13 ) );
+      x[10] = _mm256_xor_si256( x[10], R( x[ 6], x[ 2], 18 ) );
+
+      x[ 3] = _mm256_xor_si256( x[ 3], R( x[15], x[11],  7 ) );
+      x[ 7] = _mm256_xor_si256( x[ 7], R( x[ 3], x[15],  9 ) );
+      x[11] = _mm256_xor_si256( x[11], R( x[ 7], x[ 3], 13 ) );
+      x[15] = _mm256_xor_si256( x[15], R( x[11], x[ 7], 18 ) );
+
+		/* Operate on rows */
+
+      x[ 1] = _mm256_xor_si256( x[ 1], R( x[ 0], x[ 3],  7 ) );
+      x[ 2] = _mm256_xor_si256( x[ 2], R( x[ 1], x[ 0],  9 ) );
+      x[ 3] = _mm256_xor_si256( x[ 3], R( x[ 2], x[ 1], 13 ) );
+      x[ 0] = _mm256_xor_si256( x[ 0], R( x[ 3], x[ 2], 18 ) );
+
+      x[ 6] = _mm256_xor_si256( x[ 6], R( x[ 5], x[ 4],  7 ) );
+      x[ 7] = _mm256_xor_si256( x[ 7], R( x[ 6], x[ 5],  9 ) );
+      x[ 4] = _mm256_xor_si256( x[ 4], R( x[ 7], x[ 6], 13 ) );
+      x[ 5] = _mm256_xor_si256( x[ 5], R( x[ 4], x[ 7], 18 ) );
+
+      x[11] = _mm256_xor_si256( x[11], R( x[10], x[ 9],  7 ) );
+      x[ 8] = _mm256_xor_si256( x[ 8], R( x[11], x[10],  9 ) );
+      x[ 9] = _mm256_xor_si256( x[ 9], R( x[ 8], x[11], 13 ) );
+      x[10] = _mm256_xor_si256( x[10], R( x[ 9], x[ 8], 18 ) );
+
+      x[12] = _mm256_xor_si256( x[12], R( x[15], x[14],  7 ) );
+      x[13] = _mm256_xor_si256( x[13], R( x[12], x[15],  9 ) );
+      x[14] = _mm256_xor_si256( x[14], R( x[13], x[12], 13 ) );
+      x[15] = _mm256_xor_si256( x[15], R( x[14], x[13], 18 ) );
+
+#undef R
+	}
+
+	/* SIMD shuffle: add slot (i * 5) % 16 of x back into vector i of B */
+	for (i = 0; i < 16; i++)
+		B[i] = _mm256_add_epi32( B[i], x[i * 5 % 16] );
+}
+
+/**
+ * blockmix_salsa(B):
+ * Compute B = BlockMix_{salsa20, 1}(B).  The input B must be 128 bytes in
+ * length.
+ */
+static void blockmix_salsa_8way( __m256i *B, uint32_t rounds )
+{
+   /* B holds two 16-vector sub-blocks (r = 1); `acc` is the running X. */
+   __m256i acc[16];
+   __m256i *const last = B + 16;
+
+   /* 1: X <-- B_{2r - 1}, i.e. the second sub-block */
+   blkcpy_8way( acc, last, 16 );
+
+   /* 2: for i = 0 to 2r - 1 do -- both sub-blocks, in order */
+   for ( __m256i *blk = B; blk <= last; blk += 16 )
+   {
+      /* 3: X <-- H(X xor B_i) */
+      blkxor_8way( acc, blk, 16 );
+      salsa20_8way( acc, rounds );
+      /* 4: Y_i <-- X */
+      /* 6: B' <-- (Y_0, Y_1): the result is stored back over B_i */
+      blkcpy_8way( blk, acc, 16 );
+   }
+}
+
+/*
+ * These are tunable, but they must meet certain constraints and are part of
+ * what defines a yespower version.
+ */
+#define PWXsimple 2
+#define PWXgather 4
+/* Version 0.5 */
+#define PWXrounds_0_5 6
+#define Swidth_0_5 8
+/* Version 1.0 */
+#define PWXrounds_1_0 3
+#define Swidth_1_0 11
+
+/* Derived values.  Not tunable on their own. */
+#define PWXbytes (PWXgather * PWXsimple * 8)
+#define PWXwords (PWXbytes / sizeof(uint32_t))
+#define rmin ((PWXbytes + 127) / 128)
+
+/* Runtime derived values.  Not tunable on their own. */
+#define Swidth_to_Sbytes1(Swidth) ((1 << Swidth) * PWXsimple * 8)
+#define Swidth_to_Smask(Swidth) (((1 << Swidth) - 1) * PWXsimple * 8)
+
+typedef struct {
+   __m256i (*S0)[2], (*S1)[2], (*S2)[2];   // S-box regions, rows of 2 vectors
+   __m256i *S;                   // start of the S-box buffer (S0 begins here)
+	yespower_version_t version;   // compared against YESPOWER_0_5 by the users
+	uint32_t salsa20_rounds;      // rounds argument handed to salsa20_8way()
+	uint32_t PWXrounds, Swidth, Sbytes, Smask;
+	size_t w;                     // S-box write index advanced by pwxform_8way()
+} pwxform_8way_ctx_t __attribute__ ((aligned (128)));
+
+/**
+ * pwxform(B):
+ * Transform the provided block using the provided S-boxes.
+ */
+static void pwxform_8way( __m256i *B, pwxform_8way_ctx_t *ctx )
+{
+	__m256i (*X)[PWXsimple][2] = (__m256i (*)[PWXsimple][2])B;
+	__m256i (*S0)[2] = ctx->S0, (*S1)[2] = ctx->S1, (*S2)[2] = ctx->S2;
+	__m256i Smask = _mm256_set1_epi32( ctx->Smask );
+	size_t w = ctx->w;
+	size_t i, j, k;
+
+	/* 1: for i = 0 to PWXrounds - 1 do */
+	for ( i = 0; i < ctx->PWXrounds; i++ )
+   {
+		/* 2: for j = 0 to PWXgather - 1 do */
+		for ( j = 0; j < PWXgather; j++ )
+      {
+// xl, xh = X[j][0][0] and X[j][0][1]; only the disabled code below reads them
+         __m256i xl = X[j][0][0];
+			__m256i xh = X[j][0][1];
+			__m256i (*p0)[2], (*p1)[2];
+
+			// 3: p0 <-- (lo(B_{j,0}) & Smask) / (PWXsimple * 8)
+// NOTE(review): p0 and p1 are never assigned -- the code that would set them
+// is commented out just below -- yet both are dereferenced in the k loop.
+/*
+         p0 = S0 + (xl & Smask) / sizeof(*S0);
+			// 4: p1 <-- (hi(B_{j,0}) & Smask) / (PWXsimple * 8) 
+			p1 = S1 + (xh & Smask) / sizeof(*S1);
+*/
+			/* 5: for k = 0 to PWXsimple - 1 do */
+			for ( k = 0; k < PWXsimple; k++ )
+         {
+
+// widen 32-bit elements to 64 bits: low half from [0]/[1], high from [2]/[3]
+            __m256i x0, x1, s00, s01, s10, s11;
+            __m128i *p0k = (__m128i*)p0[k];
+            __m128i *p1k = (__m128i*)p1[k];
+
+
+           s00 = _mm256_add_epi64( _mm256_cvtepu32_epi64( p0k[0] ),
+                _mm256_slli_epi64( _mm256_cvtepu32_epi64( p0k[2] ), 32 ) );
+           s01 = _mm256_add_epi64( _mm256_cvtepu32_epi64( p0k[1] ),
+                _mm256_slli_epi64( _mm256_cvtepu32_epi64( p0k[3] ), 32 ) );
+           s10 = _mm256_add_epi64( _mm256_cvtepu32_epi64( p1k[0] ),
+                _mm256_slli_epi64( _mm256_cvtepu32_epi64( p1k[2] ), 32 ) );
+           s11 = _mm256_add_epi64( _mm256_cvtepu32_epi64( p1k[1] ),
+                _mm256_slli_epi64( _mm256_cvtepu32_epi64( p1k[3] ), 32 ) );
+
+            __m128i *xx = (__m128i*)X[j][k];
+            x0 = _mm256_mul_epu32( _mm256_cvtepu32_epi64( xx[0] ),
+                                   _mm256_cvtepu32_epi64( xx[2] ) );
+            x1 = _mm256_mul_epu32( _mm256_cvtepu32_epi64( xx[1] ),
+                                   _mm256_cvtepu32_epi64( xx[3] ) );
+
+            x0 = _mm256_add_epi64( x0, s00 );
+            x1 = _mm256_add_epi64( x1, s01 );
+            
+            x0 = _mm256_xor_si256( x0, s10 );
+            x1 = _mm256_xor_si256( x1, s11 );
+
+            X[j][k][0] = x0; 
+            X[j][k][1] = x1;                        
+			}
+
+			if ( ctx->version != YESPOWER_0_5 &&
+			    ( i == 0 || j < PWXgather / 2 ) )
+         {
+				if ( j & 1 )
+            {
+					for ( k = 0; k < PWXsimple; k++ )
+               {
+						S1[w][0] = X[j][k][0];
+						S1[w][1] = X[j][k][1];
+						w++;
+					}
+				}
+            else
+            {
+					for ( k = 0; k < PWXsimple; k++ )
+               {
+						S0[w + k][0] = X[j][k][0];
+						S0[w + k][1] = X[j][k][1];
+					}
+				}
+			}
+		}
+	}
+
+	if ( ctx->version != YESPOWER_0_5 )
+   {
+		/* 14: (S0, S1, S2) <-- (S2, S0, S1) */
+		ctx->S0 = S2;
+		ctx->S1 = S0;
+		ctx->S2 = S1;
+		/* 15: w <-- w mod 2^Swidth */
+		ctx->w = w & ( ( 1 << ctx->Swidth ) * PWXsimple - 1 );
+	}
+}
+
+/**
+ * blockmix_pwxform_8way(B, ctx, r):
+ * Compute B = BlockMix_pwxform{salsa20, ctx, r}(B) in place.  B is indexed in
+ * __m256i units, the type every caller and helper in this file uses for it.
+ */
+static void blockmix_pwxform_8way( __m256i *B, pwxform_8way_ctx_t *ctx,
+                                   size_t r )
+{
+	__m256i X[PWXwords];
+	size_t r1, i;
+
+	/* Convert 128-byte blocks to PWXbytes blocks */
+	/* 1: r_1 <-- 128r / PWXbytes */
+	r1 = 128 * r / PWXbytes;
+
+	/* 2: X <-- B'_{r_1 - 1} */
+	blkcpy_8way( X, &B[ (r1 - 1) * PWXwords ], PWXwords );
+
+	/* 3: for i = 0 to r_1 - 1 do */
+	for ( i = 0; i < r1; i++ )
+   {
+		/* 4: if r_1 > 1 */
+		if ( r1 > 1 )
+      {
+			/* 5: X <-- X xor B'_i */
+			blkxor_8way( X, &B[ i * PWXwords ], PWXwords );
+		}
+
+		/* 7: X <-- pwxform(X) */
+		pwxform_8way( X, ctx );
+
+		/* 8: B'_i <-- X */
+		blkcpy_8way( &B[ i * PWXwords ], X, PWXwords );
+	}
+
+	/* 10: i <-- floor((r_1 - 1) * PWXbytes / 64) */
+	i = ( r1 - 1 ) * PWXbytes / 64;
+
+	/* 11: B_i <-- H(B_i) */
+	salsa20_8way( &B[i * 16], ctx->salsa20_rounds );
+
+#if 1 /* No-op with our current pwxform settings, but do it to make sure */
+	/* 12: for i = i + 1 to 2r - 1 do */
+	for ( i++; i < 2 * r; i++ )
+   {
+		/* 13: B_i <-- H(B_i xor B_{i-1}) */
+		blkxor_8way( &B[i * 16], &B[ (i - 1) * 16 ], 16 );
+		salsa20_8way( &B[i * 16], ctx->salsa20_rounds );
+	}
+#endif
+}
+
+// This looks a lot like data dependent addressing
+
+/**
+ * integerify(B, r):
+ * Return the result of parsing B_{2r-1} as a little-endian integer.
+ */
+static __m256i integerify8( const __m256i *B, size_t r )
+{
+/*
+ * The 32-bit words are in host byte order and SIMD-shuffled; since only the
+ * least significant 32 bits are of interest, the first vector of the last
+ * 16-vector sub-block, at index (2r - 1) * 16, is returned as is.
+ */
+	return B[ (2 * r - 1) * 16 ];
+}
+
+/**
+ * p2floor(x):
+ * Largest power of 2 not greater than argument.
+ */
+static uint32_t p2floor8( uint32_t x )
+{
+	/* x & (x - 1) drops the lowest set bit; stop when one bit (or 0) is left */
+	for ( uint32_t rest = x & (x - 1); rest != 0; rest = x & (x - 1) )
+		x = rest;
+	return x;
+}
+
+/**
+ * wrap8(x, i):
+ * Wrap x to the range 0 to i-1, using p2floor8() for the power-of-2 floor.
+ */
+static uint32_t wrap8( uint32_t x, uint32_t i )
+{
+	uint32_t n = p2floor8( i );   /* largest power of 2 not greater than i */
+	return ( x & (n - 1) ) + (i - n);
+}
+
+/**
+ * smix1(B, r, N, V, X, ctx):
+ * Compute first loop of B = SMix_r(B, N).  The input B must be 128r bytes in
+ * length; the temporary storage V must be 128rN bytes in length; the temporary
+ * storage X must be 128r bytes in length.
+ */
+static void smix1_8way( __m256i *B, size_t r, uint32_t N,
+                        __m256i *V, __m256i *X, pwxform_8way_ctx_t *ctx )
+{
+	size_t s = 32 * r;
+	uint32_t i, j;
+	size_t k;
+
+	/* 1: X <-- B, reading slot (i * 5) % 16 of each 16-vector sub-block */
+	for ( k = 0; k < 2 * r; k++ )
+		for ( i = 0; i < 16; i++ )
+			X[ k * 16 + i ] = B[ k * 16 + ( i * 5 % 16 ) ];
+
+	if ( ctx->version != YESPOWER_0_5 )
+   {
+		for ( k = 1; k < r; k++ )
+      {
+			blkcpy_8way( &X[k * 32], &X[ (k - 1) * 32 ], 32 );
+			blockmix_pwxform_8way( &X[k * 32], ctx, 1 );
+		}
+	}
+
+	/* 2: for i = 0 to N - 1 do */
+	for ( i = 0; i < N; i++ )
+   {
+		/* 3: V_i <-- X */
+		blkcpy_8way( &V[i * s], X, s );
+
+		if ( i > 1 )
+      {
+// NOTE(review): the assignment to j below is commented out, so j is read
+// uninitialized by the blkxor_8way() call. integerify8() returns a vector
+// (__m256i), not a scalar index -- presumably why this is unfinished.
+         /* j <-- Wrap(Integerify(X), i) */
+//			j = wrap8( integerify8( X, r ), i );
+
+			/* X <-- X xor V_j */
+			blkxor_8way( X, &V[j * s], s );
+		}
+
+		/* 4: X <-- H(X); salsa-only mixing when V is the S-box buffer ctx->S */
+		if ( V != ctx->S )
+			blockmix_pwxform_8way( X, ctx, r );
+		else
+			blockmix_salsa_8way( X, ctx->salsa20_rounds );
+	}
+
+	/* B' <-- X, writing back to slot (i * 5) % 16 of each sub-block */
+	for ( k = 0; k < 2 * r; k++ )
+		for ( i = 0; i < 16; i++ )
+			B[ k * 16 + ( i * 5 % 16 ) ] = X[ k * 16 + i ];
+}
+
+/**
+ * smix2(B, r, N, Nloop, V, X, ctx):
+ * Compute second loop of B = SMix_r(B, N).  The input B must be 128r bytes in
+ * length; the temporary storage V must be 128rN bytes in length; the temporary
+ * storage X must be 128r bytes in length.  The value N must be a power of 2
+ * greater than 1.
+ */
+static void smix2_8way( __m256i *B, size_t r, uint32_t N, uint32_t Nloop,
+                        __m256i *V, __m256i *X, pwxform_8way_ctx_t *ctx )
+{
+	size_t s = 32 * r;
+	uint32_t i, j;
+	size_t k;
+
+	/* X <-- B, reading slot (i * 5) % 16 of each 16-vector sub-block */
+	for ( k = 0; k < 2 * r; k++ )
+		for ( i = 0; i < 16; i++ )
+			X[ k * 16 + i ] = B[ k * 16 + ( i * 5 % 16 ) ];
+
+	/* 6: for i = 0 to Nloop - 1 do (no iterations when Nloop is 0) */
+	for ( i = 0; i < Nloop; i++ )
+   {
+		/* 7: j <-- Integerify(X) mod N */
+//		j = integerify8(X, r) & (N - 1);
+// NOTE(review): with the line above disabled, j is uninitialized below.
+		/* 8.1: X <-- X xor V_j */
+		blkxor_8way( X, &V[j * s], s );
+		/* V_j <-- X, skipped when Nloop == 2 */
+		if ( Nloop != 2 )
+			blkcpy_8way( &V[j * s], X, s );
+
+		/* 8.2: X <-- H(X) */
+		blockmix_pwxform_8way( X, ctx, r );
+	}
+
+	/* 10: B' <-- X, writing back to slot (i * 5) % 16 of each sub-block */
+	for ( k = 0; k < 2 * r; k++ )
+		for ( i = 0; i < 16; i++ )
+			B[ k * 16 + ( i * 5 % 16 ) ] = X[ k * 16 + i ];
+}
+
+/**
+ * smix(B, r, N, p, t, V, X, ctx):
+ * Compute B = SMix_r(B, N).  The input B must be 128rp bytes in length; the
+ * temporary storage V must be 128rN bytes in length; the temporary storage
+ * X must be 128r bytes in length.  The value N must be a power of 2 and at
+ * least 16.
+ */
+static void smix_8way( __m256i *B, size_t r, uint32_t N,
+                       __m256i *V, __m256i *X, pwxform_8way_ctx_t *ctx)
+{
+	uint32_t Nloop_all = (N + 2) / 3; /* 1/3, round up */
+	uint32_t Nloop_rw = Nloop_all;
+
+	Nloop_all++; Nloop_all &= ~(uint32_t)1; /* round up to even */
+   /* Exactly one statement per branch: nothing trails the else on its line */
+   if ( ctx->version == YESPOWER_0_5 )
+		Nloop_rw &= ~(uint32_t)1; /* round down to even */
+	else
+		Nloop_rw = ( Nloop_rw + 1 ) & ~(uint32_t)1; /* round up to even */
+
+	/* First fill the S-boxes (r = 1, V = ctx->S), then run the main passes */
+	smix1_8way( B, 1, ctx->Sbytes / 128, ctx->S, X, ctx );
+	smix1_8way( B, r, N, V, X, ctx );
+	smix2_8way( B, r, N, Nloop_rw /* must be > 2 */, V, X, ctx );
+	smix2_8way( B, r, N, Nloop_all - Nloop_rw /* 0 or 2 */, V, X, ctx );
+}
+
+/**
+ * yespower_8way(local, src, srclen, params, dst, thrid):
+ * Compute yespower(src[0 .. srclen - 1], N, r), to be checked for "< target".
+ * local and thrid are accepted but not referenced in the body.
+ * Return 1 on success; or -1 on error (errno = EINVAL for bad parameters).
+ */
+int yespower_8way( yespower_local_t *local, const __m256i *src, size_t srclen,
+              const yespower_params_t *params, yespower_8way_binary_t *dst,
+              int thrid )
+{
+	yespower_version_t version = params->version;
+	uint32_t N = params->N;
+	uint32_t r = params->r;
+	const uint8_t *pers = params->pers;
+	size_t perslen = params->perslen;
+	int retval = -1;
+	size_t B_size, V_size;
+	uint32_t *B, *V, *X, *S;
+	pwxform_8way_ctx_t ctx;
+	__m256i sha256[8];
+
+	/* Sanity-check parameters */
+	if ( (version != YESPOWER_0_5 && version != YESPOWER_1_0 ) ||
+	    N < 1024 || N > 512 * 1024 || r < 8 || r > 32 ||
+	    (N & (N - 1)) != 0 || r < rmin ||
+	    (!pers && perslen) )
+   {
+		errno = EINVAL;
+		return -1;
+	}
+
+	/* Allocate memory. NOTE(review): confirm these sizes cover 8-way data */
+	B_size = (size_t)128 * r;
+	V_size = B_size * N;
+	if ((V = malloc(V_size)) == NULL)
+		return -1;
+	if ((B = malloc(B_size)) == NULL)
+		goto free_V;
+	if ((X = malloc(B_size)) == NULL)
+		goto free_B;
+	ctx.version = version;
+	if (version == YESPOWER_0_5) {
+		ctx.salsa20_rounds = 8;
+		ctx.PWXrounds = PWXrounds_0_5;
+		ctx.Swidth = Swidth_0_5;
+		ctx.Sbytes = 2 * Swidth_to_Sbytes1(ctx.Swidth);
+	} else {
+		ctx.salsa20_rounds = 2;
+		ctx.PWXrounds = PWXrounds_1_0;
+		ctx.Swidth = Swidth_1_0;
+		ctx.Sbytes = 3 * Swidth_to_Sbytes1(ctx.Swidth);
+	}
+	if ((S = malloc(ctx.Sbytes)) == NULL)
+		goto free_X;
+	ctx.S = S;
+	ctx.S0 = (__m256i (*)[2])S;
+	ctx.S1 = ctx.S0 + (1 << ctx.Swidth) * PWXsimple;
+	ctx.S2 = ctx.S1 + (1 << ctx.Swidth) * PWXsimple;
+	ctx.Smask = Swidth_to_Smask(ctx.Swidth);
+	ctx.w = 0;
+
+   // do prehash
+	sha256_8way_full( sha256, src, srclen );
+
+   // NOTE(review): vpers has a fixed 128 entries; the loop below writes
+   // perslen/4 + 1 of them from pers[i] (byte index) with no bound check,
+   // and only when version != YESPOWER_0_5, although the 0.5 path reads it.
+   __m256i vpers[128];
+
+	if ( version != YESPOWER_0_5 && perslen )
+      for ( int i = 0; i < perslen/4 + 1; i++ )
+         vpers[i] = _mm256_set1_epi32( pers[i] );
+
+	/* 1: (B_0 ... B_{p-1}) <-- PBKDF2(P, S, 1, p * MFLen) */
+	pbkdf2_sha256_8way( B, B_size, sha256, sizeof(sha256), vpers, perslen, 1 );
+
+	blkcpy_8way( sha256, B, sizeof(sha256) / sizeof(sha256[0] ) );
+
+	/* 3: B_i <-- MF(B_i, N) */
+	smix_8way( B, r, N, V, X, &ctx );
+
+	if ( version == YESPOWER_0_5 )
+   {
+		/* 5: DK <-- PBKDF2(P, B, 1, dkLen) */
+		pbkdf2_sha256_8way( dst, sizeof(*dst), sha256, sizeof(sha256),
+                          B, B_size, 1 );
+
+		if ( pers )
+      {
+			hmac_sha256_8way_full( dst, sizeof(*dst), vpers, perslen, sha256 );
+			sha256_8way_full( dst, sha256, sizeof(sha256) );
+		}
+	}
+   else
+		hmac_sha256_8way_full( dst, B + B_size - 64, 64, sha256, sizeof(sha256) );
+
+	/* Success! Note the value is 1, not 0 */
+	retval = 1;
+
+	/* Free memory; the labels unwind the allocations in reverse order */
+	free(S);
+free_X:
+	free(X);
+free_B:
+	free(B);
+free_V:
+	free(V);
+
+	return retval;
+}
+
+int yespower_8way_tls( const __m256i *src, size_t srclen,
+    const yespower_params_t *params, yespower_8way_binary_t *dst, int trhid )
+{
+   const int rc = yespower_8way( NULL, src, srclen, params, dst, trhid );
+   return rc;   /* no thread-local storage is used here, hence local == NULL */
+}
+
+int yespower_init_local8( yespower_local_t *local )
+{
+   /* Only clears the descriptor; no memory is allocated here. */
+   local->aligned      = NULL;   local->base      = NULL;
+   local->aligned_size = 0;      local->base_size = 0;
+   return 0;
+}
+
+int yespower_free_local8( yespower_local_t *local )
+{
+   const int ok = 0;   /* nothing to free: yespower_8way() frees its own */
+   (void)local;        /* parameter intentionally unused */
+   return ok;
+}
+
+int yespower_8way_hash( const char *input, char *output, uint32_t len,
+                        int thrid )
+{  /* Forward to yespower_8way_tls(); the casts match its prototype. */
+   return yespower_8way_tls( (const __m256i*)input, len, &yespower_params,
+           (yespower_8way_binary_t*)output, thrid );
+}
+
+int scanhash_yespower_8way( struct work *work, uint32_t max_nonce,
+                            uint64_t *hashes_done, struct thr_info *mythr )
+{
+   uint32_t _ALIGN(128) hash[8*8];
+   uint32_t _ALIGN(128) vdata[20*8];
+   uint32_t _ALIGN(128) endiandata[20];
+   uint32_t *pdata = work->data;
+   uint32_t *ptarget = work->target;
+   const uint32_t first_nonce = pdata[19];
+   const uint32_t last_nonce = max_nonce;
+   uint32_t n = first_nonce;
+   const int thr_id = mythr->id;
+   // be32enc() the first 19 words of work->data; word 19 is the nonce n
+   for ( int k = 0; k < 19; k++ )
+      be32enc( &endiandata[k], pdata[k] );
+   endiandata[19] = n;
+
+// do sha256 prehash over the first 64 bytes of endiandata
+   SHA256_Init( &sha256_prehash_ctx );
+   SHA256_Update( &sha256_prehash_ctx, endiandata, 64 );
+   // NOTE(review): vdata is never written before the yespower_hash() call below
+   do {
+      if ( yespower_hash( vdata, hash, 80, thr_id ) )
+      if unlikely( valid_hash( hash, ptarget ) && !opt_benchmark )
+      {
+          be32enc( pdata+19, n );
+          submit_solution( work, hash, mythr );
+      }
+      endiandata[19] = ++n;
+   } while ( n < last_nonce && !work_restart[thr_id].restart );
+   *hashes_done = n - first_nonce;
+   pdata[19] = n;
+   return 0;
+}
+
+#endif  // AVX2
--- a/algo/yespower/yespower-gate.c
+++ b/algo/yespower/yespower-gate.c
@@ -30,7 +30,10 @@

 #include "algo-gate-api.h"

-static yespower_params_t yespower_params;
+yespower_params_t yespower_params;
+
+//SHA256_CTX sha256_prehash_ctx;
+__thread SHA256_CTX sha256_prehash_ctx;

 // YESPOWER

@@ -55,6 +58,11 @@ int scanhash_yespower( struct work *work, uint32_t max_nonce,
   for ( int k = 0; k < 19; k++ )
      be32enc( &endiandata[k], pdata[k] );
   endiandata[19] = n;
+
+// do sha256 prehash
+   SHA256_Init( &sha256_prehash_ctx );
+   SHA256_Update( &sha256_prehash_ctx, endiandata, 64 );
+
   do {
      if ( yespower_hash( (char*)endiandata, (char*)vhash, 80, thr_id ) )
      if unlikely( valid_hash( vhash, ptarget ) && !opt_benchmark )
@@ -86,11 +94,16 @@ int scanhash_yespower_b2b( struct work *work, uint32_t max_nonce,
   const uint32_t first_nonce = pdata[19];
   uint32_t n = first_nonce;
   const uint32_t last_nonce = max_nonce;
-   const int thr_id = mythr->id;  // thr_id arg is deprecated
+   const int thr_id = mythr->id;

   for ( int k = 0; k < 19; k++ )
      be32enc( &endiandata[k], pdata[k] );
   endiandata[19] = n;
+
+// do sha256 prehash
+   SHA256_Init( &sha256_prehash_ctx );
+   SHA256_Update( &sha256_prehash_ctx, endiandata, 64 );
+
   do {
      if (yespower_b2b_hash( (char*) endiandata, (char*) vhash, 80, thr_id ) )
      if unlikely( valid_hash( vhash, ptarget ) && !opt_benchmark )
@@ -152,7 +165,7 @@ bool register_yespowerr16_algo( algo_gate_t* gate )
  return true;
 };

-/* not used
+/* not used, doesn't work
 bool register_yescrypt_05_algo( algo_gate_t* gate )
 {
   gate->optimizations = SSE2_OPT | SHA_OPT;
@@ -166,6 +179,40 @@ bool register_yescrypt_05_algo( algo_gate_t* gate )
   return true;
 }

+bool register_yescrypt_05_algo( algo_gate_t* gate )
+{
+   gate->optimizations = SSE2_OPT | SHA_OPT;
+   gate->scanhash   = (void*)&scanhash_yespower;
+   yespower_params.version = YESPOWER_0_5;
+   // N: opt_param_n when non-zero, otherwise 2048
+   if ( opt_param_n )  yespower_params.N = opt_param_n;
+   else                yespower_params.N = 2048;
+   // r: opt_param_r when non-zero, otherwise 8
+   if ( opt_param_r )  yespower_params.r = opt_param_r;
+   else                yespower_params.r = 8;
+   // pers/perslen: opt_param_key and its strlen when set, otherwise NULL/0
+   if ( opt_param_key )
+   {
+     yespower_params.pers = opt_param_key;
+     yespower_params.perslen = strlen( opt_param_key );
+   }
+   else
+   {
+     yespower_params.pers = NULL;
+     yespower_params.perslen = 0;
+   }
+
+//   YESCRYPT_P = 1;
+   // Log the chosen parameters, and the key only when one is set
+   applog( LOG_NOTICE,"Yescrypt parameters: N= %d, R= %d.",
+                                      yespower_params.N, yespower_params.r );
+   if ( yespower_params.pers )
+     applog( LOG_NOTICE,"Key= \"%s\"\n", yespower_params.pers );
+
+   return true;
+}
+
+
 bool register_yescryptr8_05_algo( algo_gate_t* gate )
 {
   gate->optimizations = SSE2_OPT | SHA_OPT;
@@ -222,7 +269,7 @@ bool register_power2b_algo( algo_gate_t* gate )
  applog( LOG_NOTICE,"Key= \"%s\"", yespower_params.pers );
  applog( LOG_NOTICE,"Key length= %d\n", yespower_params.perslen );

-  gate->optimizations = SSE2_OPT | SHA_OPT;
+  gate->optimizations = SSE2_OPT;
  gate->scanhash      = (void*)&scanhash_yespower_b2b;
  gate->hash          = (void*)&yespower_b2b_hash;
  opt_target_factor = 65536.0;
--- a/algo/yespower/yespower-opt.c
+++ b/algo/yespower/yespower-opt.c
@@ -96,6 +96,8 @@
 #include <stdlib.h>
 #include <string.h>
 #include "algo/sha/hmac-sha256-hash.h"
+#include "algo/sha/hmac-sha256-hash-4way.h"
+
 #include "yespower.h"
 #include "yespower-platform.c"

@@ -1038,12 +1040,13 @@ int yespower(yespower_local_t *local,
 	salsa20_blk_t *V, *XY;
 	pwxform_ctx_t ctx;
 	uint8_t sha256[32];
+   SHA256_CTX sha256_ctx;

 	/* Sanity-check parameters */
-	if ((version != YESPOWER_0_5 && version != YESPOWER_1_0) ||
-	    N < 1024 || N > 512 * 1024 || r < 8 || r > 32 ||
-	    (N & (N - 1)) != 0 ||
-	    (!pers && perslen)) {
+	if ( (version != YESPOWER_0_5 && version != YESPOWER_1_0)
+      || N < 1024 || N > 512 * 1024 || r < 8 || r > 32
+      || (N & (N - 1)) != 0 || ( !pers && perslen ) )
+   {
 		errno = EINVAL;
 		return -1;
 	}
@@ -1051,20 +1054,22 @@ int yespower(yespower_local_t *local,
 	/* Allocate memory */
 	B_size = (size_t)128 * r;
 	V_size = B_size * N;
-	if (version == YESPOWER_0_5) {
+	if ( version == YESPOWER_0_5 )
+   {
 		XY_size = B_size * 2;
 		Swidth = Swidth_0_5;
-		ctx.Sbytes = 2 * Swidth_to_Sbytes1(Swidth);
+		ctx.Sbytes = 2 * Swidth_to_Sbytes1( Swidth );
 	} else {
 		XY_size = B_size + 64;
 		Swidth = Swidth_1_0;
-		ctx.Sbytes = 3 * Swidth_to_Sbytes1(Swidth);
+		ctx.Sbytes = 3 * Swidth_to_Sbytes1( Swidth );
 	}
 	need = B_size + V_size + XY_size + ctx.Sbytes;
-	if (local->aligned_size < need) {
-		if (free_region(local))
+	if ( local->aligned_size < need )
+   {
+		if ( free_region( local ) )
 			return -1;
-		if (!alloc_region(local, need))
+		if ( !alloc_region( local, need ) )
 			return -1;
 	}
 	B = (uint8_t *)local->aligned;
@@ -1072,48 +1077,81 @@ int yespower(yespower_local_t *local,
 	XY = (salsa20_blk_t *)((uint8_t *)V + V_size);
 	S = (uint8_t *)XY + XY_size;
 	ctx.S0 = S;
-	ctx.S1 = S + Swidth_to_Sbytes1(Swidth);
+	ctx.S1 = S + Swidth_to_Sbytes1( Swidth );

-	SHA256_Buf(src, srclen, sha256);

-	if (version == YESPOWER_0_5) {
-		PBKDF2_SHA256(sha256, sizeof(sha256), src, srclen, 1,
-		    B, B_size);
+// copy prehash, do tail   
+   memcpy( &sha256_ctx, &sha256_prehash_ctx, sizeof sha256_ctx );
+   SHA256_Update( &sha256_ctx, src+64, srclen-64 );
+   SHA256_Final( sha256, &sha256_ctx );

-      if ( work_restart[thrid].restart ) return false;
+//   SHA256_Buf(src, srclen, sha256);
+
+	if ( version == YESPOWER_0_5 )
+   {
+      PBKDF2_SHA256( sha256, sizeof(sha256), src, srclen, 1, B, B_size );
+
+      if ( work_restart[thrid].restart ) return 0;
   
-      memcpy(sha256, B, sizeof(sha256));
-		smix(B, r, N, V, XY, &ctx);
+      memcpy( sha256, B, sizeof(sha256) );
+		smix( B, r, N, V, XY, &ctx );

-      if ( work_restart[thrid].restart ) return false;
+      if ( work_restart[thrid].restart ) return 0;

-      PBKDF2_SHA256(sha256, sizeof(sha256), B, B_size, 1,
-		    (uint8_t *)dst, sizeof(*dst));
+      PBKDF2_SHA256( sha256, sizeof(sha256), B, B_size, 1, (uint8_t *)dst,
+                     sizeof(*dst) );

-		if (pers) {
-			HMAC_SHA256_Buf(dst, sizeof(*dst), pers, perslen,
-			    sha256);
+      if ( work_restart[thrid].restart ) return 0;

-         if ( work_restart[thrid].restart ) return false;
+      if ( pers )
+      {
+         src = pers;
+         srclen = perslen;
+      }
+      else
+         srclen = 0;
+      
+      HMAC_SHA256_CTX ctx;
+      HMAC_SHA256_Init( &ctx, dst, sizeof(*dst) );
+      HMAC_SHA256_Update( &ctx, src, srclen );
+      HMAC_SHA256_Final( sha256, &ctx );

-         SHA256_Buf(sha256, sizeof(sha256), (uint8_t *)dst);
+//      SHA256_CTX ctx;
+      SHA256_Init( &sha256_ctx );
+      SHA256_Update( &sha256_ctx, sha256, sizeof(sha256) );
+      SHA256_Final( (unsigned char*)dst, &sha256_ctx );
+
+
+/*
+      if ( pers )
+      {
+			HMAC_SHA256_Buf( dst, sizeof(*dst), pers, perslen, sha256 );
+         SHA256_Buf( sha256, sizeof(sha256), (uint8_t *)dst );
 		}
-	} else {
-		ctx.S2 = S + 2 * Swidth_to_Sbytes1(Swidth);
+*/
+   }
+   else
+   {
+		ctx.S2 = S + 2 * Swidth_to_Sbytes1( Swidth );
 		ctx.w = 0;

-		if (pers) {
+		if ( pers )
+      {
 			src = pers;
 			srclen = perslen;
-		} else {
-			srclen = 0;
 		}
+      else
+			srclen = 0;

-		PBKDF2_SHA256(sha256, sizeof(sha256), src, srclen, 1, B, 128);
-		memcpy(sha256, B, sizeof(sha256));
-		smix_1_0(B, r, N, V, XY, &ctx);
-		HMAC_SHA256_Buf(B + B_size - 64, 64,
-		    sha256, sizeof(sha256), (uint8_t *)dst);
+		PBKDF2_SHA256( sha256, sizeof(sha256), src, srclen, 1, B, 128 );
+		memcpy( sha256, B, sizeof(sha256) );
+
+      if ( work_restart[thrid].restart ) return 0;
+
+      smix_1_0( B, r, N, V, XY, &ctx );
+
+      HMAC_SHA256_Buf( B + B_size - 64, 64, sha256, sizeof(sha256),
+                       (uint8_t *)dst );
 	}

 	/* Success! */
--- a/algo/yespower/yespower-ref.c
+++ b/algo/yespower/yespower-ref.c
@@ -453,9 +453,8 @@ static void smix(uint32_t *B, size_t r, uint32_t N,
 *
 * Return 0 on success; or -1 on error.
 */
-int yespower(yespower_local_t *local,
-    const uint8_t *src, size_t srclen,
-    const yespower_params_t *params, yespower_binary_t *dst)
+int yespower( yespower_local_t *local, const uint8_t *src, size_t srclen,
+    const yespower_params_t *params, yespower_binary_t *dst, int thrid ) 
 {
 	yespower_version_t version = params->version;
 	uint32_t N = params->N;
@@ -534,17 +533,16 @@ int yespower(yespower_local_t *local,

 		if (pers) {
 			HMAC_SHA256_Buf(dst, sizeof(*dst), pers, perslen,
-               return true;
 			    (uint8_t *)sha256);
 			SHA256_Buf(sha256, sizeof(sha256), (uint8_t *)dst);
 		}
 	} else {
-		HMAC_SHA256_Buf_P((uint8_t *)B + B_size - 64, 64,
+		HMAC_SHA256_Buf((uint8_t *)B + B_size - 64, 64,
 		    sha256, sizeof(sha256), (uint8_t *)dst);
 	}

 	/* Success! */
-	retval = 0;
+	retval = 1;

 	/* Free memory */
 	free(S);
@@ -559,10 +557,10 @@ free_V:
 }

 int yespower_tls(const uint8_t *src, size_t srclen,
-    const yespower_params_t *params, yespower_binary_t *dst)
+    const yespower_params_t *params, yespower_binary_t *dst, int thrid )
 {
 /* The reference implementation doesn't use thread-local storage */
-	return yespower(NULL, src, srclen, params, dst);
+	return yespower(NULL, src, srclen, params, dst, thrid );
 }

 int yespower_init_local(yespower_local_t *local)
--- a/algo/yespower/yespower.h
+++ b/algo/yespower/yespower.h
@@ -33,6 +33,8 @@
 #include <stdint.h>
 #include <stdlib.h> /* for size_t */
 #include "miner.h"
+#include "simd-utils.h"
+#include <openssl/sha.h>

 #ifdef __cplusplus
 extern "C" {
@@ -74,6 +76,11 @@ typedef struct {
 	unsigned char uc[32];
 } yespower_binary_t __attribute__ ((aligned (64)));

+extern yespower_params_t yespower_params;   /* defined in yespower-gate.c */
+
+//SHA256_CTX sha256_prehash_ctx;
+extern __thread SHA256_CTX sha256_prehash_ctx;
+
 /**
 * yespower_init_local(local):
 * Initialize the thread-local (RAM) data structure.  Actual memory allocation
@@ -131,6 +138,24 @@ extern int yespower_tls(const uint8_t *src, size_t srclen,
 extern int yespower_b2b_tls(const uint8_t *src, size_t srclen,
    const yespower_params_t *params, yespower_binary_t *dst, int thr_id);

+
+#if defined(__AVX2__)
+
+typedef struct
+{
+   __m256i uc[8];
+} yespower_8way_binary_t __attribute__ ((aligned (128)));
+
+extern int yespower_8way( yespower_local_t *local, const __m256i *src,
+                          size_t srclen, const yespower_params_t *params,
+                          yespower_8way_binary_t *dst, int thrid );
+
+
+extern int yespower_8way_tls( const __m256i *src, size_t srclen,
+    const yespower_params_t *params, yespower_8way_binary_t *dst, int thr_id );
+
+#endif // AVX2
+
 #ifdef __cplusplus
 }
 #endif
--- a/20
+++ b/20
@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.12.6.1.
+# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.12.8.1.
 #
 #
 # Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
@@ -577,8 +577,8 @@ MAKEFLAGS=
 # Identity of this package.
 PACKAGE_NAME='cpuminer-opt'
 PACKAGE_TARNAME='cpuminer-opt'
-PACKAGE_VERSION='3.12.6.1'
-PACKAGE_STRING='cpuminer-opt 3.12.6.1'
+PACKAGE_VERSION='3.12.8.1'
+PACKAGE_STRING='cpuminer-opt 3.12.8.1'
 PACKAGE_BUGREPORT=''
 PACKAGE_URL=''

@@ -1332,7 +1332,7 @@ if test "$ac_init_help" = "long"; then
  # Omit some internal or obsolete options to make the list less imposing.
  # This message is too long to be a string in the A/UX 3.1 sh.
  cat <<_ACEOF
-\`configure' configures cpuminer-opt 3.12.6.1 to adapt to many kinds of systems.
+\`configure' configures cpuminer-opt 3.12.8.1 to adapt to many kinds of systems.

 Usage: $0 [OPTION]... [VAR=VALUE]...

@@ -1404,7 +1404,7 @@ fi

 if test -n "$ac_init_help"; then
  case $ac_init_help in
-     short | recursive ) echo "Configuration of cpuminer-opt 3.12.6.1:";;
+     short | recursive ) echo "Configuration of cpuminer-opt 3.12.8.1:";;
   esac
  cat <<\_ACEOF

@@ -1509,7 +1509,7 @@ fi
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
  cat <<\_ACEOF
-cpuminer-opt configure 3.12.6.1
+cpuminer-opt configure 3.12.8.1
 generated by GNU Autoconf 2.69

 Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2012,7 +2012,7 @@ cat >config.log <<_ACEOF
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.

-It was created by cpuminer-opt $as_me 3.12.6.1, which was
+It was created by cpuminer-opt $as_me 3.12.8.1, which was
 generated by GNU Autoconf 2.69.  Invocation command line was

  $ $0 $@
@@ -2993,7 +2993,7 @@ fi

 # Define the identity of the package.
 PACKAGE='cpuminer-opt'
- VERSION='3.12.6.1'
+ VERSION='3.12.8.1'


 cat >>confdefs.h <<_ACEOF
@@ -6690,7 +6690,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by cpuminer-opt $as_me 3.12.6.1, which was
+This file was extended by cpuminer-opt $as_me 3.12.8.1, which was
 generated by GNU Autoconf 2.69.  Invocation command line was

  CONFIG_FILES    = $CONFIG_FILES
@@ -6756,7 +6756,7 @@ _ACEOF
 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
 ac_cs_version="\\
-cpuminer-opt config.status 3.12.6.1
+cpuminer-opt config.status 3.12.8.1
 configured by $0, generated by GNU Autoconf 2.69,
  with options \\"\$ac_cs_config\\"

--- a/configure.ac
+++ b/configure.ac
@@ -1,4 +1,4 @@
-AC_INIT([cpuminer-opt], [3.12.6.1])
+AC_INIT([cpuminer-opt], [3.12.8.1])

 AC_PREREQ([2.59c])
 AC_CANONICAL_SYSTEM
--- a/cpu-miner.c
+++ b/cpu-miner.c
@@ -193,11 +193,11 @@ static uint64_t submit_sum  = 0;
 static uint64_t accept_sum  = 0;
 static uint64_t stale_sum  = 0;
 static uint64_t reject_sum  = 0;
+static uint64_t solved_sum  = 0;
 static double   norm_diff_sum = 0.;
 static uint32_t last_block_height = 0;
-static double   highest_share = 0;   // all shares include discard and reject
-static double   lowest_share = 9e99; // lowest accepted
-//static bool     new_job = false;
+static double   highest_share = 0;   // highest accepted share diff
+static double   lowest_share = 9e99; // lowest accepted share diff
 static double   last_targetdiff = 0.;
 #if !(defined(__WINDOWS__) || defined(_WIN64) || defined(_WIN32))
 static uint32_t hi_temp = 0;
@@ -211,7 +211,6 @@ static char const short_options[] =
 	"a:b:Bc:CDf:hK:m:n:N:p:Px:qr:R:s:t:T:o:u:O:V";

 static struct work g_work __attribute__ ((aligned (64))) = {{ 0 }};
-//static struct work tmp_work;
 time_t g_work_time = 0;
 static        pthread_mutex_t g_work_lock;
 static bool   submit_old = false;
@@ -452,7 +451,7 @@ static bool work_decode( const json_t *val, struct work *work )
        return false;
    if ( !allow_mininginfo )
        net_diff = algo_gate.calc_network_diff( work );
-    work->targetdiff = target_to_diff( work->target );
+    work->targetdiff = hash_to_diff( work->target );
    stratum_diff = last_targetdiff = work->targetdiff;
    work->sharediff = 0;
    algo_gate.decode_extra_data( work, &net_blocks );
@@ -907,10 +906,13 @@ static inline void sprintf_et( char *str, int seconds )
   else         // 0m00s
      sprintf( str, "%um%02us", min, sec );
 }
-   
-const double exp32 = 4294967296.;                // 2**32
-const double exp48 = 4294967296. * 65536.;       // 2**48
-const double exp64 = 4294967296. * 4294967296.;  // 2**64
+
+const long double exp32 = EXP32;                                  // 2**32
+const long double exp48 = EXP32 * EXP16;                          // 2**48
+const long double exp64 = EXP32 * EXP32;                          // 2**64
+const long double exp96 = EXP32 * EXP32 * EXP32;                  // 2**96
+const long double exp128 = EXP32 * EXP32 * EXP32 * EXP32;         // 2**128
+const long double exp160 = EXP32 * EXP32 * EXP32 * EXP32 * EXP16; // 2**160

 struct share_stats_t
 {
@@ -954,6 +956,7 @@ void report_summary_log( bool force )
   uint64_t accepts = accept_sum;  accept_sum = 0;
   uint64_t rejects = reject_sum;  reject_sum = 0;
   uint64_t stales  = stale_sum;   stale_sum  = 0;
+   uint64_t solved  = solved_sum;  solved_sum = 0;

   memcpy( &start_time, &five_min_start, sizeof start_time );
   memcpy( &five_min_start, &now, sizeof now );
@@ -968,8 +971,9 @@ void report_summary_log( bool force )
   double shrate = share_time == 0. ? 0. : exp32 * last_targetdiff
                                           * (double)(accepts) / share_time;
   double sess_hrate = uptime.tv_sec == 0. ? 0. : exp32 * norm_diff_sum
-                                                   / (double)uptime.tv_sec;
-   double submit_rate = share_time == 0. ? 0. : (double)submits*60. / share_time;
+                                                / (double)uptime.tv_sec;
+   double submit_rate = share_time == 0. ? 0. : (double)submits*60.
+                                                / share_time;
   char shr_units[4] = {0};
   char ghr_units[4] = {0};
   char sess_hr_units[4] = {0};
@@ -1020,8 +1024,8 @@ void report_summary_log( bool force )
      applog2( LOG_INFO,"Rejected         %6d       %6d",
                       rejects, rejected_share_count );
   if ( solved_block_count )
-      applog2( LOG_INFO,"Blocks Solved                 %6d",
-                         solved_block_count );
+      applog2( LOG_INFO,"Blocks Solved    %6d       %6d",
+                         solved, solved_block_count );
   applog2( LOG_INFO, "Hi/Lo Share Diff  %.5g /  %.5g",
                       highest_share, lowest_share );

@@ -1079,7 +1083,8 @@ static int share_result( int result, struct work *work,
   if ( likely( result ) )
   {
      accepted_share_count++;
-      if ( my_stats.share_diff < lowest_share )
+      if ( ( my_stats.share_diff > 0. ) 
+        && ( my_stats.share_diff < lowest_share ) )
         lowest_share = my_stats.share_diff;
      if ( my_stats.share_diff > highest_share )
         highest_share = my_stats.share_diff;
@@ -1117,7 +1122,6 @@ static int share_result( int result, struct work *work,
        rejected_share_count++;
        sprintf( sres, "S%d", stale_share_count );
        sprintf( rres, "Rejected %d" , rejected_share_count );
-//        lowdiff_debug = true;
     }
   }

@@ -1132,6 +1136,7 @@ static int share_result( int result, struct work *work,
   {
      accept_sum++;
      norm_diff_sum += my_stats.target_diff;
+      if ( solved ) solved_sum++;
   }
   else
   {
@@ -1183,19 +1188,17 @@ static int share_result( int result, struct work *work,
         applog( LOG_WARNING, "Reject reason: %s", reason );
         
      // display share hash and target for troubleshooting
-      diff_to_target( str, my_stats.share_diff );
-      applog2( LOG_INFO, "Hash:   %08x%08x%08x%08x...",
-                         str[7], str[6], str[5], str[4] );
+      diff_to_hash( str, my_stats.share_diff );
+      applog2( LOG_INFO, "Hash:   %08x%08x%08x...", str[7], str[6], str[5] );
      uint32_t *targ;
      if ( work )
         targ = work->target;
      else
      {
-         diff_to_target( str, my_stats.target_diff );
+         diff_to_hash( str, my_stats.target_diff );
         targ = &str[0];
      }
-      applog2( LOG_INFO, "Target: %08x%08x%08x%08x...",
-                         targ[7], targ[6], targ[5], targ[4] );
+      applog2( LOG_INFO, "Target: %08x%08x%08x...", targ[7], targ[6], targ[5] );
   }
   return 1;
 }
@@ -1309,7 +1312,6 @@ char* std_malloc_txs_request( struct work *work )
  json_t *val;
  char data_str[2 * sizeof(work->data) + 1];
  int i;
-
  int datasize = work->sapling ? 112 : 80;

  for ( i = 0; i < ARRAY_SIZE(work->data); i++ )
@@ -1678,7 +1680,7 @@ static bool get_work(struct thr_info *thr, struct work *work)
 	return true;
 }

-bool submit_work( struct thr_info *thr, const struct work *work_in )
+static bool submit_work( struct thr_info *thr, const struct work *work_in )
 {
 	struct workio_cmd *wc;

@@ -1702,20 +1704,8 @@ err_out:
 	return false;
 }

-/*
-// __float128?
-// Convert little endian 256 bit (38 decimal digits) unsigned integer to
-// double precision floating point with 15 decimal digits precision.
-static inline double u256_to_double( const uint64_t *u )
-{
-   return ( ( u[3] * exp64 + u[2] ) * exp64 + u[1] ) * exp64 + u[0];
-}
-*/
-
 static void update_submit_stats( struct work *work, const void *hash )
 {
-//   work->sharediff = hash ? exp32 / ( (uint64_t*)hash )[3] : 0.;
-
   pthread_mutex_lock( &stats_lock );

   submitted_share_count++;
@@ -1735,19 +1725,17 @@ static void update_submit_stats( struct work *work, const void *hash )
 bool submit_solution( struct work *work, const void *hash,
                      struct thr_info *thr )
 {
-   work->sharediff = hash ? exp32 / ( (uint64_t*)hash )[3] : 0.;
-
+   work->sharediff = hash_to_diff( hash );
   if ( likely( submit_work( thr, work ) ) )
-  {
+   {
     update_submit_stats( work, hash );
-
     if ( !opt_quiet )
     {
        if ( have_stratum )
           applog( LOG_NOTICE, "%d Submitted Diff %.5g, Block %d, Job %s",
                   submitted_share_count, work->sharediff, work->height,
                   work->job_id );
-         else
+        else
           applog( LOG_NOTICE, "%d Submitted Diff %.5g, Block %d, Ntime %08x",
                   submitted_share_count, work->sharediff, work->height,
                   work->data[ algo_gate.ntime_index ] );
@@ -1763,49 +1751,10 @@ bool submit_solution( struct work *work, const void *hash,
                                    t[7],t[6],t[5],t[4],t[3],t[2],t[1],t[0]);
     }
     return true;
-  }
-  else
+   }
+   else
     applog( LOG_WARNING, "%d failed to submit share", submitted_share_count );
-  return false;
-}
-
-// deprecated, use submit_solution
-bool submit_lane_solution( struct work *work, const void *hash,
-                           struct thr_info *thr, const int lane )
-{
-   work->sharediff = hash ? exp32 / ( (uint64_t*)hash )[3] : 0.;
-
-   if ( likely( submit_work( thr, work ) ) )
-   {
-     update_submit_stats( work, hash );
-
-     if ( !opt_quiet )
-     {
-        if ( have_stratum )
-           applog( LOG_NOTICE, "%d Submitted Diff %.5g, Block %d, Job %s",
-                   submitted_share_count, work->sharediff, work->height,
-                   work->job_id );
-        else
-           applog( LOG_NOTICE, "%d Submitted Diff %.5g, Block %d, Ntime %08x",
-                   submitted_share_count, work->sharediff, work->height, 
-                   work->data[ algo_gate.ntime_index ] );
-     }
-
-     if ( lowdiff_debug )
-     {
-        uint32_t* h = (uint32_t*)hash;
-        uint32_t* t = (uint32_t*)work->target;
-        applog(LOG_INFO,"Hash[7:0]: %08x %08x %08x %08x %08x %08x %08x %08x",
-                                    h[7],h[6],h[5],h[4],h[3],h[2],h[1],h[0]);
-         applog(LOG_INFO,"Targ[7:0]: %08x %08x %08x %08x %08x %08x %08x %08x",
-                                     t[7],t[6],t[5],t[4],t[3],t[2],t[1],t[0]);
-    }
-    return true;
-  }
-  else
-     applog( LOG_WARNING, "%d failed to submit share", submitted_share_count );
-
-  return false;
+   return false;
 }

 static bool wanna_mine(int thr_id)
@@ -1911,8 +1860,6 @@ void std_get_new_work( struct work* work, struct work* g_work, int thr_id,
     work_free( work );
     work_copy( work, g_work );
     *nonceptr = 0xffffffffU / opt_n_threads * thr_id;
-//     if ( opt_randomize )
-//       *nonceptr += ( (rand() *4 ) & UINT32_MAX ) / opt_n_threads;
     *end_nonce_ptr = ( 0xffffffffU / opt_n_threads ) * (thr_id+1) - 0x20;
   }
   else
@@ -1930,6 +1877,108 @@ bool std_ready_to_mine( struct work* work, struct stratum_ctx* stratum,
   return true;
 }

+static void stratum_gen_work( struct stratum_ctx *sctx, struct work *g_work )
+{
+   pthread_mutex_lock( &sctx->work_lock );
+   free( g_work->job_id );
+   g_work->job_id = strdup( sctx->job.job_id );
+   g_work->xnonce2_len = sctx->xnonce2_size;
+   g_work->xnonce2 = (uchar*) realloc( g_work->xnonce2, sctx->xnonce2_size );
+   memcpy( g_work->xnonce2, sctx->job.xnonce2, sctx->xnonce2_size );
+   algo_gate.build_extraheader( g_work, sctx );
+   net_diff = algo_gate.calc_network_diff( g_work );
+   algo_gate.set_work_data_endian( g_work );
+   g_work->height = sctx->block_height;
+   g_work->targetdiff = sctx->job.diff
+                                   / ( opt_target_factor * opt_diff_factor );
+   diff_to_hash( g_work->target, g_work->targetdiff );
+
+   pthread_mutex_unlock( &sctx->work_lock );
+
+   restart_threads();
+
+   if ( opt_debug )
+   {
+      unsigned char *xnonce2str = abin2hex( g_work->xnonce2,
+                                           g_work->xnonce2_len );
+      applog( LOG_DEBUG, "DEBUG: job_id='%s' extranonce2=%s ntime=%08x",
+                    g_work->job_id, xnonce2str, swab32( g_work->data[17] ) );
+      free( xnonce2str );
+   }
+
+   double hr = 0.;
+   pthread_mutex_lock( &stats_lock );
+
+   for ( int i = 0; i < opt_n_threads; i++ )
+      hr += thr_hashrates[i];
+   global_hashrate = hr;
+   pthread_mutex_unlock( &stats_lock );
+
+   if ( stratum_diff != sctx->job.diff )
+      applog( LOG_BLUE, "New Diff %g, Block %d, Job %s",
+                        sctx->job.diff, sctx->block_height, g_work->job_id );
+   else if ( last_block_height != sctx->block_height )
+      applog( LOG_BLUE, "New Block %d, Job %s",
+                        sctx->block_height, g_work->job_id );
+   else if ( g_work->job_id )
+      applog( LOG_BLUE,"New Job %s", g_work->job_id );
+
+   // Update data and calculate new estimates.
+   if ( ( stratum_diff != sctx->job.diff )
+   || ( last_block_height != sctx->block_height ) )
+   {
+      static bool multipool = false;
+      if ( stratum.block_height < last_block_height ) multipool = true;
+      if ( unlikely( !session_first_block ) )
+         session_first_block = stratum.block_height;
+      last_block_height = stratum.block_height;
+      stratum_diff      = sctx->job.diff;
+      last_targetdiff   = g_work->targetdiff;
+      if ( lowest_share < last_targetdiff )
+         lowest_share = 9e99;
+
+      if ( !opt_quiet )
+      {
+         applog2( LOG_INFO, "Diff: Net %.5g, Stratum %.5g, Target %.5g",
+                            net_diff, stratum_diff, g_work->targetdiff );
+
+         if ( likely( hr > 0. ) )
+         {
+            char hr_units[4] = {0};
+            char block_ttf[32];
+            char share_ttf[32];
+
+            sprintf_et( block_ttf, ( net_diff * exp32 ) /  hr );
+            sprintf_et( share_ttf, g_work->targetdiff * exp32 / hr );
+            scale_hash_for_display ( &hr, hr_units );
+            applog2( LOG_INFO, "TTF @ %.2f %sh/s: Block %s, Share %s",
+                               hr, hr_units, block_ttf, share_ttf );
+
+            if ( !multipool && last_block_height > session_first_block )
+            {
+               struct timeval now, et;
+               gettimeofday( &now, NULL );
+               timeval_subtract( &et, &now, &session_start );
+               uint64_t net_ttf =
+                    ( last_block_height - session_first_block ) == 0 ? 0
+                    : et.tv_sec / ( last_block_height - session_first_block );
+               if ( net_diff && net_ttf )
+               {
+                  double net_hr = net_diff * exp32 / net_ttf;
+//                  char net_ttf_str[32];
+                  char net_hr_units[4] = {0};
+
+//                 sprintf_et( net_ttf_str, net_ttf );
+                  scale_hash_for_display ( &net_hr, net_hr_units );
+                  applog2( LOG_INFO, "Net hash rate (est) %.2f %sh/s",
+                                     net_hr, net_hr_units );
+               }
+            }
+         }  // hr > 0
+      } // !quiet
+   }  // new diff/block
+}
+
 static void *miner_thread( void *userdata )
 {
   struct   work work __attribute__ ((aligned (64))) ;
@@ -2050,7 +2099,7 @@ static void *miner_thread( void *userdata )
      	     pthread_mutex_lock( &g_work_lock );

              if ( *nonceptr >= end_nonce )
-                 algo_gate.stratum_gen_work( &stratum, &g_work );
+                 stratum_gen_work( &stratum, &g_work );
              algo_gate.get_new_work( &work, &g_work, thr_id, &end_nonce );
              pthread_mutex_unlock( &g_work_lock );
          }
@@ -2101,14 +2150,6 @@ static void *miner_thread( void *userdata )
       else  // getwork inline
          max64 = opt_scantime * thr_hashrates[thr_id];   

-/*       
-       if ( have_stratum )
-          max64 = LP_SCANTIME;
-       else
-          max64 = g_work_time + ( have_longpoll ? LP_SCANTIME : opt_scantime )
-	                      - time(NULL);
-*/
-
       // time limit
       if ( unlikely( opt_time_limit && firstwork_time ) )
       {
@@ -2141,7 +2182,6 @@ static void *miner_thread( void *userdata )
       // Initial value arbitrarilly set to 1000 just to get
       // a sample hashrate for the next time.
       uint32_t work_nonce = *nonceptr;
-//       max64 = 60 * thr_hashrates[thr_id];
       if ( max64 <= 0)
          max64 = 1000;
       if ( work_nonce + max64 > end_nonce )
@@ -2197,16 +2237,18 @@ static void *miner_thread( void *userdata )
       }

 #if !(defined(__WINDOWS__) || defined(_WIN64) || defined(_WIN32))
+       // Display CPU temperature and clock rate.
       if (!opt_quiet && mythr->id == 0 )
       {
          int temp = cpu_temp(0);
          static struct timeval cpu_temp_time = {0};
          timeval_subtract( &diff, &tv_end, &cpu_temp_time );
          int wait = temp >= 80 ? 30 : temp >= 70 ? 60 : 120;
+
          if ( ( diff.tv_sec > wait ) || ( temp > hi_temp ) )
          {
             char tempstr[32];
-             int lo_freq, hi_freq;
+             float lo_freq = 0., hi_freq = 0.;
             linux_cpu_hilo_freq( &lo_freq, &hi_freq );
             memcpy( &cpu_temp_time, &tv_end, sizeof(cpu_temp_time) );
             if ( use_colors && ( temp >= 70 ) )
@@ -2219,11 +2261,12 @@ static void *miner_thread( void *userdata )
             else
                sprintf( tempstr, "%d C", temp );
             applog( LOG_NOTICE,"CPU temp: curr %s (max %d), Freq: %.3f/%.3f GHz",
-                     tempstr, hi_temp, (float)lo_freq / 1e6, (float)hi_freq/ 1e6 );
+                     tempstr, hi_temp, lo_freq / 1e6, hi_freq / 1e6 );
             if ( temp > hi_temp ) hi_temp = temp;
          }
       }
 #endif
+
       // display hashrate
       if ( unlikely( opt_hash_meter ) )
       {
@@ -2525,106 +2568,6 @@ void std_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
          sctx->job.final_sapling_hash );
 }

-void std_stratum_gen_work( struct stratum_ctx *sctx, struct work *g_work )
-{
-   pthread_mutex_lock( &sctx->work_lock );
-   free( g_work->job_id );
-   g_work->job_id = strdup( sctx->job.job_id );
-   g_work->xnonce2_len = sctx->xnonce2_size;
-   g_work->xnonce2 = (uchar*) realloc( g_work->xnonce2, sctx->xnonce2_size );
-   memcpy( g_work->xnonce2, sctx->job.xnonce2, sctx->xnonce2_size );
-   algo_gate.build_extraheader( g_work, sctx );
-   net_diff = algo_gate.calc_network_diff( g_work );
-   algo_gate.set_work_data_endian( g_work );
-   g_work->height = sctx->block_height;
-   g_work->targetdiff = sctx->job.diff
-                                   / ( opt_target_factor * opt_diff_factor );
-   diff_to_target( g_work->target, g_work->targetdiff );
-
-   pthread_mutex_unlock( &sctx->work_lock );
-
-   restart_threads();
-
-   if ( opt_debug )
-   {
-      unsigned char *xnonce2str = abin2hex( g_work->xnonce2,
-                                           g_work->xnonce2_len );
-      applog( LOG_DEBUG, "DEBUG: job_id='%s' extranonce2=%s ntime=%08x",
-                    g_work->job_id, xnonce2str, swab32( g_work->data[17] ) );
-      free( xnonce2str );
-   }
-
-   double hr = 0.;
-   pthread_mutex_lock( &stats_lock );
-
-   for ( int i = 0; i < opt_n_threads; i++ )
-      hr += thr_hashrates[i];
-   global_hashrate = hr;
-   pthread_mutex_unlock( &stats_lock );
-
-   if ( stratum_diff != sctx->job.diff )
-      applog( LOG_BLUE, "New Diff %g, Block %d, Job %s",
-                        sctx->job.diff, sctx->block_height, g_work->job_id );
-   else if ( last_block_height != sctx->block_height )
-      applog( LOG_BLUE, "New Block %d, Job %s",
-                        sctx->block_height, g_work->job_id );
-   else if ( g_work->job_id ) 
-      applog( LOG_BLUE,"New Job %s", g_work->job_id );
-
-   // Update data and calculate new estimates.
-   if ( ( stratum_diff != sctx->job.diff )
-   || ( last_block_height != sctx->block_height ) )
-   {
-      static bool multipool = false;
-      if ( stratum.block_height < last_block_height ) multipool = true;
-      if ( unlikely( !session_first_block ) )
-         session_first_block = stratum.block_height;
-      last_block_height = stratum.block_height;
-      stratum_diff      = sctx->job.diff;
-      last_targetdiff   = g_work->targetdiff;
-
-      if ( !opt_quiet )
-      {
-         applog2( LOG_INFO, "Diff: Net %.5g, Stratum %.5g, Target %.5g",
-                            net_diff, stratum_diff, g_work->targetdiff );
-
-         if ( likely( hr > 0. ) )
-         {
-            char hr_units[4] = {0};
-            char block_ttf[32];
-            char share_ttf[32];
-
-            sprintf_et( block_ttf, ( net_diff * exp32 ) /  hr );
-            sprintf_et( share_ttf, g_work->targetdiff * exp32 / hr );
-            scale_hash_for_display ( &hr, hr_units );
-            applog2( LOG_INFO, "TTF @ %.2f %sh/s: Block %s, Share %s",
-                               hr, hr_units, block_ttf, share_ttf );
-
-            if ( !multipool && last_block_height > session_first_block )
-            {
-               struct timeval now, et;
-               gettimeofday( &now, NULL );
-               timeval_subtract( &et, &now, &session_start );
-               uint64_t net_ttf =
-                    ( last_block_height - session_first_block ) == 0 ? 0
-                    : et.tv_sec / ( last_block_height - session_first_block );
-               if ( net_diff && net_ttf )
-               {
-                  double net_hr = net_diff * exp32 / net_ttf;
-//                  char net_ttf_str[32];
-                  char net_hr_units[4] = {0};
-
-//                 sprintf_et( net_ttf_str, net_ttf );
-                  scale_hash_for_display ( &net_hr, net_hr_units );
-                  applog2( LOG_INFO, "Net hash rate (est) %.2f %sh/s",
-                                     net_hr, net_hr_units );
-               }
-            }
-         }  // hr > 0
-      } // !quiet
-   }  // new diff/block   
-}
-
 static void *stratum_thread(void *userdata )
 {
   struct thr_info *mythr = (struct thr_info *) userdata;
@@ -2686,10 +2629,10 @@ static void *stratum_thread(void *userdata )
      if ( stratum.job.job_id
          && ( !g_work_time || strcmp( stratum.job.job_id, g_work.job_id ) ) )
      {
-         pthread_mutex_lock(&g_work_lock);
-         algo_gate.stratum_gen_work( &stratum, &g_work );
-         time(&g_work_time);
-         pthread_mutex_unlock(&g_work_lock);
+         pthread_mutex_lock( &g_work_lock );
+         stratum_gen_work( &stratum, &g_work );
+         time( &g_work_time );
+         pthread_mutex_unlock( &g_work_lock );
         restart_threads();
      } 

@@ -3342,7 +3285,7 @@ bool check_cpu_capability ()
 //     #endif

     cpu_brand_string( cpu_brand );
-     printf( "CPU: %s.\n", cpu_brand );
+     printf( "CPU: %s\n", cpu_brand );
     
     printf("SW built on " __DATE__
     #ifdef _MSC_VER
@@ -3351,7 +3294,7 @@ bool check_cpu_capability ()
         " with GCC");
        printf(" %d.%d.%d\n", __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__);
     #else
-        printf(".\n");
+        printf("\n");
     #endif

     printf("CPU features: ");
@@ -3558,7 +3501,7 @@ int main(int argc, char *argv[])
   }

   // Initialize stats times and counters
-   memset( share_stats, 0, 2 *  sizeof (struct share_stats_t) );
+   memset( share_stats, 0, s_stats_size *  sizeof (struct share_stats_t) );
   gettimeofday( &last_submit_time, NULL );
   memcpy( &five_min_start, &last_submit_time, sizeof (struct timeval) );
   memcpy( &session_start, &last_submit_time, sizeof (struct timeval) );
--- a/miner.h
+++ b/miner.h
@@ -322,16 +322,20 @@ int    timeval_subtract( struct timeval *result, struct timeval *x,
 //
 // diff_to_hash = 2**32 = 0x100000000 = 4294967296 = exp32;

-const double exp32;  // 2**32
-const double exp48;  // 2**48
-const double exp64;  // 2**64
+#define EXP16 65536.
+#define EXP32 4294967296.
+extern const long double exp32;  // 2**32  (all six are defined in cpu-miner.c)
+extern const long double exp48;  // 2**48
+extern const long double exp64;  // 2**64
+extern const long double exp96;  // 2**96
+extern const long double exp128; // 2**128
+extern const long double exp160; // 2**160

 bool   fulltest( const uint32_t *hash, const uint32_t *target );
 bool   valid_hash( const void*, const void* );

-void   work_set_target( struct work* work, double diff );
-double target_to_diff( uint32_t* target );
-extern void diff_to_target( uint32_t *target, double diff );
+double hash_to_diff( const void* );
+extern void diff_to_hash( uint32_t*, const double );

 double hash_target_ratio( uint32_t* hash, uint32_t* target );
 void   work_set_target_ratio( struct work* work, const void *hash );
@@ -344,21 +348,12 @@ struct thr_info {
        struct cpu_info cpu;
 };

-//struct thr_info *thr_info;
-
-void test_hash_and_submit( struct work *work, const void *hash,
-                           struct thr_info *thr );
+//int test_hash_and_submit( struct work *work, const void *hash,
+//                           struct thr_info *thr );

 bool submit_solution( struct work *work, const void *hash,
                      struct thr_info *thr );

-// deprecated
-bool submit_lane_solution( struct work *work, const void *hash,
-                           struct thr_info *thr, const int lane );
-
-bool submit_work( struct thr_info *thr, const struct work *work_in );
-
-
 void   get_currentalgo( char* buf, int sz );
 /*
 bool   has_sha();
@@ -541,9 +536,6 @@ enum algos {
        ALGO_BMW,        
        ALGO_BMW512,
        ALGO_C11,         
-        ALGO_CRYPTOLIGHT, 
-        ALGO_CRYPTONIGHT,
-        ALGO_CRYPTONIGHTV7, 
        ALGO_DECRED,
        ALGO_DEEP,
        ALGO_DMD_GR,
@@ -635,9 +627,6 @@ static const char* const algo_names[] = {
        "bmw",
        "bmw512",
        "c11",
-        "cryptolight",
-        "cryptonight",
-        "cryptonightv7",
        "decred",
        "deep",
        "dmd-gr",
@@ -794,9 +783,6 @@ Options:\n\
                          bmw           BMW 256\n\
                          bmw512        BMW 512\n\
                          c11           Chaincoin\n\
-                          cryptolight   Cryptonight-light\n\
-                          cryptonight   Cryptonote legacy\n\
-                          cryptonightv7 variant 7, Monero (XMR)\n\
                          decred        Blake256r14dcr\n\
                          deep          Deepcoin (DCN)\n\
                          dmd-gr        Diamond\n\
@@ -812,8 +798,8 @@ Options:\n\
                          lyra2re       lyra2\n\
                          lyra2rev2     lyrav2\n\
                          lyra2rev3     lyrav2v3, Vertcoin\n\
-                          lyra2z        Zcoin (XZC)\n\
-                          lyra2z330     Lyra2 330 rows, Zoin (ZOI)\n\
+                          lyra2z\n\
+                          lyra2z330     Lyra2 330 rows\n\
                          m7m           Magi (XMG)\n\
                          myr-gr        Myriad-Groestl\n\
                          neoscrypt     NeoScrypt(128, 2, 1)\n\
@@ -853,7 +839,7 @@ Options:\n\
                          x14           X14\n\
                          x15           X15\n\
                          x16r\n\
-                          x16rv2        Ravencoin (RVN)\n\
+                          x16rv2\n\
                          x16rt         Gincoin (GIN)\n\
                          x16rt-veil    Veil (VEIL)\n\
                          x16s\n\
--- a/simd-utils/intrlv.h
+++ b/simd-utils/intrlv.h
@@ -676,6 +676,14 @@ static inline void mm128_bswap32_intrlv80_4x32( void *d, const void *src )
  d[7] = *( (const uint32_t*)(s7) +(i) ); \
 } while(0)
  
+static inline void intrlv_8x32b( void *dst, const void *s0, const void *s1,
+           const void *s2, const void *s3, const void *s4, const void *s5,
+           const void *s6, const void *s7, const int bit_len )
+{
+   for ( int i = 0; i < bit_len/32; i++ )
+      ILEAVE_8x32( i );
+}
+
 static inline void intrlv_8x32( void *dst, const void *s0, const void *s1,
           const void *s2, const void *s3, const void *s4, const void *s5,
           const void *s6, const void *s7, const int bit_len )
@@ -730,6 +738,14 @@ static inline void intrlv_8x32_512( void *dst, const void *s0, const void *s1,
   *( (uint32_t*)(d7) +(i) ) = s[7]; \
 } while(0)

+static inline void dintrlv_8x32b( void *d0, void *d1, void *d2, void *d3,
+             void *d4, void *d5, void *d6, void *d7, const void *src,
+             const int bit_len )
+{
+   for ( int i = 0; i < bit_len/32; i++ )
+      DLEAVE_8x32( i );
+}
+
 static inline void dintrlv_8x32( void *d0, void *d1, void *d2, void *d3,
             void *d4, void *d5, void *d6, void *d7, const void *src,
             const int bit_len )
--- a/simd-utils/simd-128.h
+++ b/simd-utils/simd-128.h
@@ -273,6 +273,20 @@ static inline void memcpy_128( __m128i *dst, const __m128i *src, const int n )
 #define mm128_ror_1x32( v )   _mm_shuffle_epi32( v, 0x39 )
 #define mm128_rol_1x32( v )   _mm_shuffle_epi32( v, 0x93 )

+
+//#define mm128_swap_64( v )    _mm_alignr_epi8( v, v,  8 )
+//#define mm128_ror_1x32( v )   _mm_alignr_epi8( v, v,  4 )
+//#define mm128_rol_1x32( v )   _mm_alignr_epi8( v, v, 12 )
+#define mm128_ror_1x16( v )   _mm_alignr_epi8( v, v,  2 )
+#define mm128_rol_1x16( v )   _mm_alignr_epi8( v, v, 14 )
+#define mm128_ror_1x8( v )    _mm_alignr_epi8( v, v,  1 )
+#define mm128_rol_1x8( v )    _mm_alignr_epi8( v, v, 15 )
+
+#define mm128_ror_x8( v, c )  _mm_alignr_epi8( v, v, c )        // rotate right by c bytes
+#define mm128_rol_x8( v, c )  _mm_alignr_epi8( v, v, 16-(c) )   // rotate left by c bytes
+
+
+/*
 // Rotate 16 byte (128 bit) vector by c bytes.
 // Less efficient using shift but more versatile. Use only for odd number
 // byte rotations. Use shuffle above whenever possible.
@@ -312,6 +326,8 @@ static inline void memcpy_128( __m128i *dst, const __m128i *src, const int n )
   _mm_or_si128( _mm_slli_si128( v, 1 ), _mm_srli_si128( v, 15 ) )

 #endif   // SSE3 else SSE2
+*/
+

 // Invert vector: {3,2,1,0} -> {0,1,2,3}
 #define mm128_invert_32( v ) _mm_shuffle_epi32( v, 0x1b )
@@ -331,7 +347,7 @@ static inline void memcpy_128( __m128i *dst, const __m128i *src, const int n )
 //
 // Rotate elements within lanes.

-#define mm128_swap_64_32( v )  _mm_shuffle_epi32( v, 0xb1 )
+#define mm128_swap64_32( v )  _mm_shuffle_epi32( v, 0xb1 )

 #define mm128_rol64_8( v, c ) \
     _mm_or_si128( _mm_slli_epi64( v, ( ( (c)<<3 ) ), \
--- a/simd-utils/simd-256.h
+++ b/simd-utils/simd-256.h
@@ -442,16 +442,19 @@ static inline void memcpy_256( __m256i *dst, const __m256i *src, const int n )

 #define mm256_ror128_32( v )  _mm256_shuffle_epi32( v, 0x39 )

-#define mm256_rol128_1x32( v )  _mm256_shuffle_epi32( v, 0x93 )
+#define mm256_rol128_32( v )  _mm256_shuffle_epi32( v, 0x93 )

-// Rotave each 128 bit lane by c elements.
+#define mm256_ror128_x8( v, c )  _mm256_alignr_epi8( v, v, c ) 
+
+/*
+// Rotate each 128 bit lane by c elements.
 #define mm256_ror128_8( v, c ) \
  _mm256_or_si256( _mm256_bsrli_epi128( v, c ), \
                   _mm256_bslli_epi128( v, 16-(c) ) )
 #define mm256_rol128_8( v, c ) \
  _mm256_or_si256( _mm256_bslli_epi128( v, c ), \
                   _mm256_bsrli_epi128( v, 16-(c) ) )
-
+*/

 // Rotate elements in each 64 bit lane

--- a/simd-utils/simd-512.h
+++ b/simd-utils/simd-512.h
@@ -511,7 +511,9 @@ static inline void memcpy_512( __m512i *dst, const __m512i *src, const int n )
 #define mm512_ror128_32( v )   _mm512_shuffle_epi32( v, 0x39 )
 #define mm512_rol128_32( v )   _mm512_shuffle_epi32( v, 0x93 )

+#define mm512_ror128_x8( v, c )  _mm512_alignr_epi8( v, v, c )

+/*
 // Rotate 128 bit lanes by c bytes, faster than building that monstrous 
 // constant above.  
 #define mm512_ror128_8( v, c ) \
@@ -520,7 +522,7 @@ static inline void memcpy_512( __m512i *dst, const __m512i *src, const int n )
 #define mm512_rol128_8( v, c ) \
   _mm512_or_si512( _mm512_bslli_epi128( v, c ), \
                    _mm512_bsrli_epi128( v, 16-(c) ) )
-
+*/

 //
 // Rotate elements within 64 bit lanes.
--- a/sysinfos.c
+++ b/sysinfos.c
@@ -67,7 +67,6 @@
 #define HWMON_ALT5 \
 "/sys/class/hwmon/hwmon0/device/temp1_input"

-
 static inline float linux_cputemp(int core)
 {
 	float tc = 0.0;
@@ -97,49 +96,43 @@ static inline float linux_cputemp(int core)
 	return tc;
 }

-#define CPUFREQ_PATH \
+
+#define CPUFREQ_PATH0\
 "/sys/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq"

 #define CPUFREQ_PATHn \
 "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_cur_freq"

-
-// "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_cur_freq"
-static inline uint32_t linux_cpufreq(int core)
+static inline float linux_cpufreq(int core)
 {
-	FILE *fd = fopen(CPUFREQ_PATH, "r");
-	uint32_t freq = 0;
+	FILE *fd = fopen( CPUFREQ_PATH0, "r" );
+	long int freq = 0;

-	if (!fd)
-		return freq;
-
-	if (!fscanf(fd, "%d", &freq))
-		return freq;
-
-	return freq;
+	if ( !fd ) return (float)freq;
+	if ( !fscanf( fd, "%ld", &freq ) ) freq = 0;
+   fclose( fd );
+	return (float)freq;
 }

-static inline void linux_cpu_hilo_freq( uint32_t* lo, uint32_t *hi )
+static inline void linux_cpu_hilo_freq( float *lo, float *hi )
 {
-   uint64_t freq = 0, hi_freq = 0, lo_freq = 0xffffffffffffffff;
+   long int freq = 0, hi_freq = 0, lo_freq = 0x7fffffff;

   for ( int i = 0; i < num_cpus; i++ )
   {
      char path[64];
      sprintf( path, CPUFREQ_PATHn, i );   
-
      FILE *fd = fopen( path, "r" );
-      if ( fd )
+      if ( !fd ) return;
+      else if ( fscanf( fd, "%ld", &freq ) )
      {
-         if ( fscanf( fd, "%ld", &freq ) )
-         {
-            if ( freq > hi_freq ) hi_freq = freq;
-            if ( freq < lo_freq ) lo_freq = freq;
-         }
+         if ( freq > hi_freq ) hi_freq = freq;
+         if ( freq < lo_freq ) lo_freq = freq;
      }
+      fclose( fd );
   }
-   *hi = hi_freq;
-   *lo = lo_freq;
+   *hi = (float)hi_freq;
+   *lo = (float)lo_freq;
 }


--- a/util.c
+++ b/util.c
@@ -44,7 +44,7 @@
 #include <libgen.h>
 #endif

-#include "miner.h"
+//#include "miner.h"
 #include "elist.h"
 #include "algo-gate-api.h"

@@ -983,24 +983,7 @@ int timeval_subtract(struct timeval *result, struct timeval *x,
 	return x->tv_sec < y->tv_sec;
 }

-// deprecated, use test_hash_and_submit
-// Use this when deinterleaved
-// do 64 bit test 4 iterations
-inline bool valid_hash( const void *hash, const void *target )
-{
-   const uint64_t *h = (const uint64_t*)hash;
-   const uint64_t *t = (const uint64_t*)target;
-   if ( h[3] > t[3] ) return false;
-   if ( h[3] < t[3] ) return true;
-   if ( h[2] > t[2] ) return false;
-   if ( h[2] < t[2] ) return true;
-   if ( h[1] > t[1] ) return false;
-   if ( h[1] < t[1] ) return true;
-   if ( h[0] > t[0] ) return false;
-   return true;
-}
-
-// deprecated, use test_hash_and_submit
+// Deprecated
 bool fulltest( const uint32_t *hash, const uint32_t *target )
 {
 	int i;
@@ -1041,65 +1024,121 @@ bool fulltest( const uint32_t *hash, const uint32_t *target )
 	return rc;
 }

-void diff_to_target(uint32_t *target, double diff)
+// Mathematically the difficulty is simply the reciprocal of the hash.
+// Both are real numbers but the hash (target) is represented as a 256 bit
+// number with the upper 32 bits representing the whole integer part and the
+// lower 224 bits representing the fractional part:
+//   target[ 255:224 ] = trunc( 1/diff )
+//   target[ 223:  0 ] = frac( 1/diff )
+//
+// The 256 bit hash is exact but any floating point representation is not.
+// Stratum provides the target difficulty as double precision, which is
+// inexact and must be converted to a hash target. The converted hash target
+// will likely be less precise due to the inexact input and conversion error:
+// the value converted to a 256 bit hash will also be inexact and likely less
+// accurate due to error in conversion.
+// On the other hand getwork provides a 256 bit hash target which is exact.
+//
+// How much precision is needed?
+//
+// 128 bit types are implemented in software by the compiler using 64 bit
+// hardware resulting in lower performance and more error than would be
+// expected with a hardware 128 bit implementation.
+// Float80 exploits the internals of the FP unit which provide a 64 bit
+// mantissa in an 80 bit register with hardware rounding. When the destination
+// is double the data is rounded to float64 format. Long double returns all
+// 80 bits without rounding and including any accumulated computation error.
+// Float80 does not fit efficiently in memory.
+//
+// 256 bit hash: 76
+// float:         7     (float32, 80 bits with rounding to 32 bits)
+// double:       15     (float64, 80 bits with rounding to 64 bits)
+// long double   19     (float80, 80 bits with no rounding)
+// __float128    33     (128 bits with no rounding)
+// uint32_t:      9
+// uint64_t:     19
+// uint128_t     38
+//
+// The concept of significant digits doesn't apply to the 256 bit hash
+// representation. It's fixed point, making leading zeros significant.
+// Leading zeros count in the 256 bit representation.
+//
+// Doing calculations with float128 and uint128 increases precision for
+// target_to_diff, but doesn't help with stratum diff being limited to
+// double precision. Is the extra precision really worth the extra cost?
+//
+// With double the error rate is 1/1e15, or one hash in every Petahash
+// with a very low difficulty, not a likely situation. Higher difficulty
+// increases the effective precision. Due to the floating nature of the 
+// decimal point leading zeros aren't counted.
+//
+// Unfortunately I can't get float128 to work so long double it is.
+// All calculations will be done using long double then converted to double.
+// This prevents introducing significant new error while taking advantage
+// of HW rounding.
+
+#if defined(GCC_INT128)
+
+void diff_to_hash( uint32_t *target, const double diff )
 {
-   uint64_t m;
-   int k;
-
-   for (k = 6; k > 0 && diff > 1.0; k--)
-      diff /= exp32;
-
-//      diff /= 4294967296.0;
-
-//   m = (uint64_t)(4294901760.0 / diff);
-
-   m = (uint64_t)(exp32 / diff);
-
-   if (m == 0 && k == 6)
-      memset(target, 0xff, 32);
-   else {
-      memset(target, 0, 32);
-      target[k] = (uint32_t)m;
-      target[k + 1] = (uint32_t)(m >> 32);
-   }
+  uint128_t *targ = (uint128_t*)target;
+  register long double m = 1. / diff;
+  targ[0] = 0;
+  targ[1] = (uint128_t)( m * exp96 );
 }

-// deprecated
-void work_set_target(struct work* work, double diff)
+double hash_to_diff( const void *target )
 {
-	diff_to_target( work->target, diff );
-	work->targetdiff = diff;
+   const uint128_t *targ = (const uint128_t*)target;
+   register long double m = ( (long double)targ[1] / exp96 );
+//                        + ( (long double)targ[0] / exp160 );
+   return (double)( 1. / m );
 }

-double target_to_diff( uint32_t* target )
+inline bool valid_hash( const void *hash, const void *target )
 {
-   uint64_t *targ = (uint64_t*)target;
-   // extract 64 bits from target[ 240:176 ]
-   uint64_t m = ( targ[3] << 16 ) | ( targ[2] >> 48 );
-   return m ? (exp48-1.) / (double)m : 0.;
+   const uint128_t *h = (const uint128_t*)hash;
+   const uint128_t *t = (const uint128_t*)target;
+   if ( h[1] > t[1] ) return false;
+   if ( h[1] < t[1] ) return true;
+   if ( h[0] > t[0] ) return false;
+   return true;
 }

-/*
-double target_to_diff(uint32_t* target)
-{
-	uchar* tgt = (uchar*) target;
-	uint64_t m =
-		(uint64_t)tgt[29] << 56 |
-		(uint64_t)tgt[28] << 48 |
-		(uint64_t)tgt[27] << 40 |
-		(uint64_t)tgt[26] << 32 |
-		(uint64_t)tgt[25] << 24 |
-		(uint64_t)tgt[24] << 16 |
-		(uint64_t)tgt[23] << 8  |
-		(uint64_t)tgt[22] << 0;
+#else

-   
-	if (!m)
-		return 0.;
-	else
-		return (double)0x0000ffff00000000/m;
+void diff_to_hash( uint32_t *target, const double diff )
+{
+  uint64_t *targ = (uint64_t*)target;
+  register long double m = ( 1. / diff ) * exp32;
+  targ[1] = targ[0] = 0;
+  targ[3] = (uint64_t)m;
+  targ[2] = (uint64_t)( ( m - (long double)targ[3] ) * exp64 );
 }
-*/
+
+double hash_to_diff( const void *target )
+{
+   const uint64_t *targ = (const uint64_t*)target;
+   register long double m = ( (long double)targ[3] / exp32 )
+                          + ( (long double)targ[2] / exp96 );
+   return (double)( 1. / m );
+}
+
+inline bool valid_hash( const void *hash, const void *target )
+{
+   const uint64_t *h = (const uint64_t*)hash;
+   const uint64_t *t = (const uint64_t*)target;
+   if ( h[3] > t[3] ) return false;
+   if ( h[3] < t[3] ) return true;
+   if ( h[2] > t[2] ) return false;
+   if ( h[2] < t[2] ) return true;
+   if ( h[1] > t[1] ) return false;
+   if ( h[1] < t[1] ) return true;
+   if ( h[0] > t[0] ) return false;
+   return true;
+}
+
+#endif 

 #ifdef WIN32
 #define socket_blocks() (WSAGetLastError() == WSAEWOULDBLOCK)
Author	SHA1	Message	Date
Jay D Dee	972d4d70db	v3.12.8.1	2020-04-17 16:12:45 -04:00
Jay D Dee	e96a6bd699	v3.12.8	2020-04-09 12:56:18 -04:00
Jay D Dee	fb9163185a	v3.12.7	2020-03-20 16:30:12 -04:00