Jay D Dee
2023-03-11 14:54:49 -05:00
parent fb93160641
commit b339450898
49 changed files with 1120 additions and 1119 deletions

View File

@@ -25,7 +25,7 @@ static void hex_getAlgoString(const uint32_t* prevblock, char *output)
static __thread x16r_context_overlay hex_ctx;
int hex_hash( void* output, const void* input, int thrid )
int hex_hash( void* output, const void* input, const int thrid )
{
uint32_t _ALIGN(128) hash[16];
x16r_context_overlay ctx;

View File

@@ -72,7 +72,7 @@ struct TortureGarden
// Get a 64-byte hash for given 64-byte input, using given TortureGarden contexts and given algo index
static int get_hash( void *output, const void *input, TortureGarden *garden,
unsigned int algo, int thr_id )
unsigned int algo, const int thr_id )
{
unsigned char hash[64] __attribute__ ((aligned (64)));
int rc = 1;
@@ -233,7 +233,7 @@ bool initialize_torture_garden()
}
// Produce a 32-byte hash from 80-byte input data
int minotaur_hash( void *output, const void *input, int thr_id )
int minotaur_hash( void *output, const void *input, const int thr_id )
{
unsigned char hash[64] __attribute__ ((aligned (64)));
int rc = 1;

View File

@@ -19,7 +19,7 @@
// Perform midstate prehash of hash functions with block size <= 72 bytes,
// 76 bytes for hash functions that operate on 32 bit data.
void x16r_8way_prehash( void *vdata, void *pdata )
void x16r_8way_do_prehash( void *vdata, const void *pdata )
{
uint32_t vdata2[20*8] __attribute__ ((aligned (64)));
uint32_t edata[20] __attribute__ ((aligned (64)));
@@ -106,11 +106,18 @@ void x16r_8way_prehash( void *vdata, void *pdata )
}
}
int x16r_8way_prehash( struct work *work )
{
x16r_gate_get_hash_order( work, x16r_hash_order );
x16r_8way_do_prehash( x16r_8way_vdata, work->data );
return 1;
}
// Perform the full x16r hash and return the 512 bit intermediate hash.
// Called by wrapper hash function to optionally continue hashing and
// convert to final hash.
int x16r_8way_hash_generic( void* output, const void* input, int thrid )
int x16r_8way_hash_generic( void* output, const void* input, const int thrid )
{
uint32_t vhash[20*8] __attribute__ ((aligned (128)));
uint32_t hash0[20] __attribute__ ((aligned (16)));
@@ -471,7 +478,7 @@ int x16r_8way_hash_generic( void* output, const void* input, int thrid )
// x16-r,-s,-rt wrapper called directly by scanhash to repackage 512 bit
// hash to 256 bit final hash.
int x16r_8way_hash( void* output, const void* input, int thrid )
int x16r_8way_hash( void* output, const void* input, const int thrid )
{
uint8_t hash[64*8] __attribute__ ((aligned (128)));
if ( !x16r_8way_hash_generic( hash, input, thrid ) )
@@ -495,7 +502,6 @@ int scanhash_x16r_8way( struct work *work, uint32_t max_nonce,
{
uint32_t hash[16*8] __attribute__ ((aligned (128)));
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
uint32_t bedata1[2];
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
@@ -508,27 +514,16 @@ int scanhash_x16r_8way( struct work *work, uint32_t max_nonce,
if ( bench ) ptarget[7] = 0x0cff;
bedata1[0] = bswap_32( pdata[1] );
bedata1[1] = bswap_32( pdata[2] );
pthread_rwlock_rdlock( &g_work_lock );
memcpy( vdata, x16r_8way_vdata, sizeof vdata );
pthread_rwlock_unlock( &g_work_lock );
static __thread uint32_t s_ntime = UINT32_MAX;
const uint32_t ntime = bswap_32( pdata[17] );
if ( s_ntime != ntime )
{
x16_r_s_getAlgoString( (const uint8_t*)bedata1, x16r_hash_order );
s_ntime = ntime;
if ( opt_debug && !thr_id )
applog( LOG_INFO, "Hash order %s Ntime %08x", x16r_hash_order, ntime );
}
x16r_8way_prehash( vdata, pdata );
*noncev = mm512_intrlv_blend_32( _mm512_set_epi32(
n+7, 0, n+6, 0, n+5, 0, n+4, 0,
n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
do
{
if( x16r_8way_hash( hash, vdata, thr_id ) )
if( algo_gate.hash( hash, vdata, thr_id ) )
for ( int i = 0; i < 8; i++ )
if ( unlikely( valid_hash( hash + (i<<3), ptarget ) && !bench ) )
{
@@ -546,7 +541,7 @@ int scanhash_x16r_8way( struct work *work, uint32_t max_nonce,
#elif defined (X16R_4WAY)
void x16r_4way_prehash( void *vdata, void *pdata )
void x16r_4way_do_prehash( void *vdata, const void *pdata )
{
uint32_t vdata2[20*4] __attribute__ ((aligned (64)));
uint32_t edata[20] __attribute__ ((aligned (64)));
@@ -627,7 +622,14 @@ void x16r_4way_prehash( void *vdata, void *pdata )
}
}
int x16r_4way_hash_generic( void* output, const void* input, int thrid )
int x16r_4way_prehash( struct work *work )
{
x16r_gate_get_hash_order( work, x16r_hash_order );
x16r_4way_do_prehash( x16r_4way_vdata, work->data );
return 1;
}
int x16r_4way_hash_generic( void* output, const void* input, const int thrid )
{
uint32_t vhash[20*4] __attribute__ ((aligned (128)));
uint32_t hash0[20] __attribute__ ((aligned (32)));
@@ -635,13 +637,14 @@ int x16r_4way_hash_generic( void* output, const void* input, int thrid )
uint32_t hash2[20] __attribute__ ((aligned (32)));
uint32_t hash3[20] __attribute__ ((aligned (32)));
x16r_4way_context_overlay ctx;
memcpy( &ctx, &x16r_ctx, sizeof(ctx) );
void *in0 = (void*) hash0;
void *in1 = (void*) hash1;
void *in2 = (void*) hash2;
void *in3 = (void*) hash3;
int size = 80;
memcpy( &ctx, &x16r_ctx, sizeof(ctx) );
dintrlv_4x64( hash0, hash1, hash2, hash3, input, 640 );
for ( int i = 0; i < 16; i++ )
@@ -905,7 +908,7 @@ int x16r_4way_hash_generic( void* output, const void* input, int thrid )
return 1;
}
int x16r_4way_hash( void* output, const void* input, int thrid )
int x16r_4way_hash( void* output, const void* input, const int thrid )
{
uint8_t hash[64*4] __attribute__ ((aligned (64)));
if ( !x16r_4way_hash_generic( hash, input, thrid ) )
@@ -924,7 +927,6 @@ int scanhash_x16r_4way( struct work *work, uint32_t max_nonce,
{
uint32_t hash[16*4] __attribute__ ((aligned (64)));
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
uint32_t bedata1[2];
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
@@ -937,25 +939,15 @@ int scanhash_x16r_4way( struct work *work, uint32_t max_nonce,
if ( bench ) ptarget[7] = 0x0cff;
bedata1[0] = bswap_32( pdata[1] );
bedata1[1] = bswap_32( pdata[2] );
pthread_rwlock_rdlock( &g_work_lock );
memcpy( vdata, x16r_4way_vdata, sizeof vdata );
pthread_rwlock_unlock( &g_work_lock );
static __thread uint32_t s_ntime = UINT32_MAX;
const uint32_t ntime = bswap_32( pdata[17] );
if ( s_ntime != ntime )
{
x16_r_s_getAlgoString( (const uint8_t*)bedata1, x16r_hash_order );
s_ntime = ntime;
if ( opt_debug && !thr_id )
applog( LOG_INFO, "Hash order %s Ntime %08x", x16r_hash_order, ntime );
}
x16r_4way_prehash( vdata, pdata );
*noncev = mm256_intrlv_blend_32(
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
do
{
if ( x16r_4way_hash( hash, vdata, thr_id ) )
if ( algo_gate.hash( hash, vdata, thr_id ) )
for ( int i = 0; i < 4; i++ )
if ( unlikely( valid_hash( hash + (i<<3), ptarget ) && !bench ) )
{

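The hunks above split each scanhash's per-call setup into a gate-level prehash: x16r_8way_prehash()/x16r_4way_prehash() now derive the hash order and fill the shared x16r_8way_vdata/x16r_4way_vdata buffers once per work, and scanhash only snapshots that buffer under a read lock on g_work_lock before looping over nonces. A minimal standalone sketch of that pattern follows; the buffer size and the rwlock idea mirror the diff, while the function names and the placeholder memcpy standing in for the real midstate computation are hypothetical.

#include <pthread.h>
#include <stdint.h>
#include <string.h>

/* Sketch only: a shared prehash buffer guarded by a rwlock, mirroring the
 * x16r_8way_vdata / g_work_lock pattern above.  The memcpy in the writer is
 * a placeholder for the real hash-order + midstate computation. */

static uint32_t shared_vdata[24*8] __attribute__ ((aligned (64)));
static pthread_rwlock_t work_lock = PTHREAD_RWLOCK_INITIALIZER;

/* Writer side, once per new work: compute order and vectorised midstate. */
void prehash_new_work( const uint32_t *pdata )
{
   pthread_rwlock_wrlock( &work_lock );
   memcpy( shared_vdata, pdata, 20 * sizeof(uint32_t) );   /* placeholder */
   pthread_rwlock_unlock( &work_lock );
}

/* Reader side, per scanhash thread: snapshot, then hash without the lock. */
void scan_snapshot( uint32_t thr_vdata[24*8] )
{
   pthread_rwlock_rdlock( &work_lock );
   memcpy( thr_vdata, shared_vdata, 24*8 * sizeof(uint32_t) );
   pthread_rwlock_unlock( &work_lock );
   /* ... nonce loop calling algo_gate.hash( hash, thr_vdata, thr_id ) ... */
}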
View File

@@ -1,26 +1,44 @@
#include "x16r-gate.h"
#include "algo/sha/sha256d.h"
__thread char x16r_hash_order[ X16R_HASH_FUNC_COUNT + 1 ] = { 0 };
char x16r_hash_order[ X16R_HASH_FUNC_COUNT + 1 ] = {0};
void (*x16_r_s_getAlgoString) ( const uint8_t*, char* ) = NULL;
void (*x16r_gate_get_hash_order) ( const struct work *, char * ) = NULL;
#if defined (X16R_8WAY)
__thread x16r_8way_context_overlay x16r_ctx;
x16r_8way_context_overlay x16r_ctx;
uint32_t x16r_8way_vdata[24*8] __attribute__ ((aligned (64)));
#elif defined (X16R_4WAY)
__thread x16r_4way_context_overlay x16r_ctx;
x16r_4way_context_overlay x16r_ctx;
uint32_t x16r_4way_vdata[24*4] __attribute__ ((aligned (64)));
#endif
__thread x16r_context_overlay x16_ctx;
#if defined (X16RV2_8WAY)
x16rv2_8way_context_overlay x16rv2_ctx;
void x16r_getAlgoString( const uint8_t* prevblock, char *output )
#elif defined (X16RV2_4WAY)
x16rv2_4way_context_overlay x16rv2_ctx;
#endif
x16r_context_overlay x16_ctx;
uint32_t x16r_edata[24] __attribute__ ((aligned (32)));
void x16r_get_hash_order( const struct work *work, char *hash_order )
{
char *sptr = output;
char *sptr = hash_order;
const uint32_t *pdata = work->data;
uint8_t prevblock[16];
((uint32_t*)prevblock)[0] = bswap_32( pdata[1] );
((uint32_t*)prevblock)[1] = bswap_32( pdata[2] );
for ( int j = 0; j < X16R_HASH_FUNC_COUNT; j++ )
{
uint8_t b = (15 - j) >> 1; // 16 first ascii hex chars (lsb in uint256)
@@ -32,38 +50,51 @@ void x16r_getAlgoString( const uint8_t* prevblock, char *output )
sptr++;
}
*sptr = '\0';
}
void x16s_getAlgoString( const uint8_t* prevblock, char *output )
if ( !opt_quiet )
applog( LOG_INFO, "Hash order %s", x16r_hash_order );
}
void x16s_get_hash_order( const struct work *work, char *hash_order )
{
strcpy( output, "0123456789ABCDEF" );
const uint32_t *pdata = work->data;
uint8_t prevblock[16];
((uint32_t*)prevblock)[0] = bswap_32( pdata[1] );
((uint32_t*)prevblock)[1] = bswap_32( pdata[2] );
strcpy( hash_order, "0123456789ABCDEF" );
for ( int i = 0; i < 16; i++ )
{
uint8_t b = (15 - i) >> 1; // 16 ascii hex chars, reversed
uint8_t algoDigit = (i & 1) ? prevblock[b] & 0xF : prevblock[b] >> 4;
int offset = algoDigit;
// insert the nth character at the front
char oldVal = output[offset];
char oldVal = hash_order[ offset ];
for( int j = offset; j-- > 0; )
output[j+1] = output[j];
output[0] = oldVal;
hash_order[ j+1 ] = hash_order[ j ];
hash_order[ 0 ] = oldVal;
}
if ( !opt_quiet )
applog( LOG_INFO, "Hash order %s", x16r_hash_order );
}
bool register_x16r_algo( algo_gate_t* gate )
{
#if defined (X16R_8WAY)
gate->scanhash = (void*)&scanhash_x16r_8way;
gate->prehash = (void*)&x16r_8way_prehash;
gate->hash = (void*)&x16r_8way_hash;
#elif defined (X16R_4WAY)
gate->scanhash = (void*)&scanhash_x16r_4way;
gate->prehash = (void*)&x16r_4way_prehash;
gate->hash = (void*)&x16r_4way_hash;
#else
gate->scanhash = (void*)&scanhash_x16r;
gate->prehash = (void*)&x16r_prehash;
gate->hash = (void*)&x16r_hash;
#endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT;
x16_r_s_getAlgoString = (void*)&x16r_getAlgoString;
x16r_gate_get_hash_order = (void*)&x16r_get_hash_order;
opt_target_factor = 256.0;
return true;
};
@@ -71,17 +102,20 @@ bool register_x16r_algo( algo_gate_t* gate )
bool register_x16rv2_algo( algo_gate_t* gate )
{
#if defined (X16RV2_8WAY)
gate->scanhash = (void*)&scanhash_x16rv2_8way;
gate->scanhash = (void*)&scanhash_x16r_8way;
gate->prehash = (void*)&x16rv2_8way_prehash;
gate->hash = (void*)&x16rv2_8way_hash;
#elif defined (X16RV2_4WAY)
gate->scanhash = (void*)&scanhash_x16rv2_4way;
gate->scanhash = (void*)&scanhash_x16r_4way;
gate->prehash = (void*)&x16rv2_4way_prehash;
gate->hash = (void*)&x16rv2_4way_hash;
#else
gate->scanhash = (void*)&scanhash_x16rv2;
gate->scanhash = (void*)&scanhash_x16r;
gate->prehash = (void*)&x16rv2_prehash;
gate->hash = (void*)&x16rv2_hash;
#endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT;
x16_r_s_getAlgoString = (void*)&x16r_getAlgoString;
x16r_gate_get_hash_order = (void*)&x16r_get_hash_order;
opt_target_factor = 256.0;
return true;
};
@@ -90,16 +124,19 @@ bool register_x16s_algo( algo_gate_t* gate )
{
#if defined (X16R_8WAY)
gate->scanhash = (void*)&scanhash_x16r_8way;
gate->prehash = (void*)&x16r_8way_prehash;
gate->hash = (void*)&x16r_8way_hash;
#elif defined (X16R_4WAY)
gate->scanhash = (void*)&scanhash_x16r_4way;
gate->prehash = (void*)&x16r_4way_prehash;
gate->hash = (void*)&x16r_4way_hash;
#else
gate->scanhash = (void*)&scanhash_x16r;
gate->prehash = (void*)&x16r_prehash;
gate->hash = (void*)&x16r_hash;
#endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT;
x16_r_s_getAlgoString = (void*)&x16s_getAlgoString;
x16r_gate_get_hash_order = (void*)&x16s_get_hash_order;
opt_target_factor = 256.0;
return true;
};
@@ -108,30 +145,33 @@ bool register_x16s_algo( algo_gate_t* gate )
//
// X16RT
void x16rt_get_hash_order( const struct work * work, char * hash_order )
{
uint32_t _ALIGN(64) timehash[8*8];
const uint32_t ntime = bswap_32( work->data[17] );
const int32_t masked_ntime = ntime & 0xffffff80;
uint8_t* data = (uint8_t*)timehash;
char *sptr = hash_order;
void x16rt_getTimeHash( const uint32_t timeStamp, void* timeHash )
{
int32_t maskedTime = timeStamp & 0xffffff80;
sha256d( (unsigned char*)timeHash, (const unsigned char*)( &maskedTime ),
sizeof( maskedTime ) );
}
sha256d( (unsigned char*)timehash, (const unsigned char*)( &masked_ntime ),
sizeof( masked_ntime ) );
void x16rt_getAlgoString( const uint32_t *timeHash, char *output)
{
char *sptr = output;
uint8_t* data = (uint8_t*)timeHash;
for (uint8_t j = 0; j < X16R_HASH_FUNC_COUNT; j++) {
for ( uint8_t j = 0; j < X16R_HASH_FUNC_COUNT; j++ )
{
uint8_t b = (15 - j) >> 1; // 16 ascii hex chars, reversed
uint8_t algoDigit = (j & 1) ? data[b] & 0xF : data[b] >> 4;
if (algoDigit >= 10)
sprintf(sptr, "%c", 'A' + (algoDigit - 10));
if ( algoDigit >= 10 )
sprintf( sptr, "%c", 'A' + (algoDigit - 10) );
else
sprintf(sptr, "%u", (uint32_t) algoDigit);
sprintf( sptr, "%u", (uint32_t) algoDigit );
sptr++;
}
*sptr = '\0';
if ( !opt_quiet )
applog( LOG_INFO, "Hash order %s, ntime %08x, time hash %08x",
hash_order, ntime, timehash );
}
void veil_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
@@ -222,15 +262,19 @@ void veil_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
bool register_x16rt_algo( algo_gate_t* gate )
{
#if defined (X16R_8WAY)
gate->scanhash = (void*)&scanhash_x16rt_8way;
gate->scanhash = (void*)&scanhash_x16r_8way;
gate->prehash = (void*)&x16r_8way_prehash;
gate->hash = (void*)&x16r_8way_hash;
#elif defined (X16R_4WAY)
gate->scanhash = (void*)&scanhash_x16rt_4way;
gate->scanhash = (void*)&scanhash_x16r_4way;
gate->prehash = (void*)&x16r_4way_prehash;
gate->hash = (void*)&x16r_4way_hash;
#else
gate->scanhash = (void*)&scanhash_x16rt;
gate->scanhash = (void*)&scanhash_x16r;
gate->prehash = (void*)&x16r_prehash;
gate->hash = (void*)&x16r_hash;
#endif
x16r_gate_get_hash_order = (void*)&x16rt_get_hash_order;
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT;
opt_target_factor = 256.0;
return true;
@@ -239,16 +283,20 @@ bool register_x16rt_algo( algo_gate_t* gate )
bool register_x16rt_veil_algo( algo_gate_t* gate )
{
#if defined (X16R_8WAY)
gate->scanhash = (void*)&scanhash_x16rt_8way;
gate->scanhash = (void*)&scanhash_x16r_8way;
gate->prehash = (void*)&x16r_8way_prehash;
gate->hash = (void*)&x16r_8way_hash;
#elif defined (X16R_4WAY)
gate->scanhash = (void*)&scanhash_x16rt_4way;
gate->scanhash = (void*)&scanhash_x16r_4way;
gate->prehash = (void*)&x16r_4way_prehash;
gate->hash = (void*)&x16r_4way_hash;
#else
gate->scanhash = (void*)&scanhash_x16rt;
gate->scanhash = (void*)&scanhash_x16r;
gate->prehash = (void*)&x16r_prehash;
gate->hash = (void*)&x16r_hash;
#endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT;
x16r_gate_get_hash_order = (void*)&x16rt_get_hash_order;
gate->build_extraheader = (void*)&veil_build_extraheader;
opt_target_factor = 256.0;
return true;
@@ -275,20 +323,23 @@ bool register_hex_algo( algo_gate_t* gate )
bool register_x21s_algo( algo_gate_t* gate )
{
#if defined (X16R_8WAY)
gate->scanhash = (void*)&scanhash_x21s_8way;
gate->scanhash = (void*)&scanhash_x16r_8way;
gate->prehash = (void*)&x16r_8way_prehash;
gate->hash = (void*)&x21s_8way_hash;
gate->miner_thread_init = (void*)&x21s_8way_thread_init;
#elif defined (X16R_4WAY)
gate->scanhash = (void*)&scanhash_x21s_4way;
gate->scanhash = (void*)&scanhash_x16r_4way;
gate->prehash = (void*)&x16r_4way_prehash;
gate->hash = (void*)&x21s_4way_hash;
gate->miner_thread_init = (void*)&x21s_4way_thread_init;
#else
gate->scanhash = (void*)&scanhash_x21s;
gate->scanhash = (void*)&scanhash_x16r;
gate->prehash = (void*)&x16r_prehash;
gate->hash = (void*)&x21s_hash;
gate->miner_thread_init = (void*)&x21s_thread_init;
#endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT;
x16_r_s_getAlgoString = (void*)&x16s_getAlgoString;
x16r_gate_get_hash_order = (void*)&x16s_get_hash_order;
opt_target_factor = 256.0;
return true;
};

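For reference, the ordering rule that x16r_get_hash_order() now applies to the byte-swapped words work->data[1..2] is the nibble walk shown in the hunk above. A self-contained sketch of just that mapping; the prevblock value in main() is made up purely for demonstration.

#include <stdint.h>
#include <stdio.h>

#define HASH_FUNC_COUNT 16

/* Sketch of the x16r ordering rule from the diff: walk the 16 nibbles of the
 * previous block hash (least significant first in the uint256, hence the
 * (15 - j) indexing) and emit each as a hex digit naming a function. */
static void order_from_prevblock( const uint8_t *prevblock, char *out )
{
   char *sptr = out;
   for ( int j = 0; j < HASH_FUNC_COUNT; j++ )
   {
      uint8_t b = (15 - j) >> 1;
      uint8_t algoDigit = (j & 1) ? prevblock[b] & 0xF : prevblock[b] >> 4;
      *sptr++ = algoDigit >= 10 ? 'A' + (algoDigit - 10) : '0' + algoDigit;
   }
   *sptr = '\0';
}

int main(void)
{
   /* hypothetical previous block hash bytes, just for demonstration */
   uint8_t prevblock[16] = { 0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF,
                             0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF };
   char order[HASH_FUNC_COUNT + 1];
   order_from_prevblock( prevblock, order );
   printf( "hash order %s\n", order );   /* prints "EFCDAB8967452301" */
   return 0;
}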
View File

@@ -21,6 +21,7 @@
#include "algo/shabal/sph_shabal.h"
#include "algo/whirlpool/sph_whirlpool.h"
#include "algo/sha/sph_sha2.h"
#include "algo/tiger/sph_tiger.h"
#if defined(__AES__)
#include "algo/echo/aes_ni/hash_api.h"
@@ -57,13 +58,11 @@
#define X16R_8WAY 1
#define X16RV2_8WAY 1
#define X16RT_8WAY 1
#define X21S_8WAY 1
#elif defined(__AVX2__) && defined(__AES__)
#define X16RV2_4WAY 1
#define X16RT_4WAY 1
#define X21S_4WAY 1
#define X16R_4WAY 1
@@ -89,23 +88,29 @@ enum x16r_Algo {
X16R_HASH_FUNC_COUNT
};
extern __thread char x16r_hash_order[ X16R_HASH_FUNC_COUNT + 1 ];
extern void (*x16_r_s_getAlgoString) ( const uint8_t*, char* );
void x16r_getAlgoString( const uint8_t *prevblock, char *output );
void x16s_getAlgoString( const uint8_t *prevblock, char *output );
void x16rt_getAlgoString( const uint32_t *timeHash, char *output );
//extern __thread char x16r_hash_order[ X16R_HASH_FUNC_COUNT + 1 ];
extern char x16r_hash_order[ X16R_HASH_FUNC_COUNT + 1 ];
extern void (*x16r_gate_get_hash_order) ( const struct work *, char * );
// x16r, x16rv2
void x16r_get_hash_order( const struct work *, char * );
// x16s, x21s
void x16s_get_hash_order( const struct work *, char * );
// x16rt
void x16rt_get_hash_order( const struct work *, char * );
void x16rt_getTimeHash( const uint32_t timeStamp, void* timeHash );
bool register_x16r_algo( algo_gate_t* gate );
bool register_x16rv2_algo( algo_gate_t* gate );
bool register_x16s_algo( algo_gate_t* gate );
bool register_x16rt_algo( algo_gate_t* gate );
bool register_hex__algo( algo_gate_t* gate );
bool register_x21s__algo( algo_gate_t* gate );
bool register_hex_algo( algo_gate_t* gate );
bool register_x21s_algo( algo_gate_t* gate );
// x16r, x16s
// x16r, x16s, x16rt
#if defined(X16R_8WAY)
union _x16r_8way_context_overlay
@@ -136,15 +141,15 @@ union _x16r_8way_context_overlay
typedef union _x16r_8way_context_overlay x16r_8way_context_overlay;
extern __thread x16r_8way_context_overlay x16r_ctx;
extern x16r_8way_context_overlay x16r_ctx;
extern uint32_t x16r_8way_vdata[24*8] __attribute__ ((aligned (64)));
void x16r_8way_prehash( void *, void * );
int x16r_8way_hash_generic( void *, const void *, int );
int x16r_8way_hash( void *, const void *, int );
void x16r_8way_do_prehash( void *, const void * );
int x16r_8way_prehash( struct work * );
int x16r_8way_hash_generic( void *, const void *, const int );
int x16r_8way_hash( void *, const void *, const int );
int scanhash_x16r_8way( struct work *, uint32_t ,
uint64_t *, struct thr_info * );
extern __thread x16r_8way_context_overlay x16r_ctx;
#elif defined(X16R_4WAY)
@@ -177,14 +182,15 @@ union _x16r_4way_context_overlay
typedef union _x16r_4way_context_overlay x16r_4way_context_overlay;
extern __thread x16r_4way_context_overlay x16r_ctx;
extern x16r_4way_context_overlay x16r_ctx;
extern uint32_t x16r_4way_vdata[24*4] __attribute__ ((aligned (64)));
void x16r_4way_prehash( void *, void * );
int x16r_4way_hash_generic( void *, const void *, int );
int x16r_4way_hash( void *, const void *, int );
void x16r_4way_do_prehash( void *, const void * );
int x16r_4way_prehash( struct work * );
int x16r_4way_hash_generic( void *, const void *, const int );
int x16r_4way_hash( void *, const void *, const int );
int scanhash_x16r_4way( struct work *, uint32_t,
uint64_t *, struct thr_info * );
extern __thread x16r_4way_context_overlay x16r_ctx;
#endif
@@ -217,80 +223,113 @@ union _x16r_context_overlay
typedef union _x16r_context_overlay x16r_context_overlay;
extern __thread x16r_context_overlay x16_ctx;
extern x16r_context_overlay x16_ctx;
extern uint32_t x16r_edata[24] __attribute__ ((aligned (32)));
void x16r_prehash( void *, void * );
int x16r_hash_generic( void *, const void *, int );
int x16r_hash( void *, const void *, int );
void x16r_do_prehash( const void * );
int x16r_prehash( const struct work * );
int x16r_hash_generic( void *, const void *, const int );
int x16r_hash( void *, const void *, const int );
int scanhash_x16r( struct work *, uint32_t, uint64_t *, struct thr_info * );
// x16Rv2
#if defined(X16RV2_8WAY)
int x16rv2_8way_hash( void *state, const void *input, int thrid );
int scanhash_x16rv2_8way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
union _x16rv2_8way_context_overlay
{
blake512_8way_context blake;
bmw512_8way_context bmw;
skein512_8way_context skein;
jh512_8way_context jh;
keccak512_8way_context keccak;
luffa_4way_context luffa;
cubehashParam cube;
simd_4way_context simd;
hamsi512_8way_context hamsi;
hashState_fugue fugue;
shabal512_8way_context shabal;
sph_whirlpool_context whirlpool;
sha512_8way_context sha512;
sph_tiger_context tiger;
#if defined(__VAES__)
groestl512_4way_context groestl;
shavite512_4way_context shavite;
echo_4way_context echo;
#else
hashState_groestl groestl;
shavite512_context shavite;
hashState_echo echo;
#endif
} __attribute__ ((aligned (64)));
typedef union _x16rv2_8way_context_overlay x16rv2_8way_context_overlay;
extern x16rv2_8way_context_overlay x16rv2_ctx;
int x16rv2_8way_prehash( struct work * );
int x16rv2_8way_hash( void *state, const void *input, const int thrid );
//int scanhash_x16rv2_8way( struct work *work, uint32_t max_nonce,
// uint64_t *hashes_done, struct thr_info *mythr );
#elif defined(X16RV2_4WAY)
int x16rv2_4way_hash( void *state, const void *input, int thrid );
int scanhash_x16rv2_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
union _x16rv2_4way_context_overlay
{
blake512_4way_context blake;
bmw512_4way_context bmw;
#if defined(__VAES__)
groestl512_2way_context groestl;
shavite512_2way_context shavite;
echo_2way_context echo;
#else
int x16rv2_hash( void *state, const void *input, int thr_id );
int scanhash_x16rv2( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
hashState_groestl groestl;
shavite512_context shavite;
hashState_echo echo;
#endif
skein512_4way_context skein;
jh512_4way_context jh;
keccak512_4way_context keccak;
luffa_2way_context luffa;
cubehashParam cube;
simd_2way_context simd;
hamsi512_4way_context hamsi;
hashState_fugue fugue;
shabal512_4way_context shabal;
sph_whirlpool_context whirlpool;
sha512_4way_context sha512;
sph_tiger_context tiger;
};
// x16rt, veil
#if defined(X16R_8WAY)
typedef union _x16rv2_4way_context_overlay x16rv2_4way_context_overlay;
extern x16rv2_4way_context_overlay x16rv2_ctx;
//void x16rt_8way_hash( void *state, const void *input );
int scanhash_x16rt_8way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
#elif defined(X16R_4WAY)
//void x16rt_4way_hash( void *state, const void *input );
int scanhash_x16rt_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
int x16rv2_4way_hash( void *state, const void *input, const int thrid );
int x16rv2_4way_prehash( struct work * );
#else
//void x16rt_hash( void *state, const void *input );
int scanhash_x16rt( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
int x16rv2_hash( void *state, const void *input, const int thr_id );
int x16rv2_prehash( const struct work * );
#endif
// x21s
#if defined(X16R_8WAY)
int x21s_8way_hash( void *state, const void *input, int thrid );
int scanhash_x21s_8way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
int x21s_8way_hash( void *state, const void *input, const int thrid );
bool x21s_8way_thread_init();
#elif defined(X16R_4WAY)
int x21s_4way_hash( void *state, const void *input, int thrid );
int scanhash_x21s_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
int x21s_4way_hash( void *state, const void *input, const int thrid );
bool x21s_4way_thread_init();
#else
int x21s_hash( void *state, const void *input, int thr_id );
int scanhash_x21s( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
int x21s_hash( void *state, const void *input, const int thr_id );
bool x21s_thread_init();
#endif
//void hex_hash( void *state, const void *input );
int scanhash_hex( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );

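The *_context_overlay unions declared above exist so the many per-function hash states can share one block of storage: only one function's state is live at any point in the chain, and the now-global x16r_ctx / x16rv2_ctx hold the prehashed midstate that each per-nonce hash copies before continuing. A toy sketch of the idea; the member types are hypothetical stand-ins for the real sph_/N-way contexts.

#include <string.h>

/* Hypothetical stand-ins for two real hash-function states. */
typedef struct { unsigned char buf[128]; unsigned long long count; } ctx_a;
typedef struct { unsigned int  h[16];    unsigned char  buf[64];    } ctx_b;

/* Overlay: sized to the largest member, reused by every step in the chain. */
typedef union
{
   ctx_a a;
   ctx_b b;
} context_overlay;

static context_overlay prehashed_ctx;   /* filled once per work by prehash */

void per_nonce_hash( void )
{
   context_overlay ctx;
   /* Copy the shared midstate, then finish from the private copy so the
    * prehashed state survives for the next nonce. */
   memcpy( &ctx, &prehashed_ctx, sizeof(ctx) );
   /* ... run whichever function is first in x16r_hash_order on &ctx ... */
}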
View File

@@ -10,7 +10,7 @@
#include <stdlib.h>
#include <string.h>
void x16r_prehash( void *edata, void *pdata )
void x16r_do_prehash( const void *edata )
{
const char elem = x16r_hash_order[0];
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
@@ -48,7 +48,7 @@ void x16r_prehash( void *edata, void *pdata )
}
}
int x16r_hash_generic( void* output, const void* input, int thrid )
int x16r_hash_generic( void* output, const void* input, const int thrid )
{
uint32_t _ALIGN(128) hash[16];
x16r_context_overlay ctx;
@@ -192,7 +192,15 @@ int x16r_hash_generic( void* output, const void* input, int thrid )
return true;
}
int x16r_hash( void* output, const void* input, int thrid )
int x16r_prehash( const struct work *work )
{
mm128_bswap32_80( x16r_edata, work->data );
x16r_gate_get_hash_order( work, x16r_hash_order );
x16r_do_prehash( x16r_edata );
return 1;
}
int x16r_hash( void* output, const void* input, const int thrid )
{
uint8_t hash[64] __attribute__ ((aligned (64)));
if ( !x16r_hash_generic( hash, input, thrid ) )
@@ -205,8 +213,8 @@ int x16r_hash( void* output, const void* input, int thrid )
int scanhash_x16r( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t _ALIGN(128) hash32[8];
uint32_t _ALIGN(128) edata[20];
uint32_t _ALIGN(32) hash32[8];
uint32_t _ALIGN(32) edata[20];
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
@@ -216,24 +224,14 @@ int scanhash_x16r( struct work *work, uint32_t max_nonce,
const bool bench = opt_benchmark;
if ( bench ) ptarget[7] = 0x0cff;
mm128_bswap32_80( edata, pdata );
static __thread uint32_t s_ntime = UINT32_MAX;
if ( s_ntime != pdata[17] )
{
uint32_t ntime = swab32(pdata[17]);
x16_r_s_getAlgoString( (const uint8_t*)(&edata[1]), x16r_hash_order );
s_ntime = ntime;
if ( opt_debug && !thr_id )
applog( LOG_DEBUG, "hash order %s (%08x)", x16r_hash_order, ntime );
}
x16r_prehash( edata, pdata );
pthread_rwlock_rdlock( &g_work_lock );
memcpy( edata, x16r_edata, sizeof edata );
pthread_rwlock_unlock( &g_work_lock );
do
{
edata[19] = nonce;
if ( x16r_hash( hash32, edata, thr_id ) )
if ( algo_gate.hash( hash32, edata, thr_id ) )
if ( unlikely( valid_hash( hash32, ptarget ) && !bench ) )
{
pdata[19] = bswap_32( nonce );

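The dispatch at the top of x16r_do_prehash() turns the first character of x16r_hash_order into a function index with elem >= 'A' ? elem - 'A' + 10 : elem - '0'. A trivial standalone check of that mapping:

#include <stdio.h>
#include <stdint.h>

/* Same character-to-index rule as in x16r_do_prehash(). */
static uint8_t algo_from_order_char( char elem )
{
   return elem >= 'A' ? elem - 'A' + 10 : elem - '0';
}

int main(void)
{
   printf( "'0' -> %u, '9' -> %u, 'A' -> %u, 'F' -> %u\n",
           algo_from_order_char('0'), algo_from_order_char('9'),
           algo_from_order_char('A'), algo_from_order_char('F') );
   /* prints: '0' -> 0, '9' -> 9, 'A' -> 10, 'F' -> 15 */
   return 0;
}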
View File

@@ -1,113 +0,0 @@
#include "x16r-gate.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#if defined (X16R_8WAY)
int scanhash_x16rt_8way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr)
{
uint32_t hash[16*8] __attribute__ ((aligned (128)));
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
uint32_t _ALIGN(64) timeHash[8*8];
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce - 8;
uint32_t n = first_nonce;
__m512i *noncev = (__m512i*)vdata + 9; // aligned
const int thr_id = mythr->id;
volatile uint8_t *restart = &(work_restart[thr_id].restart);
const bool bench = opt_benchmark;
if ( bench ) ptarget[7] = 0x0cff;
static __thread uint32_t s_ntime = UINT32_MAX;
uint32_t masked_ntime = bswap_32( pdata[17] ) & 0xffffff80;
if ( s_ntime != masked_ntime )
{
x16rt_getTimeHash( masked_ntime, &timeHash );
x16rt_getAlgoString( &timeHash[0], x16r_hash_order );
s_ntime = masked_ntime;
if ( !thr_id )
applog( LOG_INFO, "Hash order %s, Ntime %08x, time hash %08x",
x16r_hash_order, bswap_32( pdata[17] ), timeHash );
}
x16r_8way_prehash( vdata, pdata );
*noncev = mm512_intrlv_blend_32( _mm512_set_epi32(
n+7, 0, n+6, 0, n+5, 0, n+4, 0,
n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
do
{
if ( x16r_8way_hash( hash, vdata, thr_id ) )
for ( int i = 0; i < 8; i++ )
if ( unlikely( valid_hash( hash + (i<<3), ptarget ) && !bench ) )
{
pdata[19] = bswap_32( n+i );
submit_solution( work, hash+(i<<3), mythr );
}
*noncev = _mm512_add_epi32( *noncev,
m512_const1_64( 0x0000000800000000 ) );
n += 8;
} while ( likely( ( n < last_nonce ) && !(*restart) ) );
pdata[19] = n;
*hashes_done = n - first_nonce;
return 0;
}
#elif defined (X16R_4WAY)
int scanhash_x16rt_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr)
{
uint32_t hash[4*16] __attribute__ ((aligned (64)));
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
uint32_t _ALIGN(64) timeHash[4*8];
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce - 4;
uint32_t n = first_nonce;
const int thr_id = mythr->id;
__m256i *noncev = (__m256i*)vdata + 9; // aligned
volatile uint8_t *restart = &(work_restart[thr_id].restart);
const bool bench = opt_benchmark;
if ( bench ) ptarget[7] = 0x0cff;
static __thread uint32_t s_ntime = UINT32_MAX;
uint32_t masked_ntime = bswap_32( pdata[17] ) & 0xffffff80;
if ( s_ntime != masked_ntime )
{
x16rt_getTimeHash( masked_ntime, &timeHash );
x16rt_getAlgoString( &timeHash[0], x16r_hash_order );
s_ntime = masked_ntime;
if ( !thr_id )
applog( LOG_INFO, "Hash order %s, Ntime %08x, time hash %08x",
x16r_hash_order, bswap_32( pdata[17] ), timeHash );
}
x16r_4way_prehash( vdata, pdata );
*noncev = mm256_intrlv_blend_32(
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
do
{
if ( x16r_4way_hash( hash, vdata, thr_id ) )
for ( int i = 0; i < 4; i++ )
if ( unlikely( valid_hash( hash + (i<<3), ptarget ) && !bench ) )
{
pdata[19] = bswap_32( n+i );
submit_solution( work, hash+(i<<3), mythr );
}
*noncev = _mm256_add_epi32( *noncev,
m256_const1_64( 0x0000000400000000 ) );
n += 4;
} while ( ( n < last_nonce ) && !(*restart) );
pdata[19] = n;
*hashes_done = n - first_nonce;
return 0;
}
#endif

View File

@@ -1,53 +0,0 @@
#include "x16r-gate.h"
#if !defined(X16R_8WAY) && !defined(X16R_4WAY)
int scanhash_x16rt( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t _ALIGN(128) hash32[8];
uint32_t _ALIGN(128) edata[20];
uint32_t _ALIGN(64) timeHash[8];
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
const int thr_id = mythr->id;
uint32_t nonce = first_nonce;
volatile uint8_t *restart = &(work_restart[thr_id].restart);
const bool bench = opt_benchmark;
if ( bench ) ptarget[7] = 0x0cff;
mm128_bswap32_80( edata, pdata );
static __thread uint32_t s_ntime = UINT32_MAX;
uint32_t masked_ntime = swab32( pdata[17] ) & 0xffffff80;
if ( s_ntime != masked_ntime )
{
x16rt_getTimeHash( masked_ntime, &timeHash );
x16rt_getAlgoString( &timeHash[0], x16r_hash_order );
s_ntime = masked_ntime;
if ( opt_debug && !thr_id )
applog( LOG_INFO, "hash order: %s time: (%08x) time hash: (%08x)",
x16r_hash_order, swab32( pdata[17] ), timeHash );
}
x16r_prehash( edata, pdata );
do
{
edata[19] = nonce;
if ( x16r_hash( hash32, edata, thr_id ) )
if ( valid_hash( hash32, ptarget ) && !bench )
{
pdata[19] = bswap_32( nonce );
submit_solution( work, hash32, mythr );
}
nonce++;
} while ( nonce < max_nonce && !(*restart) );
pdata[19] = nonce;
*hashes_done = pdata[19] - first_nonce;
return 0;
}
#endif // !defined(X16R_8WAY) && !defined(X16R_4WAY)

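With the dedicated x16rt scanhash functions deleted above, the x16rt ordering now comes entirely from x16rt_get_hash_order() in x16r-gate.c: ntime is masked to a 128-second window, double-SHA256'd, and the nibbles of that digest drive the usual digit-to-function mapping. A hedged sketch of the time-hash step, assuming the sha256d( out, in, len ) prototype from algo/sha/sha256d.h that the gate file already includes:

#include <stdint.h>

/* Prototype assumed from algo/sha/sha256d.h (included by x16r-gate.c). */
void sha256d( unsigned char *hash, const unsigned char *data, int len );

/* x16rt time-hash step: the order only changes when ntime crosses a
 * 128-second boundary because the low 7 bits are masked off. */
void x16rt_time_hash_sketch( uint32_t ntime, uint32_t timehash[8] )
{
   const int32_t masked_ntime = ntime & 0xffffff80;
   sha256d( (unsigned char*)timehash, (const unsigned char*)&masked_ntime,
            sizeof(masked_ntime) );
   /* the 16 nibbles of timehash[0..1] then feed the same digit-to-function
    * mapping used by x16r_get_hash_order() */
}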
View File

@@ -12,37 +12,73 @@
#if defined (X16RV2_8WAY)
union _x16rv2_8way_context_overlay
void x16rv2_8way_do_prehash( void *vdata, void *pdata )
{
blake512_8way_context blake;
bmw512_8way_context bmw;
skein512_8way_context skein;
jh512_8way_context jh;
keccak512_8way_context keccak;
luffa_4way_context luffa;
cubehashParam cube;
simd_4way_context simd;
hamsi512_8way_context hamsi;
hashState_fugue fugue;
shabal512_8way_context shabal;
sph_whirlpool_context whirlpool;
sha512_8way_context sha512;
sph_tiger_context tiger;
#if defined(__VAES__)
groestl512_4way_context groestl;
shavite512_4way_context shavite;
echo_4way_context echo;
#else
hashState_groestl groestl;
shavite512_context shavite;
hashState_echo echo;
#endif
} __attribute__ ((aligned (64)));
uint32_t vdata32[20*8] __attribute__ ((aligned (64)));
uint32_t edata[20] __attribute__ ((aligned (64)));
typedef union _x16rv2_8way_context_overlay x16rv2_8way_context_overlay;
static __thread x16rv2_8way_context_overlay x16rv2_ctx;
const char elem = x16r_hash_order[0];
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
int x16rv2_8way_hash( void* output, const void* input, int thrid )
switch ( algo )
{
case JH:
mm512_bswap32_intrlv80_8x64( vdata, pdata );
jh512_8way_init( &x16rv2_ctx.jh );
jh512_8way_update( &x16rv2_ctx.jh, vdata, 64 );
break;
case KECCAK:
case LUFFA:
case SHA_512:
mm128_bswap32_80( edata, pdata );
sph_tiger_init( &x16rv2_ctx.tiger );
sph_tiger( &x16rv2_ctx.tiger, edata, 64 );
intrlv_8x64( vdata, edata, edata, edata, edata,
edata, edata, edata, edata, 640 );
break;
case SKEIN:
mm512_bswap32_intrlv80_8x64( vdata, pdata );
skein512_8way_init( &x16rv2_ctx.skein );
skein512_8way_update( &x16rv2_ctx.skein, vdata, 64 );
break;
case CUBEHASH:
mm128_bswap32_80( edata, pdata );
cubehashInit( &x16rv2_ctx.cube, 512, 16, 32 );
cubehashUpdate( &x16rv2_ctx.cube, (const byte*)edata, 64 );
intrlv_8x64( vdata, edata, edata, edata, edata,
edata, edata, edata, edata, 640 );
break;
case HAMSI:
mm512_bswap32_intrlv80_8x64( vdata, pdata );
hamsi512_8way_init( &x16rv2_ctx.hamsi );
hamsi512_8way_update( &x16rv2_ctx.hamsi, vdata, 64 );
break;
case SHABAL:
mm256_bswap32_intrlv80_8x32( vdata32, pdata );
shabal512_8way_init( &x16rv2_ctx.shabal );
shabal512_8way_update( &x16rv2_ctx.shabal, vdata32, 64 );
rintrlv_8x32_8x64( vdata, vdata32, 640 );
break;
case WHIRLPOOL:
mm128_bswap32_80( edata, pdata );
sph_whirlpool_init( &x16rv2_ctx.whirlpool );
sph_whirlpool( &x16rv2_ctx.whirlpool, edata, 64 );
intrlv_8x64( vdata, edata, edata, edata, edata,
edata, edata, edata, edata, 640 );
break;
default:
mm512_bswap32_intrlv80_8x64( vdata, pdata );
}
}
int x16rv2_8way_prehash( struct work *work )
{
x16r_gate_get_hash_order( work, x16r_hash_order );
x16rv2_8way_do_prehash( x16r_8way_vdata, work->data );
return 1;
}
int x16rv2_8way_hash( void* output, const void* input, const int thrid )
{
uint32_t vhash[24*8] __attribute__ ((aligned (128)));
uint32_t hash0[24] __attribute__ ((aligned (32)));
@@ -557,50 +593,28 @@ int x16rv2_8way_hash( void* output, const void* input, int thrid )
return 1;
}
int scanhash_x16rv2_8way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr)
#elif defined (X16RV2_4WAY)
// Pad the 24 bytes tiger hash to 64 bytes
inline void padtiger512( uint32_t* hash )
{
uint32_t hash[16*8] __attribute__ ((aligned (128)));
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
uint32_t vdata2[20*8] __attribute__ ((aligned (64)));
for ( int i = 6; i < 16; i++ ) hash[i] = 0;
}
void x16rv2_4way_do_prehash( void *vdata, void *pdata )
{
uint32_t vdata32[20*4] __attribute__ ((aligned (64)));
uint32_t edata[20] __attribute__ ((aligned (64)));
uint32_t bedata1[2] __attribute__((aligned(64)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce - 8;
uint32_t n = first_nonce;
__m512i *noncev = (__m512i*)vdata + 9; // aligned
const int thr_id = mythr->id;
volatile uint8_t *restart = &(work_restart[thr_id].restart);
const bool bench = opt_benchmark;
if ( bench ) ptarget[7] = 0x0cff;
mm512_bswap32_intrlv80_8x64( vdata, pdata );
bedata1[0] = bswap_32( pdata[1] );
bedata1[1] = bswap_32( pdata[2] );
static __thread uint32_t s_ntime = UINT32_MAX;
const uint32_t ntime = bswap_32( pdata[17] );
if ( s_ntime != ntime )
{
x16_r_s_getAlgoString( (const uint8_t*)bedata1, x16r_hash_order );
s_ntime = ntime;
if ( opt_debug && !thr_id )
applog( LOG_INFO, "hash order %s (%08x)", x16r_hash_order, ntime );
}
// Do midstate prehash on hash functions with block size <= 64 bytes.
const char elem = x16r_hash_order[0];
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
switch ( algo )
{
case JH:
mm512_bswap32_intrlv80_8x64( vdata, pdata );
jh512_8way_init( &x16rv2_ctx.jh );
jh512_8way_update( &x16rv2_ctx.jh, vdata, 64 );
mm256_bswap32_intrlv80_4x64( vdata, pdata );
jh512_4way_init( &x16rv2_ctx.jh );
jh512_4way_update( &x16rv2_ctx.jh, vdata, 64 );
break;
case KECCAK:
case LUFFA:
@@ -608,100 +622,45 @@ int scanhash_x16rv2_8way( struct work *work, uint32_t max_nonce,
mm128_bswap32_80( edata, pdata );
sph_tiger_init( &x16rv2_ctx.tiger );
sph_tiger( &x16rv2_ctx.tiger, edata, 64 );
intrlv_8x64( vdata, edata, edata, edata, edata,
edata, edata, edata, edata, 640 );
intrlv_4x64( vdata, edata, edata, edata, edata, 640 );
break;
case SKEIN:
mm512_bswap32_intrlv80_8x64( vdata, pdata );
skein512_8way_init( &x16rv2_ctx.skein );
skein512_8way_update( &x16rv2_ctx.skein, vdata, 64 );
mm256_bswap32_intrlv80_4x64( vdata, pdata );
skein512_4way_prehash64( &x16r_ctx.skein, vdata );
break;
case CUBEHASH:
mm128_bswap32_80( edata, pdata );
cubehashInit( &x16rv2_ctx.cube, 512, 16, 32 );
cubehashUpdate( &x16rv2_ctx.cube, (const byte*)edata, 64 );
intrlv_8x64( vdata, edata, edata, edata, edata,
edata, edata, edata, edata, 640 );
intrlv_4x64( vdata, edata, edata, edata, edata, 640 );
break;
case HAMSI:
mm512_bswap32_intrlv80_8x64( vdata, pdata );
hamsi512_8way_init( &x16rv2_ctx.hamsi );
hamsi512_8way_update( &x16rv2_ctx.hamsi, vdata, 64 );
mm256_bswap32_intrlv80_4x64( vdata, pdata );
hamsi512_4way_init( &x16rv2_ctx.hamsi );
hamsi512_4way_update( &x16rv2_ctx.hamsi, vdata, 64 );
break;
case SHABAL:
mm256_bswap32_intrlv80_8x32( vdata2, pdata );
shabal512_8way_init( &x16rv2_ctx.shabal );
shabal512_8way_update( &x16rv2_ctx.shabal, vdata2, 64 );
rintrlv_8x32_8x64( vdata, vdata2, 640 );
mm128_bswap32_intrlv80_4x32( vdata32, pdata );
shabal512_4way_init( &x16rv2_ctx.shabal );
shabal512_4way_update( &x16rv2_ctx.shabal, vdata32, 64 );
rintrlv_4x32_4x64( vdata, vdata32, 640 );
break;
case WHIRLPOOL:
mm128_bswap32_80( edata, pdata );
sph_whirlpool_init( &x16rv2_ctx.whirlpool );
sph_whirlpool( &x16rv2_ctx.whirlpool, edata, 64 );
intrlv_8x64( vdata, edata, edata, edata, edata,
edata, edata, edata, edata, 640 );
intrlv_4x64( vdata, edata, edata, edata, edata, 640 );
break;
default:
mm512_bswap32_intrlv80_8x64( vdata, pdata );
mm256_bswap32_intrlv80_4x64( vdata, pdata );
}
*noncev = mm512_intrlv_blend_32( _mm512_set_epi32(
n+7, 0, n+6, 0, n+5, 0, n+4, 0,
n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
do
{
if ( x16rv2_8way_hash( hash, vdata, thr_id ) )
for ( int i = 0; i < 8; i++ )
if ( unlikely( valid_hash( hash + (i<<3), ptarget ) && !bench ) )
{
pdata[19] = bswap_32( n+i );
submit_solution( work, hash+(i<<3), mythr );
}
*noncev = _mm512_add_epi32( *noncev,
m512_const1_64( 0x0000000800000000 ) );
n += 8;
} while ( likely( ( n < last_nonce ) && !(*restart) ) );
pdata[19] = n;
*hashes_done = n - first_nonce;
return 0;
}
}
#elif defined (X16RV2_4WAY)
union _x16rv2_4way_context_overlay
int x16rv2_4way_prehash( struct work *work )
{
blake512_4way_context blake;
bmw512_4way_context bmw;
#if defined(__VAES__)
groestl512_2way_context groestl;
shavite512_2way_context shavite;
echo_2way_context echo;
#else
hashState_groestl groestl;
shavite512_context shavite;
hashState_echo echo;
#endif
skein512_4way_context skein;
jh512_4way_context jh;
keccak512_4way_context keccak;
luffa_2way_context luffa;
cubehashParam cube;
simd_2way_context simd;
hamsi512_4way_context hamsi;
hashState_fugue fugue;
shabal512_4way_context shabal;
sph_whirlpool_context whirlpool;
sha512_4way_context sha512;
sph_tiger_context tiger;
};
typedef union _x16rv2_4way_context_overlay x16rv2_4way_context_overlay;
static __thread x16rv2_4way_context_overlay x16rv2_ctx;
// Pad the 24 bytes tiger hash to 64 bytes
inline void padtiger512( uint32_t* hash )
{
for ( int i = 6; i < 16; i++ ) hash[i] = 0;
x16r_gate_get_hash_order( work, x16r_hash_order );
x16rv2_4way_do_prehash( x16r_4way_vdata, work->data );
return 1;
}
int x16rv2_4way_hash( void* output, const void* input, int thrid )
@@ -1048,107 +1007,4 @@ int x16rv2_4way_hash( void* output, const void* input, int thrid )
return 1;
}
int scanhash_x16rv2_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr)
{
uint32_t hash[4*16] __attribute__ ((aligned (64)));
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
uint32_t vdata32[20*4] __attribute__ ((aligned (64)));
uint32_t edata[20];
uint32_t bedata1[2];
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce - 4;
uint32_t n = first_nonce;
const int thr_id = mythr->id;
__m256i *noncev = (__m256i*)vdata + 9;
volatile uint8_t *restart = &(work_restart[thr_id].restart);
const bool bench = opt_benchmark;
if ( bench ) ptarget[7] = 0x0fff;
bedata1[0] = bswap_32( pdata[1] );
bedata1[1] = bswap_32( pdata[2] );
static __thread uint32_t s_ntime = UINT32_MAX;
const uint32_t ntime = bswap_32(pdata[17]);
if ( s_ntime != ntime )
{
x16_r_s_getAlgoString( (const uint8_t*)bedata1, x16r_hash_order );
s_ntime = ntime;
if ( opt_debug && !thr_id )
applog( LOG_INFO, "hash order %s (%08x)", x16r_hash_order, ntime );
}
// Do midstate prehash on hash functions with block size <= 64 bytes.
const char elem = x16r_hash_order[0];
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
switch ( algo )
{
case JH:
mm256_bswap32_intrlv80_4x64( vdata, pdata );
jh512_4way_init( &x16rv2_ctx.jh );
jh512_4way_update( &x16rv2_ctx.jh, vdata, 64 );
break;
case KECCAK:
case LUFFA:
case SHA_512:
mm128_bswap32_80( edata, pdata );
sph_tiger_init( &x16rv2_ctx.tiger );
sph_tiger( &x16rv2_ctx.tiger, edata, 64 );
intrlv_4x64( vdata, edata, edata, edata, edata, 640 );
break;
case SKEIN:
mm256_bswap32_intrlv80_4x64( vdata, pdata );
skein512_4way_prehash64( &x16r_ctx.skein, vdata );
break;
case CUBEHASH:
mm128_bswap32_80( edata, pdata );
cubehashInit( &x16rv2_ctx.cube, 512, 16, 32 );
cubehashUpdate( &x16rv2_ctx.cube, (const byte*)edata, 64 );
intrlv_4x64( vdata, edata, edata, edata, edata, 640 );
break;
case HAMSI:
mm256_bswap32_intrlv80_4x64( vdata, pdata );
hamsi512_4way_init( &x16rv2_ctx.hamsi );
hamsi512_4way_update( &x16rv2_ctx.hamsi, vdata, 64 );
break;
case SHABAL:
mm128_bswap32_intrlv80_4x32( vdata32, pdata );
shabal512_4way_init( &x16rv2_ctx.shabal );
shabal512_4way_update( &x16rv2_ctx.shabal, vdata32, 64 );
rintrlv_4x32_4x64( vdata, vdata32, 640 );
break;
case WHIRLPOOL:
mm128_bswap32_80( edata, pdata );
sph_whirlpool_init( &x16rv2_ctx.whirlpool );
sph_whirlpool( &x16rv2_ctx.whirlpool, edata, 64 );
intrlv_4x64( vdata, edata, edata, edata, edata, 640 );
break;
default:
mm256_bswap32_intrlv80_4x64( vdata, pdata );
}
*noncev = mm256_intrlv_blend_32(
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
do
{
if ( x16rv2_4way_hash( hash, vdata, thr_id ) )
for ( int i = 0; i < 4; i++ )
if ( unlikely( valid_hash( hash + (i<<3), ptarget ) && !bench ) )
{
pdata[19] = bswap_32( n+i );
submit_solution( work, hash+(i<<3), mythr );
}
*noncev = _mm256_add_epi32( *noncev,
m256_const1_64( 0x0000000400000000 ) );
n += 4;
} while ( likely( ( n < last_nonce ) && !(*restart) ) );
pdata[19] = n;
*hashes_done = n - first_nonce;
return 0;
}
#endif

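padtiger512(), kept in the hunks above, exists because x16rv2 inserts Tiger ahead of keccak, luffa and sha512 in the chain: Tiger emits a 24-byte (192-bit) digest, i.e. 24/4 = 6 uint32_t words, so words 6 through 15 are zeroed to present a full 64-byte block to the next function. A quick standalone check of that padding:

#include <stdint.h>
#include <stdio.h>

/* Same rule as padtiger512(): keep the 6 words of Tiger output, zero the
 * remaining 10 words so the buffer is a full 64 bytes. */
static void pad_tiger_to_512( uint32_t hash[16] )
{
   for ( int i = 24/4; i < 64/4; i++ )
      hash[i] = 0;
}

int main(void)
{
   uint32_t h[16];
   for ( int i = 0; i < 16; i++ ) h[i] = 0xAAAAAAAAu;   /* pretend Tiger output */
   pad_tiger_to_512( h );
   printf( "h[5]=%08x h[6]=%08x h[15]=%08x\n", h[5], h[6], h[15] );
   /* prints: h[5]=aaaaaaaa h[6]=00000000 h[15]=00000000 */
   return 0;
}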
View File

@@ -43,9 +43,16 @@ inline void padtiger512(uint32_t* hash) {
for (int i = (24/4); i < (64/4); i++) hash[i] = 0;
}
int x16rv2_hash( void* output, const void* input, int thrid )
// no prehash
int x16rv2_prehash( const struct work *work )
{
uint32_t _ALIGN(128) hash[16];
x16r_gate_get_hash_order( work, x16r_hash_order );
return 1;
}
int x16rv2_hash( void* output, const void* input, const int thrid )
{
uint32_t _ALIGN(32) hash[16];
x16rv2_context_overlay ctx;
void *in = (void*) input;
int size = 80;
@@ -170,52 +177,4 @@ int x16rv2_hash( void* output, const void* input, int thrid )
return 1;
}
int scanhash_x16rv2( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t _ALIGN(128) hash32[8];
uint32_t _ALIGN(128) edata[20];
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
const int thr_id = mythr->id;
uint32_t nonce = first_nonce;
volatile uint8_t *restart = &(work_restart[thr_id].restart);
const bool bench = opt_benchmark;
casti_m128i( edata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
casti_m128i( edata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
casti_m128i( edata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
casti_m128i( edata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
casti_m128i( edata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
static __thread uint32_t s_ntime = UINT32_MAX;
if ( s_ntime != pdata[17] )
{
uint32_t ntime = swab32(pdata[17]);
x16_r_s_getAlgoString( (const uint8_t*) (&edata[1]), x16r_hash_order );
s_ntime = ntime;
if ( opt_debug && !thr_id )
applog( LOG_DEBUG, "hash order %s (%08x)",
x16r_hash_order, ntime );
}
if ( bench ) ptarget[7] = 0x0cff;
do
{
edata[19] = nonce;
if ( x16rv2_hash( hash32, edata, thr_id ) )
if ( unlikely( valid_hash( hash32, ptarget ) && !bench ) )
{
pdata[19] = bswap_32( nonce );
submit_solution( work, hash32, mythr );
}
nonce++;
} while ( nonce < max_nonce && !(*restart) );
pdata[19] = nonce;
*hashes_done = pdata[19] - first_nonce;
return 0;
}
#endif

View File

@@ -30,7 +30,7 @@ union _x21s_8way_context_overlay
typedef union _x21s_8way_context_overlay x21s_8way_context_overlay;
int x21s_8way_hash( void* output, const void* input, int thrid )
int x21s_8way_hash( void* output, const void* input, const int thrid )
{
uint32_t vhash[16*8] __attribute__ ((aligned (128)));
uint8_t shash[64*8] __attribute__ ((aligned (64)));
@@ -129,66 +129,6 @@ int x21s_8way_hash( void* output, const void* input, int thrid )
return 1;
}
int scanhash_x21s_8way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr)
{
uint32_t hash[16*8] __attribute__ ((aligned (128)));
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
uint32_t *hash7 = &hash[7<<3];
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
uint32_t bedata1[2] __attribute__((aligned(64)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t Htarg = ptarget[7];
const uint32_t first_nonce = pdata[19];
uint32_t n = first_nonce;
const uint32_t last_nonce = max_nonce - 16;
const int thr_id = mythr->id;
__m512i *noncev = (__m512i*)vdata + 9; // aligned
volatile uint8_t *restart = &(work_restart[thr_id].restart);
const bool bench = opt_benchmark;
if ( bench ) ptarget[7] = 0x0cff;
bedata1[0] = bswap_32( pdata[1] );
bedata1[1] = bswap_32( pdata[2] );
static __thread uint32_t s_ntime = UINT32_MAX;
uint32_t ntime = bswap_32( pdata[17] );
if ( s_ntime != ntime )
{
x16_r_s_getAlgoString( (const uint8_t*)bedata1, x16r_hash_order );
s_ntime = ntime;
if ( opt_debug && !thr_id )
applog( LOG_INFO, "hash order %s (%08x)", x16r_hash_order, ntime );
}
x16r_8way_prehash( vdata, pdata );
*noncev = mm512_intrlv_blend_32( _mm512_set_epi32(
n+7, 0, n+6, 0, n+5, 0, n+4, 0,
n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
do
{
if ( x21s_8way_hash( hash, vdata, thr_id ) )
for ( int lane = 0; lane < 8; lane++ )
if ( unlikely( hash7[lane] <= Htarg ) )
{
extr_lane_8x32( lane_hash, hash, lane, 256 );
if ( likely( valid_hash( lane_hash, ptarget ) && !bench ) )
{
pdata[19] = bswap_32( n + lane );
submit_solution( work, lane_hash, mythr );
}
}
*noncev = _mm512_add_epi32( *noncev,
m512_const1_64( 0x0000000800000000 ) );
n += 8;
} while ( likely( ( n < last_nonce ) && !(*restart) ) );
pdata[19] = n;
*hashes_done = n - first_nonce;
return 0;
}
bool x21s_8way_thread_init()
{
const int64_t ROW_LEN_INT64 = BLOCK_LEN_INT64 * 4; // nCols
@@ -215,7 +155,7 @@ union _x21s_4way_context_overlay
typedef union _x21s_4way_context_overlay x21s_4way_context_overlay;
int x21s_4way_hash( void* output, const void* input, int thrid )
int x21s_4way_hash( void* output, const void* input, const int thrid )
{
uint32_t vhash[16*4] __attribute__ ((aligned (64)));
uint8_t shash[64*4] __attribute__ ((aligned (64)));
@@ -291,58 +231,6 @@ int x21s_4way_hash( void* output, const void* input, int thrid )
return 1;
}
int scanhash_x21s_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr)
{
uint32_t hash[16*4] __attribute__ ((aligned (64)));
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
uint32_t bedata1[2] __attribute__((aligned(64)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce - 4;
uint32_t n = first_nonce;
const int thr_id = mythr->id;
const bool bench = opt_benchmark;
__m256i *noncev = (__m256i*)vdata + 9; // aligned
volatile uint8_t *restart = &(work_restart[thr_id].restart);
if ( bench ) ptarget[7] = 0x0cff;
bedata1[0] = bswap_32( pdata[1] );
bedata1[1] = bswap_32( pdata[2] );
static __thread uint32_t s_ntime = UINT32_MAX;
uint32_t ntime = bswap_32( pdata[17] );
if ( s_ntime != ntime )
{
x16_r_s_getAlgoString( (const uint8_t*)bedata1, x16r_hash_order );
s_ntime = ntime;
if ( opt_debug && !thr_id )
applog( LOG_DEBUG, "hash order %s (%08x)", x16r_hash_order, ntime );
}
x16r_4way_prehash( vdata, pdata );
*noncev = mm256_intrlv_blend_32(
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
do
{
if ( x21s_4way_hash( hash, vdata, thr_id ) )
for ( int i = 0; i < 4; i++ )
if ( unlikely( valid_hash( hash + (i<<3), ptarget ) && !bench ) )
{
pdata[19] = bswap_32( n+i );
submit_solution( work, hash+(i<<3), mythr );
}
*noncev = _mm256_add_epi32( *noncev,
m256_const1_64( 0x0000000400000000 ) );
n += 4;
} while ( likely( ( n < last_nonce ) && !(*restart) ) );
pdata[19] = n;
*hashes_done = n - first_nonce;
return 0;
}
bool x21s_4way_thread_init()
{
const int64_t ROW_LEN_INT64 = BLOCK_LEN_INT64 * 4; // nCols

View File

@@ -27,7 +27,7 @@ union _x21s_context_overlay
};
typedef union _x21s_context_overlay x21s_context_overlay;
int x21s_hash( void* output, const void* input, int thrid )
int x21s_hash( void* output, const void* input, const int thrid )
{
uint32_t _ALIGN(128) hash[16];
x21s_context_overlay ctx;
@@ -57,50 +57,6 @@ int x21s_hash( void* output, const void* input, int thrid )
return 1;
}
int scanhash_x21s( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t _ALIGN(128) hash32[8];
uint32_t _ALIGN(128) edata[20];
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
const int thr_id = mythr->id;
uint32_t nonce = first_nonce;
volatile uint8_t *restart = &(work_restart[thr_id].restart);
const bool bench = opt_benchmark;
if ( bench ) ptarget[7] = 0x0cff;
mm128_bswap32_80( edata, pdata );
static __thread uint32_t s_ntime = UINT32_MAX;
if ( s_ntime != pdata[17] )
{
uint32_t ntime = swab32(pdata[17]);
x16_r_s_getAlgoString( (const uint8_t*)(&edata[1]), x16r_hash_order );
s_ntime = ntime;
if ( opt_debug && !thr_id )
applog( LOG_INFO, "hash order %s (%08x)", x16r_hash_order, ntime );
}
x16r_prehash( edata, pdata );
do
{
edata[19] = nonce;
if ( x21s_hash( hash32, edata, thr_id ) )
if ( unlikely( valid_hash( hash32, ptarget ) && !bench ) )
{
pdata[19] = bswap_32( nonce );
submit_solution( work, hash32, mythr );
}
nonce++;
} while ( nonce < max_nonce && !(*restart) );
pdata[19] = nonce;
*hashes_done = pdata[19] - first_nonce;
return 0;
}
bool x21s_thread_init()
{
const int64_t ROW_LEN_INT64 = BLOCK_LEN_INT64 * 4; // nCols