diff --git a/RELEASE_NOTES b/RELEASE_NOTES
index 8bb7939..1fd50fa 100644
--- a/RELEASE_NOTES
+++ b/RELEASE_NOTES
@@ -65,6 +65,13 @@ If not what makes it happen or not happen?
 Change Log
 ----------
 
+v3.14.3
+
+#265: more mutex changes to reduce blocking with high thread count.
+
+#267: fixed hodl algo potential memory alignment issue,
+      add warning when thread count is not valid for mining hodl algo.
+
 v3.14.2
 
 The second line of the Share Accepted log is no longer displayed,
diff --git a/algo-gate-api.c b/algo-gate-api.c
index 410b664..38da868 100644
--- a/algo-gate-api.c
+++ b/algo-gate-api.c
@@ -128,6 +128,119 @@ int scanhash_generic( struct work *work, uint32_t max_nonce,
    return 0;
 }
 
+#if defined(__AVX2__)
+
+//int scanhash_4way_64in_64out( struct work *work, uint32_t max_nonce,
+//                      uint64_t *hashes_done, struct thr_info *mythr )
+
+//int scanhash_4way_64in_256out( struct work *work, uint32_t max_nonce,
+//                      uint64_t *hashes_done, struct thr_info *mythr )
+
+int scanhash_4way_64in_32out( struct work *work, uint32_t max_nonce,
+                      uint64_t *hashes_done, struct thr_info *mythr )
+{  // Generic 4-lane scan loop: hashes 4 consecutive nonces per pass through algo_gate.hash, submits any lane that meets work->target, always returns 0.
+   uint32_t hash32[8*4] __attribute__ ((aligned (64)));   // hash output for 4 lanes, 8 words each; read back below with extr_lane_4x32
+   uint32_t vdata[20*4] __attribute__ ((aligned (64)));   // 20 words per lane x 4 lanes, filled by mm256_bswap32_intrlv80_4x64
+   uint32_t lane_hash[8] __attribute__ ((aligned (64)));  // one lane's hash, extracted for the full target test
+   uint32_t *hash32_d7 = &(hash32[ 7*4 ]);                // the 4 per-lane values that are compared against ptarget[7]
+   uint32_t *pdata = work->data;
+   const uint32_t *ptarget = work->target;
+   const uint32_t first_nonce = pdata[19];                // starting nonce is taken from word 19 of the work data
+   const uint32_t last_nonce = max_nonce - 4;             // loop bound; every pass consumes 4 nonces
+   __m256i  *noncev = (__m256i*)vdata + 9;                // vector 9 of vdata; presumably the interleaved words 18/19 of each lane -- TODO confirm against the interleave helper
+   uint32_t n = first_nonce;
+   const int thr_id = mythr->id;
+   const uint32_t targ32_d7 = ptarget[7];
+   const bool bench = opt_benchmark;                      // sampled once; when set no lane is ever checked or submitted
+
+   mm256_bswap32_intrlv80_4x64( vdata, pdata );           // presumably byte-swaps and 64-bit interleaves the 80-byte header (per the helper's name)
+   *noncev = mm256_intrlv_blend_32(                       // merge n..n+3 into the existing vector, keeping its other 32-bit words
+                   _mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
+   do
+   {
+      if ( likely( algo_gate.hash( hash32, vdata, thr_id ) ) )     // a zero return skips all lane checks for this pass
+      for ( int lane = 0; lane < 4; lane++ )
+      if ( unlikely( hash32_d7[ lane ] <= targ32_d7 && !bench ) )  // cheap one-word pre-filter before the full compare
+      {
+         extr_lane_4x32( lane_hash, hash32, lane, 256 );
+         if ( valid_hash( lane_hash, ptarget ) )
+         {
+            pdata[19] = bswap_32( n + lane );                      // store the winning lane's nonce before submitting
+            submit_solution( work, lane_hash, mythr );
+         }
+      }
+      *noncev = _mm256_add_epi32( *noncev,                         // advance each lane's nonce by 4: the constant is 4 in the upper 32 bits, 0 in the lower
+                                  m256_const1_64( 0x0000000400000000 ) );  // assumes m256_const1_64 broadcasts the value to every 64-bit element
+      n += 4;
+   } while ( likely( ( n <= last_nonce ) && !work_restart[thr_id].restart ) );  // NOTE(review): '<=' here, but scanhash_8way_64in_32out uses '<' -- confirm which bound is intended
+   pdata[19] = n;                     // leave the next unscanned nonce in the work data
+   *hashes_done = n - first_nonce;    // number of nonces covered by this call
+   return 0;
+}
+
+//int scanhash_8way_32in_32out( struct work *work, uint32_t max_nonce,
+//                      uint64_t *hashes_done, struct thr_info *mythr )
+
+#endif
+
+#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
+
+//int scanhash_8way_64in_64out( struct work *work, uint32_t max_nonce,
+//                      uint64_t *hashes_done, struct thr_info *mythr )
+
+//int scanhash_8way_64in_256out( struct work *work, uint32_t max_nonce,
+//                      uint64_t *hashes_done, struct thr_info *mythr )
+
+int scanhash_8way_64in_32out( struct work *work, uint32_t max_nonce,
+                      uint64_t *hashes_done, struct thr_info *mythr )
+{  // Generic 8-lane scan loop: hashes 8 consecutive nonces per pass through algo_gate.hash, submits any lane that meets work->target, always returns 0.
+   uint32_t hash32[8*8] __attribute__ ((aligned (128)));  // hash output for 8 lanes, 8 words each; read back below with extr_lane_8x32
+   uint32_t vdata[20*8] __attribute__ ((aligned (64)));   // 20 words per lane x 8 lanes, filled by mm512_bswap32_intrlv80_8x64
+   uint32_t lane_hash[8] __attribute__ ((aligned (64)));  // one lane's hash, extracted for the full target test
+   uint32_t *hash32_d7 = &(hash32[7*8]);                  // the 8 per-lane values that are compared against ptarget[7]
+   uint32_t *pdata = work->data;
+   const uint32_t *ptarget = work->target;
+   const uint32_t first_nonce = pdata[19];                // starting nonce is taken from word 19 of the work data
+   const uint32_t last_nonce = max_nonce - 8;             // loop bound; every pass consumes 8 nonces
+   __m512i  *noncev = (__m512i*)vdata + 9;                // vector 9 of vdata; presumably the interleaved words 18/19 of each lane -- TODO confirm against the interleave helper
+   uint32_t n = first_nonce;
+   const int thr_id = mythr->id;
+   const uint32_t targ32_d7 = ptarget[7];
+   const bool bench = opt_benchmark;                      // sampled once; when set no lane is ever checked or submitted
+
+   mm512_bswap32_intrlv80_8x64( vdata, pdata );           // presumably byte-swaps and 64-bit interleaves the 80-byte header (per the helper's name)
+   *noncev = mm512_intrlv_blend_32(                       // merge n..n+7 into the existing vector, keeping its other 32-bit words
+              _mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
+                                n+3, 0, n+2, 0, n+1, 0, n,   0 ), *noncev );
+   do
+   {
+      if ( likely( algo_gate.hash( hash32, vdata, thr_id ) ) )           // a zero return skips all lane checks for this pass
+      for ( int lane = 0; lane < 8; lane++ )
+      if ( unlikely( ( hash32_d7[ lane ] <= targ32_d7 ) && !bench ) )    // cheap one-word pre-filter before the full compare
+      {
+         extr_lane_8x32( lane_hash, hash32, lane, 256 );
+         if ( likely( valid_hash( lane_hash, ptarget ) ) )
+         {
+            pdata[19] = bswap_32( n + lane );                            // store the winning lane's nonce before submitting
+            submit_solution( work, lane_hash, mythr );
+         }
+      }
+      *noncev = _mm512_add_epi32( *noncev,                               // advance each lane's nonce by 8: the constant is 8 in the upper 32 bits, 0 in the lower
+                                  m512_const1_64( 0x0000000800000000 ) );  // assumes m512_const1_64 broadcasts the value to every 64-bit element
+      n += 8;
+   } while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) );  // NOTE(review): '<' stops before the batch starting at max_nonce-8; scanhash_4way_64in_32out uses '<=' -- confirm which bound is intended
+   pdata[19] = n;                     // leave the next unscanned nonce in the work data
+   *hashes_done = n - first_nonce;    // number of nonces covered by this call
+   return 0;
+}
+
+//int scanhash_16way_32in_32out( struct work *work, uint32_t max_nonce,
+//                      uint64_t *hashes_done, struct thr_info *mythr )
+
+#endif
+
+
+
 int null_hash()
 {
    applog(LOG_WARNING,"SWERR: null_hash unsafe null function");
diff --git a/algo-gate-api.h b/algo-gate-api.h
index 0110ace..80aa3b4 100644
--- a/algo-gate-api.h
+++ b/algo-gate-api.h
@@ -110,12 +110,12 @@ inline bool set_excl ( set_t a, set_t b ) { return (a & b) == 0; }
 
 typedef struct
 {
-// Mandatory functions, one of these is mandatory. If the default scanhash
+// Mandatory functions, one of these is mandatory. If a generic scanhash
 // is used a custom hash function must be registered, with a custom scanhash
-// the hash function is not necessary. 
+// the custom hash function can be called directly and doesn't need to be
+// registered in the gate. 
 int ( *scanhash ) ( struct work*, uint32_t, uint64_t*, struct thr_info* );
 
-//int ( *hash )     ( void*, const void*, uint32_t ) ;
 int ( *hash )     ( void*, const void*, int );
 
 //optional, safe to use default in most cases
@@ -203,19 +203,61 @@ void four_way_not_tested();
 #define STD_WORK_DATA_SIZE 128
 #define STD_WORK_CMP_SIZE 76
 
-#define JR2_NONCE_INDEX 39  // 8 bit offset
+//#define JR2_NONCE_INDEX 39  // 8 bit offset
 
 // These indexes are only used with JSON RPC2 and are not gated.
-#define JR2_WORK_CMP_INDEX_2 43
-#define JR2_WORK_CMP_SIZE_2 33
+//#define JR2_WORK_CMP_INDEX_2 43
+//#define JR2_WORK_CMP_SIZE_2 33
 
 // deprecated, use generic instead
 int null_scanhash();
 
 // Default generic, may be used in many cases.
+// N-way is more complicated, requires many different implementations
+// depending on architecture, input format, and output format.
+// Naming convention is scanhash_[N]way_[input format]in_[output format]out
+// N = number of lanes
+// input/output format:
+//    32: 32 bit interleaved parallel lanes
+//    64: 64 bit interleaved parallel lanes
+//    640: input only, not interleaved, contiguous serial 640 bit lanes.
+//    256: output only, not interleaved, contiguous serial 256 bit lanes.
+
 int scanhash_generic( struct work *work, uint32_t max_nonce,
                       uint64_t *hashes_done, struct thr_info *mythr );
 
+#if defined(__AVX2__)
+
+//int scanhash_4way_64in_64out( struct work *work, uint32_t max_nonce,
+//                      uint64_t *hashes_done, struct thr_info *mythr );
+
+//int scanhash_4way_64in_256out( struct work *work, uint32_t max_nonce,
+//                      uint64_t *hashes_done, struct thr_info *mythr );
+
+int scanhash_4way_64in_32out( struct work *work, uint32_t max_nonce,
+                      uint64_t *hashes_done, struct thr_info *mythr );
+
+//int scanhash_8way_32in_32out( struct work *work, uint32_t max_nonce,
+//                      uint64_t *hashes_done, struct thr_info *mythr );
+
+#endif
+
+#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
+
+//int scanhash_8way_64in_64out( struct work *work, uint32_t max_nonce,
+//                      uint64_t *hashes_done, struct thr_info *mythr );
+
+//int scanhash_8way_64in_256out( struct work *work, uint32_t max_nonce,
+//                      uint64_t *hashes_done, struct thr_info *mythr );
+
+int scanhash_8way_64in_32out( struct work *work, uint32_t max_nonce,
+                      uint64_t *hashes_done, struct thr_info *mythr );
+
+//int scanhash_16way_32in_32out( struct work *work, uint32_t max_nonce,
+//                      uint64_t *hashes_done, struct thr_info *mythr );
+
+#endif
+
 // displays warning
 int null_hash    ();
 
@@ -263,7 +305,7 @@ int std_get_work_data_size();
 // by calling the algo's register function.
 bool register_algo_gate( int algo, algo_gate_t *gate );
 
-// Called by algos toverride any default gate functions that are applicable
+// Called by algos to override any default gate functions that are applicable
 // and do any other algo-specific initialization.
 // The register functions for all the algos can be declared here to reduce
 // compiler warnings but that's just more work for devs adding new algos.
diff --git a/algo/blake/decred-gate.c b/algo/blake/decred-gate.c
index 80b2900..0a90de7 100644
--- a/algo/blake/decred-gate.c
+++ b/algo/blake/decred-gate.c
@@ -78,7 +78,6 @@ void decred_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
    uint32_t extraheader[32] = { 0 };
    int headersize = 0;
    uint32_t* extradata = (uint32_t*) sctx->xnonce1;
-   size_t t;
    int i;
 
    // getwork over stratum, getwork merkle + header passed in coinb1
@@ -87,9 +86,6 @@ void decred_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
                   sizeof(extraheader) );
    memcpy( extraheader, &sctx->job.coinbase[32], headersize );
 
-   // Increment extranonce2 
-   for ( t = 0; t < sctx->xnonce2_size && !( ++sctx->job.xnonce2[t] ); t++ );
-
    // Assemble block header 
    memset( g_work->data, 0, sizeof(g_work->data) );
    g_work->data[0] = le32dec( sctx->job.version );
diff --git a/algo/hodl/hodl-gate.c b/algo/hodl/hodl-gate.c
index 930b026..2618309 100644
--- a/algo/hodl/hodl-gate.c
+++ b/algo/hodl/hodl-gate.c
@@ -99,13 +99,13 @@ void hodl_build_block_header( struct work* g_work, uint32_t version,
 // called only by thread 0, saves a backup of g_work
 void hodl_get_new_work( struct work* work, struct work* g_work)
 {
-   pthread_mutex_lock( &g_work_lock );
+   pthread_rwlock_rdlock( &g_work_lock );   // shared (read) lock: g_work is only read here. NOTE(review): hodl_work is written below under this read lock, which relies on this function having a single caller thread -- confirm
 
    work_free( &hodl_work );
    work_copy( &hodl_work, g_work );
    hodl_work.data[ algo_gate.nonce_index ] = ( clock() + rand() ) % 9999;
 
-   pthread_mutex_unlock( &g_work_lock );
+   pthread_rwlock_unlock( &g_work_lock );   // releases the read lock taken at the top of this function
 }
 
 json_t *hodl_longpoll_rpc_call( CURL *curl, int *err, char* lp_url )
@@ -159,11 +159,10 @@ bool register_hodl_algo( algo_gate_t* gate )
   applog( LOG_ERR, "Only CPUs with AES are supported, use legacy version.");
   return false;
 #endif
-//  if ( TOTAL_CHUNKS % opt_n_threads )
-//  {
-//     applog(LOG_ERR,"Thread count must be power of 2.");
-//     return false;
-//  }
+
+  if ( GARBAGE_SIZE % opt_n_threads )
+     applog( LOG_WARNING,"WARNING: Thread count must be power of 2. Miner may crash or produce invalid hash!" );
+
   pthread_barrier_init( &hodl_barrier, NULL, opt_n_threads );
   gate->optimizations         = SSE42_OPT | AES_OPT | AVX2_OPT;
   gate->scanhash              = (void*)&hodl_scanhash;
@@ -175,7 +174,7 @@ bool register_hodl_algo( algo_gate_t* gate )
   gate->resync_threads        = (void*)&hodl_resync_threads;
   gate->do_this_thread        = (void*)&hodl_do_this_thread;
   gate->work_cmp_size         = 76;
-  hodl_scratchbuf = (unsigned char*)malloc( 1 << 30 );
+  hodl_scratchbuf = (unsigned char*)_mm_malloc( 1 << 30, 64 );
   allow_getwork = false;
   opt_target_factor = 8388608.0;
   return ( hodl_scratchbuf != NULL );
diff --git a/algo/hodl/hodl-wolf.c b/algo/hodl/hodl-wolf.c
index ef580b2..6ff6175 100644
--- a/algo/hodl/hodl-wolf.c
+++ b/algo/hodl/hodl-wolf.c
@@ -70,7 +70,7 @@ int scanhash_hodl_wolf( struct work* work, uint32_t max_nonce,
     uint32_t *ptarget = work->target;
     int threadNumber = mythr->id;
     CacheEntry *Garbage = (CacheEntry*)hodl_scratchbuf;
-    CacheEntry Cache[AES_PARALLEL_N];
+    CacheEntry Cache[AES_PARALLEL_N] __attribute__ ((aligned (64)));
     __m128i* data[AES_PARALLEL_N];
     const __m128i* next[AES_PARALLEL_N];
     uint32_t CollisionCount = 0;
diff --git a/algo/lyra2/lyra2-gate.c b/algo/lyra2/lyra2-gate.c
index 6e42778..ad62d05 100644
--- a/algo/lyra2/lyra2-gate.c
+++ b/algo/lyra2/lyra2-gate.c
@@ -215,9 +215,6 @@ void phi2_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
    size_t t;
 
    algo_gate.gen_merkle_root( merkle_tree, sctx );
-   // Increment extranonce2
-   for ( t = 0; t < sctx->xnonce2_size && !( ++sctx->job.xnonce2[t] ); t++ );
-   // Assemble block header
    algo_gate.build_block_header( g_work, le32dec( sctx->job.version ),
                   (uint32_t*) sctx->job.prevhash, (uint32_t*) merkle_tree,
                   le32dec( sctx->job.ntime ), le32dec(sctx->job.nbits), NULL );
@@ -225,7 +222,6 @@ void phi2_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
       g_work->data[ 20+t ] = ((uint32_t*)sctx->job.extra)[t];
 }
 
-
 bool register_phi2_algo( algo_gate_t* gate )
 {
    gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT;
diff --git a/algo/nist5/zr5.c b/algo/nist5/zr5.c
index fa731bb..6152091 100644
--- a/algo/nist5/zr5.c
+++ b/algo/nist5/zr5.c
@@ -156,7 +156,7 @@ int scanhash_zr5( struct work *work, uint32_t max_nonce,
 void zr5_get_new_work( struct work* work, struct work* g_work, int thr_id,
                        uint32_t* end_nonce_ptr )
 {
-   pthread_mutex_lock( &g_work_lock );
+   pthread_rwlock_rdlock( &g_work_lock );
 
    // ignore POK in first word
    const int wkcmp_sz = 72;  // (19-1) * sizeof(uint32_t)
@@ -174,7 +174,7 @@ void zr5_get_new_work( struct work* work, struct work* g_work, int thr_id,
    else
        ++(*nonceptr);
 
-   pthread_mutex_unlock( &g_work_lock );
+   pthread_rwlock_unlock( &g_work_lock );
 }
 
 void zr5_display_pok( struct work* work )
diff --git a/algo/ripemd/lbry-gate.c b/algo/ripemd/lbry-gate.c
index f4080a8..ba38c65 100644
--- a/algo/ripemd/lbry-gate.c
+++ b/algo/ripemd/lbry-gate.c
@@ -69,13 +69,9 @@ void lbry_build_block_header( struct work* g_work, uint32_t version,
 void lbry_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
 {
    unsigned char merkle_root[64] = { 0 };
-   size_t t;
    int i;
 
    algo_gate.gen_merkle_root( merkle_root, sctx );
-   // Increment extranonce2 
-   for ( t = 0; t < sctx->xnonce2_size && !( ++sctx->job.xnonce2[t] ); t++ );
-   // Assemble block header 
 
    memset( g_work->data, 0, sizeof(g_work->data) );
    g_work->data[0] = le32dec( sctx->job.version );
diff --git a/algo/x16/minotaur.c b/algo/x16/minotaur.c
index fda02ae..069bf97 100644
--- a/algo/x16/minotaur.c
+++ b/algo/x16/minotaur.c
@@ -227,7 +227,7 @@ bool initialize_torture_garden()
 }
 
 // Produce a 32-byte hash from 80-byte input data
-int minotaur_hash( void *output, const void *input )
+int minotaur_hash( void *output, const void *input, int thr_id )
 {    
     unsigned char hash[64] __attribute__ ((aligned (64)));
 
diff --git a/algo/x16/x16r-gate.c b/algo/x16/x16r-gate.c
index 28ce579..462e264 100644
--- a/algo/x16/x16r-gate.c
+++ b/algo/x16/x16r-gate.c
@@ -135,18 +135,16 @@ void x16rt_getAlgoString( const uint32_t *timeHash, char *output)
 
 void veil_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
 {
+   uint32_t merkleroothash[8];
+   uint32_t witmerkleroothash[8];
+   uint32_t denom10[8];
+   uint32_t denom100[8];
+   uint32_t denom1000[8];
+   uint32_t denom10000[8];
+   int i;
    uchar merkle_tree[64] = { 0 };
-   size_t t;
 
    algo_gate.gen_merkle_root( merkle_tree, sctx );
-   // Increment extranonce2
-   for ( t = 0; t < sctx->xnonce2_size && !( ++sctx->job.xnonce2[t] ); t++ );
-
-   // Assemble block header
-//   algo_gate.build_block_header( g_work, le32dec( sctx->job.version ),
-//          (uint32_t*) sctx->job.prevhash, (uint32_t*) merkle_tree,
-//          le32dec( sctx->job.ntime ), le32dec(sctx->job.nbits) );
-   int i;
 
    memset( g_work->data, 0, sizeof(g_work->data) );
    g_work->data[0] = le32dec( sctx->job.version );
@@ -164,35 +162,35 @@ void veil_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
    g_work->data[31] = 0x00000280;
 
    for ( i = 0; i < 8; i++ )
-      g_work->merkleroothash[7 - i] = be32dec((uint32_t *)merkle_tree + i);
+      merkleroothash[7 - i] = be32dec((uint32_t *)merkle_tree + i);
    for ( i = 0; i < 8; i++ )
-      g_work->witmerkleroothash[7 - i] = be32dec((uint32_t *)merkle_tree + i);
+      witmerkleroothash[7 - i] = be32dec((uint32_t *)merkle_tree + i);
    for ( i = 0; i < 8; i++ )
-      g_work->denom10[i] =    le32dec((uint32_t *)sctx->job.denom10 + i);
+      denom10[i] =    le32dec((uint32_t *)sctx->job.denom10 + i);
    for ( i = 0; i < 8; i++ )
-      g_work->denom100[i] =   le32dec((uint32_t *)sctx->job.denom100 + i);
+      denom100[i] =   le32dec((uint32_t *)sctx->job.denom100 + i);
    for ( i = 0; i < 8; i++ )
-      g_work->denom1000[i] =  le32dec((uint32_t *)sctx->job.denom1000 + i);
+      denom1000[i] =  le32dec((uint32_t *)sctx->job.denom1000 + i);
    for ( i = 0; i < 8; i++ )
-      g_work->denom10000[i] = le32dec((uint32_t *)sctx->job.denom10000 + i);
+      denom10000[i] = le32dec((uint32_t *)sctx->job.denom10000 + i);
 
    uint32_t pofnhash[8];
    memset(pofnhash, 0x00, 32);
 
-   char denom10_str      [ 2 * sizeof( g_work->denom10 )           + 1 ];
-   char denom100_str     [ 2 * sizeof( g_work->denom100 )          + 1 ];
-   char denom1000_str    [ 2 * sizeof( g_work->denom1000 )         + 1 ];
-   char denom10000_str   [ 2 * sizeof( g_work->denom10000 )        + 1 ];
-   char merkleroot_str   [ 2 * sizeof( g_work->merkleroothash )    + 1 ];
-   char witmerkleroot_str[ 2 * sizeof( g_work->witmerkleroothash ) + 1 ];
+   char denom10_str      [ 2 * sizeof( denom10 )           + 1 ];
+   char denom100_str     [ 2 * sizeof( denom100 )          + 1 ];
+   char denom1000_str    [ 2 * sizeof( denom1000 )         + 1 ];
+   char denom10000_str   [ 2 * sizeof( denom10000 )        + 1 ];
+   char merkleroot_str   [ 2 * sizeof( merkleroothash )    + 1 ];
+   char witmerkleroot_str[ 2 * sizeof( witmerkleroothash ) + 1 ];
    char pofn_str         [ 2 * sizeof( pofnhash )                  + 1 ];
 
-   cbin2hex( denom10_str,       (char*) g_work->denom10,           32 );
-   cbin2hex( denom100_str,      (char*) g_work->denom100,          32 );
-   cbin2hex( denom1000_str,     (char*) g_work->denom1000,         32 );
-   cbin2hex( denom10000_str,    (char*) g_work->denom10000,        32 );
-   cbin2hex( merkleroot_str,    (char*) g_work->merkleroothash,    32 );
-   cbin2hex( witmerkleroot_str, (char*) g_work->witmerkleroothash, 32 );
+   cbin2hex( denom10_str,       (char*) denom10,           32 );
+   cbin2hex( denom100_str,      (char*) denom100,          32 );
+   cbin2hex( denom1000_str,     (char*) denom1000,         32 );
+   cbin2hex( denom10000_str,    (char*) denom10000,        32 );
+   cbin2hex( merkleroot_str,    (char*) merkleroothash,    32 );
+   cbin2hex( witmerkleroot_str, (char*) witmerkleroothash, 32 );
    cbin2hex( pofn_str,          (char*) pofnhash,                  32 );
 
    if ( true )
diff --git a/algo/x17/sonoa-4way.c b/algo/x17/sonoa-4way.c
index 28ddd7e..a9e6b63 100644
--- a/algo/x17/sonoa-4way.c
+++ b/algo/x17/sonoa-4way.c
@@ -58,7 +58,7 @@ union _sonoa_8way_context_overlay
 
 typedef union _sonoa_8way_context_overlay sonoa_8way_context_overlay;
 
-int sonoa_8way_hash( void *state, const void *input, int thrid )
+int sonoa_8way_hash( void *state, const void *input, int thr_id )
 {
      uint64_t vhash[8*8] __attribute__ ((aligned (128)));
      uint64_t vhashA[8*8] __attribute__ ((aligned (64)));
@@ -186,7 +186,7 @@ int sonoa_8way_hash( void *state, const void *input, int thrid )
 
 #endif
 
-     if ( work_restart[thrid].restart ) return 0;
+     if ( work_restart[thr_id].restart ) return 0;
 // 2
 
      bmw512_8way_full( &ctx.bmw, vhash, vhash, 64 );
@@ -302,7 +302,7 @@ int sonoa_8way_hash( void *state, const void *input, int thrid )
      hamsi512_8way_update( &ctx.hamsi, vhash, 64 );
      hamsi512_8way_close( &ctx.hamsi, vhash );
 
-     if ( work_restart[thrid].restart ) return 0;
+     if ( work_restart[thr_id].restart ) return 0;
 // 3
 
      bmw512_8way_full( &ctx.bmw, vhash, vhash, 64 );
@@ -432,7 +432,7 @@ int sonoa_8way_hash( void *state, const void *input, int thrid )
      sph_fugue512_full( &ctx.fugue, hash6, hash6, 64 );
      sph_fugue512_full( &ctx.fugue, hash7, hash7, 64 );
 
-     if ( work_restart[thrid].restart ) return 0;
+     if ( work_restart[thr_id].restart ) return 0;
 // 4
 
      intrlv_8x64_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
@@ -630,7 +630,7 @@ int sonoa_8way_hash( void *state, const void *input, int thrid )
 
 #endif
 
-     if ( work_restart[thrid].restart ) return 0;
+     if ( work_restart[thr_id].restart ) return 0;
 // 5
 
      bmw512_8way_full( &ctx.bmw, vhash, vhash, 64 );
@@ -783,7 +783,7 @@ int sonoa_8way_hash( void *state, const void *input, int thrid )
      sph_whirlpool512_full( &ctx.whirlpool, hash6, hash6, 64 );
      sph_whirlpool512_full( &ctx.whirlpool, hash7, hash7, 64 );
 
-     if ( work_restart[thrid].restart ) return 0;
+     if ( work_restart[thr_id].restart ) return 0;
 // 6
 
      intrlv_8x64_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
@@ -952,7 +952,7 @@ int sonoa_8way_hash( void *state, const void *input, int thrid )
      sph_whirlpool512_full( &ctx.whirlpool, hash6, hash6, 64 );
      sph_whirlpool512_full( &ctx.whirlpool, hash7, hash7, 64 );
 
-     if ( work_restart[thrid].restart ) return 0;
+     if ( work_restart[thr_id].restart ) return 0;
 // 7
 
      intrlv_8x64_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
@@ -1117,49 +1117,6 @@ int sonoa_8way_hash( void *state, const void *input, int thrid )
 
      return 1;
 }
-     
-int scanhash_sonoa_8way( struct work *work, uint32_t max_nonce,
-                       uint64_t *hashes_done, struct thr_info *mythr )
-{
-   uint32_t hash[8*16] __attribute__ ((aligned (128)));
-   uint32_t vdata[20*8] __attribute__ ((aligned (64)));
-   uint32_t lane_hash[8] __attribute__ ((aligned (64)));
-   uint32_t *hashd7 = &(hash[7<<3]);
-   uint32_t *pdata = work->data;
-   const uint32_t *ptarget = work->target;
-   const uint32_t first_nonce = pdata[19];
-   const uint32_t last_nonce = max_nonce - 8;
-   __m512i  *noncev = (__m512i*)vdata + 9;   // aligned
-   uint32_t n = first_nonce;
-   const int thr_id = mythr->id;
-   const uint32_t targ32 = ptarget[7];
-
-   mm512_bswap32_intrlv80_8x64( vdata, pdata );
-   *noncev = mm512_intrlv_blend_32(
-              _mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
-                                n+3, 0, n+2, 0, n+1, 0, n,   0 ), *noncev );
-
-   do
-   {
-      if ( sonoa_8way_hash( hash, vdata, thr_id ) )
-      for ( int lane = 0; lane < 8; lane++ )
-      if unlikely( ( hashd7[ lane ] <= targ32 ) )
-      {
-         extr_lane_8x32( lane_hash, hash, lane, 256 );
-         if ( likely( valid_hash( lane_hash, ptarget ) && !opt_benchmark ) )
-         {
-            pdata[19] = bswap_32( n + lane );
-            submit_solution( work, lane_hash, mythr );
-         }
-      }
-      *noncev = _mm512_add_epi32( *noncev,
-                                  m512_const1_64( 0x0000000800000000 ) );
-      n += 8;
-   } while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) );
-   pdata[19] = n;
-   *hashes_done = n - first_nonce;
-   return 0;
-}
 
 #elif defined(SONOA_4WAY)
 
@@ -1186,7 +1143,7 @@ union _sonoa_4way_context_overlay
 
 typedef union _sonoa_4way_context_overlay sonoa_4way_context_overlay;
 
-int sonoa_4way_hash( void *state, const void *input, int thrid )
+int sonoa_4way_hash( void *state, const void *input, int thr_id )
 {
      uint64_t hash0[8] __attribute__ ((aligned (64)));
      uint64_t hash1[8] __attribute__ ((aligned (64)));
@@ -1250,7 +1207,7 @@ int sonoa_4way_hash( void *state, const void *input, int thrid )
      echo_full( &ctx.echo, (BitSequence *)hash3, 512,
                      (const BitSequence *)hash3, 64 );
      
-     if ( work_restart[thrid].restart ) return 0;
+     if ( work_restart[thr_id].restart ) return 0;
 // 2
 
      intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
@@ -1310,7 +1267,7 @@ int sonoa_4way_hash( void *state, const void *input, int thrid )
      hamsi512_4way_update( &ctx.hamsi, vhash, 64 );
      hamsi512_4way_close( &ctx.hamsi, vhash );
 
-     if ( work_restart[thrid].restart ) return 0;
+     if ( work_restart[thr_id].restart ) return 0;
 // 3
 
      bmw512_4way_init( &ctx.bmw );
@@ -1375,7 +1332,7 @@ int sonoa_4way_hash( void *state, const void *input, int thrid )
      sph_fugue512_full( &ctx.fugue, hash2, hash2, 64 );
      sph_fugue512_full( &ctx.fugue, hash3, hash3, 64 );
 
-     if ( work_restart[thrid].restart ) return 0;
+     if ( work_restart[thr_id].restart ) return 0;
 // 4
      intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
 
@@ -1472,7 +1429,7 @@ int sonoa_4way_hash( void *state, const void *input, int thrid )
      shavite512_2way_init( &ctx.shavite );
      shavite512_2way_update_close( &ctx.shavite, vhashB, vhashB, 64 );
 
-     if ( work_restart[thrid].restart ) return 0;
+     if ( work_restart[thr_id].restart ) return 0;
 // 5
      rintrlv_2x128_4x64( vhash, vhashA, vhashB, 512 );
 
@@ -1557,7 +1514,7 @@ int sonoa_4way_hash( void *state, const void *input, int thrid )
      sph_whirlpool512_full( &ctx.whirlpool, hash2, hash2, 64 );
      sph_whirlpool512_full( &ctx.whirlpool, hash3, hash3, 64 );
 
-     if ( work_restart[thrid].restart ) return 0;
+     if ( work_restart[thr_id].restart ) return 0;
 // 6
 
      intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
@@ -1650,7 +1607,7 @@ int sonoa_4way_hash( void *state, const void *input, int thrid )
      sph_whirlpool512_full( &ctx.whirlpool, hash2, hash2, 64 );
      sph_whirlpool512_full( &ctx.whirlpool, hash3, hash3, 64 );
 
-     if ( work_restart[thrid].restart ) return 0;    
+     if ( work_restart[thr_id].restart ) return 0;    
 // 7
 
      intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
@@ -1745,46 +1702,4 @@ int sonoa_4way_hash( void *state, const void *input, int thrid )
      return 1;
 }
 
-int scanhash_sonoa_4way( struct work *work, const uint32_t max_nonce,
-	            uint64_t *hashes_done, struct thr_info *mythr )
-{
-     uint32_t hash[4*16] __attribute__ ((aligned (64)));
-     uint32_t vdata[24*4] __attribute__ ((aligned (64)));
-     uint32_t lane_hash[8] __attribute__ ((aligned (32)));
-     uint32_t *hashd7 = &( hash[7<<2] );
-     uint32_t *pdata = work->data;
-     const uint32_t *ptarget = work->target;
-     const uint32_t first_nonce = pdata[19];
-     const uint32_t last_nonce = max_nonce - 4;
-     const uint32_t targ32 = ptarget[7];
-     uint32_t n = first_nonce;
-     __m256i  *noncev = (__m256i*)vdata + 9;  
-     const int thr_id = mythr->id;
-
-     mm256_bswap32_intrlv80_4x64( vdata, pdata );
-     *noncev = mm256_intrlv_blend_32(
-                   _mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
-
-     do
-     {
-        if ( sonoa_4way_hash( hash, vdata, thr_id ) )
-        for ( int lane = 0; lane < 4; lane++ )
-        if ( unlikely( hashd7[ lane ] <= targ32 ) )
-        {
-           extr_lane_4x32( lane_hash, hash, lane, 256 );
-           if ( likely( valid_hash( lane_hash, ptarget ) && !opt_benchmark ) )
-           {
-              pdata[19] = bswap_32( n + lane );
-              submit_solution( work, lane_hash, mythr );
-           }
-        }
-        *noncev = _mm256_add_epi32( *noncev,
-                                    m256_const1_64( 0x0000000400000000 ) );
-        n += 4;
-     } while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) );
-     pdata[19] = n;
-     *hashes_done = n - first_nonce;
-     return 0;
-}
-
 #endif
diff --git a/algo/x17/sonoa-gate.c b/algo/x17/sonoa-gate.c
index a187913..926beb4 100644
--- a/algo/x17/sonoa-gate.c
+++ b/algo/x17/sonoa-gate.c
@@ -3,14 +3,13 @@
 bool register_sonoa_algo( algo_gate_t* gate )
 {
 #if defined (SONOA_8WAY)
-  gate->scanhash  = (void*)&scanhash_sonoa_8way;
+  gate->scanhash  = (void*)&scanhash_8way_64in_32out;
   gate->hash      = (void*)&sonoa_8way_hash;
 #elif defined (SONOA_4WAY)
-  gate->scanhash  = (void*)&scanhash_sonoa_4way;
+  gate->scanhash  = (void*)&scanhash_4way_64in_32out;
   gate->hash      = (void*)&sonoa_4way_hash;
 #else
   init_sonoa_ctx();
-//  gate->scanhash  = (void*)&scanhash_sonoa;
   gate->hash      = (void*)&sonoa_hash;
 #endif
   gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT;
diff --git a/algo/x17/sonoa-gate.h b/algo/x17/sonoa-gate.h
index 895bcd2..997bff1 100644
--- a/algo/x17/sonoa-gate.h
+++ b/algo/x17/sonoa-gate.h
@@ -14,21 +14,15 @@ bool register_sonoa_algo( algo_gate_t* gate );
 
 #if defined(SONOA_8WAY)
 
-int sonoa_8way_hash( void *state, const void *input, int thrid );
-int scanhash_sonoa_8way( struct work *work, uint32_t max_nonce,
-                         uint64_t *hashes_done, struct thr_info *mythr );
+int sonoa_8way_hash( void *state, const void *input, int thr_id );
 
 #elif defined(SONOA_4WAY)
 
-int sonoa_4way_hash( void *state, const void *input, int thrid );
-int scanhash_sonoa_4way( struct work *work, uint32_t max_nonce,
-                         uint64_t *hashes_done, struct thr_info *mythr );
+int sonoa_4way_hash( void *state, const void *input, int thr_id );
 
 #else
 
-int sonoa_hash( void *state, const void *input, int thrid );
-int scanhash_sonoa( struct work *work, uint32_t max_nonce,
-                  uint64_t *hashes_done, struct thr_info *mythr );
+int sonoa_hash( void *state, const void *input, int thr_id );
 void init_sonoa_ctx();
 
 #endif
diff --git a/algo/x17/sonoa.c b/algo/x17/sonoa.c
index 502283c..19dbcb7 100644
--- a/algo/x17/sonoa.c
+++ b/algo/x17/sonoa.c
@@ -83,7 +83,7 @@ void init_sonoa_ctx()
         sph_haval256_5_init(&sonoa_ctx.haval);
 };
 
-int sonoa_hash( void *state, const void *input, int thrid )
+int sonoa_hash( void *state, const void *input, int thr_id )
 {
 	uint8_t hash[128] __attribute__ ((aligned (64)));
    sonoa_ctx_holder ctx __attribute__ ((aligned (64)));
@@ -132,7 +132,7 @@ int sonoa_hash( void *state, const void *input, int thrid )
    sph_echo512_close(&ctx.echo, hash);
 #endif
 
-   if ( work_restart[thrid].restart ) return 0;
+   if ( work_restart[thr_id].restart ) return 0;
 //
 
    sph_bmw512_init( &ctx.bmw);
@@ -190,7 +190,7 @@ int sonoa_hash( void *state, const void *input, int thrid )
    sph_hamsi512(&ctx.hamsi, hash, 64);
    sph_hamsi512_close(&ctx.hamsi, hash);
 	
-   if ( work_restart[thrid].restart ) return 0;
+   if ( work_restart[thr_id].restart ) return 0;
 //
 
    sph_bmw512_init( &ctx.bmw);
@@ -252,7 +252,7 @@ int sonoa_hash( void *state, const void *input, int thrid )
    sph_fugue512(&ctx.fugue, hash, 64);
    sph_fugue512_close(&ctx.fugue, hash);
 
-   if ( work_restart[thrid].restart ) return 0;
+   if ( work_restart[thr_id].restart ) return 0;
 //
 
    sph_bmw512_init( &ctx.bmw);
@@ -336,7 +336,7 @@ int sonoa_hash( void *state, const void *input, int thrid )
    sph_shavite512(&ctx.shavite, hash, 64);
    sph_shavite512_close(&ctx.shavite, hash);
 
-   if ( work_restart[thrid].restart ) return 0;
+   if ( work_restart[thr_id].restart ) return 0;
 //
 
    sph_bmw512_init( &ctx.bmw);
@@ -410,7 +410,7 @@ int sonoa_hash( void *state, const void *input, int thrid )
    sph_whirlpool(&ctx.whirlpool, hash, 64);
    sph_whirlpool_close(&ctx.whirlpool, hash);
 
-   if ( work_restart[thrid].restart ) return 0;
+   if ( work_restart[thr_id].restart ) return 0;
 //
    sph_bmw512_init( &ctx.bmw);
    sph_bmw512(&ctx.bmw, hash, 64);
@@ -487,7 +487,7 @@ int sonoa_hash( void *state, const void *input, int thrid )
    sph_whirlpool(&ctx.whirlpool, hash, 64);
    sph_whirlpool_close(&ctx.whirlpool, hash);
 
-   if ( work_restart[thrid].restart ) return 0;
+   if ( work_restart[thr_id].restart ) return 0;
 //
 
    sph_bmw512_init( &ctx.bmw);
diff --git a/algo/x17/x17-4way.c b/algo/x17/x17-4way.c
index 30e4659..4fe98bc 100644
--- a/algo/x17/x17-4way.c
+++ b/algo/x17/x17-4way.c
@@ -57,7 +57,7 @@ union _x17_8way_context_overlay
 } __attribute__ ((aligned (64)));
 typedef union _x17_8way_context_overlay x17_8way_context_overlay;
 
-int x17_8way_hash( void *state, const void *input )
+int x17_8way_hash( void *state, const void *input, int thr_id )
 {
      uint64_t vhash[8*8] __attribute__ ((aligned (128)));
      uint64_t vhashA[8*8] __attribute__ ((aligned (64)));
@@ -234,50 +234,6 @@ int x17_8way_hash( void *state, const void *input )
      return 1;
 }
 
-int scanhash_x17_8way( struct work *work, uint32_t max_nonce,
-                       uint64_t *hashes_done, struct thr_info *mythr )
-{
-   uint32_t hash32[8*8] __attribute__ ((aligned (128)));
-   uint32_t vdata[20*8] __attribute__ ((aligned (64)));
-   uint32_t lane_hash[8] __attribute__ ((aligned (64)));
-   uint32_t *hash32_d7 = &(hash32[7*8]);
-   uint32_t *pdata = work->data;
-   const uint32_t *ptarget = work->target;
-   const uint32_t first_nonce = pdata[19];
-   const uint32_t last_nonce = max_nonce - 8;
-   __m512i  *noncev = (__m512i*)vdata + 9; 
-   uint32_t n = first_nonce;
-   const int thr_id = mythr->id;
-   const uint32_t targ32_d7 = ptarget[7];
-   const bool bench = opt_benchmark;
-
-   mm512_bswap32_intrlv80_8x64( vdata, pdata );
-   *noncev = mm512_intrlv_blend_32(
-              _mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
-                                n+3, 0, n+2, 0, n+1, 0, n,   0 ), *noncev );
-   do
-   {
-      x17_8way_hash( hash32, vdata );
-
-      for ( int lane = 0; lane < 8; lane++ )
-      if ( unlikely( ( hash32_d7[ lane ] <= targ32_d7 ) && !bench ) )
-      {
-         extr_lane_8x32( lane_hash, hash32, lane, 256 );
-         if ( likely( valid_hash( lane_hash, ptarget ) ) )
-         {
-            pdata[19] = bswap_32( n + lane );
-            submit_solution( work, lane_hash, mythr );
-         }
-      }
-      *noncev = _mm512_add_epi32( *noncev,
-                                  m512_const1_64( 0x0000000800000000 ) );
-      n += 8;
-   } while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) );
-   pdata[19] = n;
-   *hashes_done = n - first_nonce;
-   return 0;
-}
-
 #elif defined(X17_4WAY)
 
 union _x17_4way_context_overlay
@@ -302,7 +258,7 @@ union _x17_4way_context_overlay
 };  
 typedef union _x17_4way_context_overlay x17_4way_context_overlay;
 
-int x17_4way_hash( void *state, const void *input )
+int x17_4way_hash( void *state, const void *input, int thr_id )
 {
      uint64_t vhash[8*4] __attribute__ ((aligned (64)));
      uint64_t vhashA[8*4] __attribute__ ((aligned (64)));
@@ -405,47 +361,4 @@ int x17_4way_hash( void *state, const void *input )
      return 1;
 }
 
-int scanhash_x17_4way( struct work *work, uint32_t max_nonce,
-                       uint64_t *hashes_done, struct thr_info *mythr )
-{
-   uint32_t hash32[8*4] __attribute__ ((aligned (64)));
-   uint32_t vdata[20*4] __attribute__ ((aligned (64)));
-   uint32_t lane_hash[8] __attribute__ ((aligned (64)));
-   uint32_t *hash32_d7 = &(hash32[ 7*4 ]);
-   uint32_t *pdata = work->data;
-   const uint32_t *ptarget = work->target;
-   const uint32_t first_nonce = pdata[19];
-   const uint32_t last_nonce = max_nonce - 4;
-   __m256i  *noncev = (__m256i*)vdata + 9;
-   uint32_t n = first_nonce;
-   const int thr_id = mythr->id;
-   const uint32_t targ32_d7 = ptarget[7];
-   const bool bench = opt_benchmark;
-
-   mm256_bswap32_intrlv80_4x64( vdata, pdata );
-   *noncev = mm256_intrlv_blend_32(
-                   _mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
-   do
-   {
-      x17_4way_hash( hash32, vdata );
-
-      for ( int lane = 0; lane < 4; lane++ )
-      if ( unlikely( hash32_d7[ lane ] <= targ32_d7 && !bench ) )
-      {  
-         extr_lane_4x32( lane_hash, hash32, lane, 256 );
-         if ( valid_hash( lane_hash, ptarget ) )
-         {
-            pdata[19] = bswap_32( n + lane );
-            submit_solution( work, lane_hash, mythr );
-         }            
-      }
-      *noncev = _mm256_add_epi32( *noncev,
-                                  m256_const1_64( 0x0000000400000000 ) );
-      n += 4;
-   } while ( likely( ( n <= last_nonce ) && !work_restart[thr_id].restart ) );
-   pdata[19] = n;
-   *hashes_done = n - first_nonce;
-   return 0;
-}
-
 #endif
diff --git a/algo/x17/x17-gate.c b/algo/x17/x17-gate.c
index 4ebfa0b..eee3d60 100644
--- a/algo/x17/x17-gate.c
+++ b/algo/x17/x17-gate.c
@@ -3,10 +3,10 @@
 bool register_x17_algo( algo_gate_t* gate )
 {
 #if defined (X17_8WAY)
-  gate->scanhash  = (void*)&scanhash_x17_8way;
+  gate->scanhash  = (void*)&scanhash_8way_64in_32out;
   gate->hash      = (void*)&x17_8way_hash;
 #elif defined (X17_4WAY)
-  gate->scanhash  = (void*)&scanhash_x17_4way;
+  gate->scanhash  = (void*)&scanhash_4way_64in_32out;
   gate->hash      = (void*)&x17_4way_hash;
 #else
   gate->hash      = (void*)&x17_hash;
diff --git a/algo/x17/x17-gate.h b/algo/x17/x17-gate.h
index 1b8ada4..003d77f 100644
--- a/algo/x17/x17-gate.h
+++ b/algo/x17/x17-gate.h
@@ -14,14 +14,11 @@ bool register_x17_algo( algo_gate_t* gate );
 
 #if defined(X17_8WAY)
 
-int x17_8way_hash( void *state, const void *input );
-int scanhash_x17_8way( struct work *work, uint32_t max_nonce,
-                       uint64_t *hashes_done, struct thr_info *mythr );
+int x17_8way_hash( void *state, const void *input, int thr_id );
+
 #elif defined(X17_4WAY)
 
-int x17_4way_hash( void *state, const void *input );
-int scanhash_x17_4way( struct work *work, uint32_t max_nonce,
-                       uint64_t *hashes_done, struct thr_info *mythr );
+int x17_4way_hash( void *state, const void *input, int thr_id );
 
 #endif
 
diff --git a/algo/x17/xevan-4way.c b/algo/x17/xevan-4way.c
index 3f1ff14..fbf5d26 100644
--- a/algo/x17/xevan-4way.c
+++ b/algo/x17/xevan-4way.c
@@ -57,7 +57,7 @@ union _xevan_8way_context_overlay
 } __attribute__ ((aligned (64)));
 typedef union _xevan_8way_context_overlay xevan_8way_context_overlay;
 
-int xevan_8way_hash( void *output, const void *input )
+int xevan_8way_hash( void *output, const void *input, int thr_id )
 {
      uint64_t vhash[16<<3] __attribute__ ((aligned (128)));
      uint64_t vhashA[16<<3] __attribute__ ((aligned (64)));
@@ -399,50 +399,6 @@ int xevan_8way_hash( void *output, const void *input )
      return 1;
 }
 
-int scanhash_xevan_8way( struct work *work, uint32_t max_nonce,
-                       uint64_t *hashes_done, struct thr_info *mythr )
-{
-   uint32_t hash[8*8] __attribute__ ((aligned (128)));
-   uint32_t vdata[20*8] __attribute__ ((aligned (64)));
-   uint32_t lane_hash[8] __attribute__ ((aligned (64)));
-   uint32_t *hashd7 = &(hash[7*8]);
-   uint32_t *pdata = work->data;
-   const uint32_t *ptarget = work->target;
-   const uint32_t first_nonce = pdata[19];
-   const uint32_t last_nonce = max_nonce - 8;
-   __m512i  *noncev = (__m512i*)vdata + 9;
-   uint32_t n = first_nonce;
-   const int thr_id = mythr->id;
-   const uint32_t targ32 = ptarget[7];
-   const bool bench = opt_benchmark;
-
-   mm512_bswap32_intrlv80_8x64( vdata, pdata );
-   *noncev = mm512_intrlv_blend_32(
-              _mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
-                                n+3, 0, n+2, 0, n+1, 0, n,   0 ), *noncev );
-   do
-   {
-      xevan_8way_hash( hash, vdata );
-
-      for ( int lane = 0; lane < 8; lane++ )
-      if ( unlikely( ( hashd7[ lane ] <= targ32 ) && !bench ) )
-      {
-         extr_lane_8x32( lane_hash, hash, lane, 256 );
-         if ( likely( valid_hash( lane_hash, ptarget ) ) )
-         {
-            pdata[19] = bswap_32( n + lane );
-            submit_solution( work, lane_hash, mythr );
-         }
-      }
-      *noncev = _mm512_add_epi32( *noncev,
-                                  m512_const1_64( 0x0000000800000000 ) );
-      n += 8;
-   } while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) );
-   pdata[19] = n;
-   *hashes_done = n - first_nonce;
-   return 0;
-}
-
 #elif defined(XEVAN_4WAY)
 
 union _xevan_4way_context_overlay
@@ -467,7 +423,7 @@ union _xevan_4way_context_overlay
 };
 typedef union _xevan_4way_context_overlay xevan_4way_context_overlay;
 
-int xevan_4way_hash( void *output, const void *input )
+int xevan_4way_hash( void *output, const void *input, int thr_id )
 {
      uint64_t hash0[16] __attribute__ ((aligned (64)));
      uint64_t hash1[16] __attribute__ ((aligned (64)));
@@ -672,47 +628,4 @@ int xevan_4way_hash( void *output, const void *input )
      return 1;
 }
 
-int scanhash_xevan_4way( struct work *work, uint32_t max_nonce,
-                         uint64_t *hashes_done, struct thr_info *mythr )
-{
-   uint32_t hash[16*4] __attribute__ ((aligned (128)));
-   uint32_t vdata[20*4] __attribute__ ((aligned (64)));
-   uint32_t lane_hash[8] __attribute__ ((aligned (64)));
-   uint32_t *hashd7 = &(hash[7<<2]);
-   uint32_t *pdata = work->data;
-   uint32_t *ptarget = work->target;
-   int thr_id = mythr->id;
-   __m256i  *noncev = (__m256i*)vdata + 9; 
-   const uint32_t targ32 = ptarget[7];
-   const uint32_t first_nonce = pdata[19];
-   const uint32_t last_nonce = max_nonce - 4;
-   uint32_t n = first_nonce;
-   const bool bench = opt_benchmark;
-
-   if ( bench )  ptarget[7] = 0x0cff;
-
-   mm256_bswap32_intrlv80_4x64( vdata, pdata );
-   *noncev = mm256_intrlv_blend_32(
-                   _mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
-   do {
-      xevan_4way_hash( hash, vdata );
-      for ( int lane = 0; lane < 4; lane++ )
-      if ( unlikely( hashd7[ lane ] <= targ32 ) && ! bench )
-      {
-         extr_lane_4x32( lane_hash, hash, lane, 256 );
-	      if ( valid_hash( lane_hash, ptarget ) )
-         {
-             pdata[19] = bswap_32( n + lane );
-             submit_solution( work, lane_hash, mythr );
-         }
-      }
-      *noncev = _mm256_add_epi32( *noncev,
-                                  m256_const1_64( 0x0000000400000000 ) );
-      n += 4;
-   } while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) );
-   pdata[19] = n;
-   *hashes_done = n - first_nonce;
-   return 0;
-}
-
 #endif
diff --git a/algo/x17/xevan-gate.c b/algo/x17/xevan-gate.c
index b129330..184ed2d 100644
--- a/algo/x17/xevan-gate.c
+++ b/algo/x17/xevan-gate.c
@@ -3,10 +3,10 @@
 bool register_xevan_algo( algo_gate_t* gate )
 {
 #if defined (XEVAN_8WAY)
-  gate->scanhash  = (void*)&scanhash_xevan_8way;
+  gate->scanhash  = (void*)&scanhash_8way_64in_32out;
   gate->hash      = (void*)&xevan_8way_hash;
 #elif defined (XEVAN_4WAY)
-  gate->scanhash  = (void*)&scanhash_xevan_4way;
+  gate->scanhash  = (void*)&scanhash_4way_64in_32out;
   gate->hash      = (void*)&xevan_4way_hash;
 #else
   init_xevan_ctx();
diff --git a/algo/x17/xevan-gate.h b/algo/x17/xevan-gate.h
index 488d1c0..8ef9a2e 100644
--- a/algo/x17/xevan-gate.h
+++ b/algo/x17/xevan-gate.h
@@ -14,16 +14,11 @@ bool register_xevan_algo( algo_gate_t* gate );
 
 #if defined(XEVAN_8WAY)
 
-int xevan_8way_hash( void *state, const void *input );
-int scanhash_xevan_8way( struct work *work, uint32_t max_nonce,
-                       uint64_t *hashes_done, struct thr_info *mythr );
+int xevan_8way_hash( void *state, const void *input, int thr_id );
+
 #elif defined(XEVAN_4WAY)
 
-int xevan_4way_hash( void *state, const void *input );
-int scanhash_xevan_4way( struct work *work, uint32_t max_nonce,
-                       uint64_t *hashes_done, struct thr_info *mythr );
-
-//void init_xevan_4way_ctx();
+int xevan_4way_hash( void *state, const void *input, int thr_id );
 
 #else
 
diff --git a/configure b/configure
index 19d2efa..d427b66 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.14.2.
+# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.14.3.
 #
 #
 # Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
@@ -577,8 +577,8 @@ MAKEFLAGS=
 # Identity of this package.
 PACKAGE_NAME='cpuminer-opt'
 PACKAGE_TARNAME='cpuminer-opt'
-PACKAGE_VERSION='3.14.2'
-PACKAGE_STRING='cpuminer-opt 3.14.2'
+PACKAGE_VERSION='3.14.3'
+PACKAGE_STRING='cpuminer-opt 3.14.3'
 PACKAGE_BUGREPORT=''
 PACKAGE_URL=''
 
@@ -1332,7 +1332,7 @@ if test "$ac_init_help" = "long"; then
   # Omit some internal or obsolete options to make the list less imposing.
   # This message is too long to be a string in the A/UX 3.1 sh.
   cat <<_ACEOF
-\`configure' configures cpuminer-opt 3.14.2 to adapt to many kinds of systems.
+\`configure' configures cpuminer-opt 3.14.3 to adapt to many kinds of systems.
 
 Usage: $0 [OPTION]... [VAR=VALUE]...
 
@@ -1404,7 +1404,7 @@ fi
 
 if test -n "$ac_init_help"; then
   case $ac_init_help in
-     short | recursive ) echo "Configuration of cpuminer-opt 3.14.2:";;
+     short | recursive ) echo "Configuration of cpuminer-opt 3.14.3:";;
    esac
   cat <<\_ACEOF
 
@@ -1509,7 +1509,7 @@ fi
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
   cat <<\_ACEOF
-cpuminer-opt configure 3.14.2
+cpuminer-opt configure 3.14.3
 generated by GNU Autoconf 2.69
 
 Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2012,7 +2012,7 @@ cat >config.log <<_ACEOF
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.
 
-It was created by cpuminer-opt $as_me 3.14.2, which was
+It was created by cpuminer-opt $as_me 3.14.3, which was
 generated by GNU Autoconf 2.69.  Invocation command line was
 
   $ $0 $@
@@ -2993,7 +2993,7 @@ fi
 
 # Define the identity of the package.
  PACKAGE='cpuminer-opt'
- VERSION='3.14.2'
+ VERSION='3.14.3'
 
 
 cat >>confdefs.h <<_ACEOF
@@ -6690,7 +6690,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by cpuminer-opt $as_me 3.14.2, which was
+This file was extended by cpuminer-opt $as_me 3.14.3, which was
 generated by GNU Autoconf 2.69.  Invocation command line was
 
   CONFIG_FILES    = $CONFIG_FILES
@@ -6756,7 +6756,7 @@ _ACEOF
 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
 ac_cs_version="\\
-cpuminer-opt config.status 3.14.2
+cpuminer-opt config.status 3.14.3
 configured by $0, generated by GNU Autoconf 2.69,
   with options \\"\$ac_cs_config\\"
 
diff --git a/configure.ac b/configure.ac
index 28addf0..9654d17 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,4 +1,4 @@
-AC_INIT([cpuminer-opt], [3.14.2])
+AC_INIT([cpuminer-opt], [3.14.3])
 
 AC_PREREQ([2.59c])
 AC_CANONICAL_SYSTEM
diff --git a/cpu-miner.c b/cpu-miner.c
index 6fdc6bd..0cad187 100644
--- a/cpu-miner.c
+++ b/cpu-miner.c
@@ -92,7 +92,7 @@ bool want_longpoll = false;
 bool have_longpoll = false;
 bool have_gbt = true;
 bool allow_getwork = true;
-bool want_stratum = true;
+bool want_stratum = true;    // pretty useless
 bool have_stratum = false;
 bool allow_mininginfo = true;
 bool use_syslog = false;
@@ -215,7 +215,7 @@ static char const short_options[] =
 
 static struct work g_work __attribute__ ((aligned (64))) = {{ 0 }};
 time_t g_work_time = 0;
-pthread_mutex_t g_work_lock;
+pthread_rwlock_t g_work_lock;
 static bool   submit_old = false;
 char*  lp_id;
 
@@ -1232,7 +1232,7 @@ static int share_result( int result, struct work *work,
 
    if ( use_colors )
    {
-     bcol = acol = scol = rcol = CL_N;
+     bcol = acol = scol = rcol = CL_WHT;
      if ( likely( result ) )
      {
        acol = CL_WHT CL_GRN;  
@@ -1242,27 +1242,22 @@ static int share_result( int result, struct work *work,
      else              rcol = CL_WHT CL_RED;
    }
 
-   applog( LOG_NOTICE, "%d %s%s %s%s %s%s %s%s" CL_N ", %.3f sec (%dms)",
+   applog( LOG_NOTICE, "%d %s%s %s%s %s%s %s%s" CL_WHT ", %.3f sec (%dms)",
            my_stats.share_count, acol, ares, scol, sres, rcol, rres, bcol,
            bres, share_time, latency );
 
-/*   
-   if ( !opt_quiet )
+   if ( unlikely( opt_debug || !result || solved ) )
    {
       if ( have_stratum )
-         applog2( LOG_INFO, "Diff %.5g (%.3g), %sBlock %d" CL_N ", %sJob %s",
-               my_stats.share_diff, share_ratio, bcol, stratum.block_height,
-               scol, my_stats.job_id );
+         applog2( LOG_INFO, "Diff %.5g, Block %d, Job %s",
+               my_stats.share_diff, stratum.block_height,
+               my_stats.job_id );
       else
-      {
-         uint64_t height = work ? work->height : last_block_height;
-         applog2( LOG_INFO, "Diff %.5g (%.3g), %sBlock %d",
-               my_stats.share_diff, share_ratio, bcol, height );
-      }
+         applog2( LOG_INFO, "Diff %.5g, Block %d",
+               my_stats.share_diff, work ? work->height : last_block_height );
    }
-*/
 
-   if ( unlikely( opt_debug || !( opt_quiet || result || stale ) ) )
+   if ( unlikely( !( opt_quiet || result || stale ) ) )
    {
       uint32_t str[8];
 
@@ -1835,9 +1830,9 @@ bool submit_solution( struct work *work, const void *hash,
 
      if unlikely( !have_stratum && !have_longpoll )
      {   // block solved, force getwork
-         pthread_mutex_lock( &g_work_lock );
+         pthread_rwlock_wrlock( &g_work_lock );
          g_work_time = 0;
-         pthread_mutex_unlock( &g_work_lock );
+         pthread_rwlock_unlock( &g_work_lock );
      }
 
      if ( !opt_quiet )
@@ -1960,7 +1955,7 @@ void std_get_new_work( struct work* work, struct work* g_work, int thr_id,
    uint32_t *nonceptr = work->data + algo_gate.nonce_index;
    bool force_new_work = false; 
 
-   pthread_mutex_lock( &g_work_lock );
+   pthread_rwlock_rdlock( &g_work_lock );
    
    if ( have_stratum ) 
       force_new_work = work->job_id ?    strtoul(   work->job_id, NULL, 16 )
@@ -1978,7 +1973,7 @@ void std_get_new_work( struct work* work, struct work* g_work, int thr_id,
    else
        ++(*nonceptr);
 
-   pthread_mutex_unlock( &g_work_lock );
+   pthread_rwlock_unlock( &g_work_lock );
 }
 
 bool std_ready_to_mine( struct work* work, struct stratum_ctx* stratum,
@@ -1998,7 +1993,7 @@ static void stratum_gen_work( struct stratum_ctx *sctx, struct work *g_work )
    bool new_job =  *get_stratum_job_ntime()
                    != g_work->data[ algo_gate.ntime_index ];
 
-   pthread_mutex_lock( &g_work_lock );
+   pthread_rwlock_wrlock( &g_work_lock );
    pthread_mutex_lock( &sctx->work_lock );
 
    free( g_work->job_id );
@@ -2013,11 +2008,13 @@ static void stratum_gen_work( struct stratum_ctx *sctx, struct work *g_work )
    g_work->targetdiff = sctx->job.diff
                            / ( opt_target_factor * opt_diff_factor );
    diff_to_hash( g_work->target, g_work->targetdiff );
+   // Increment extranonce2: bump element t, carry into t+1 only while it wraps to 0
+   for ( size_t t = 0; t < sctx->xnonce2_size && !( ++sctx->job.xnonce2[t] ); t++ );
    g_work_time = time(NULL);
    restart_threads();
 
    pthread_mutex_unlock( &sctx->work_lock );
-   pthread_mutex_unlock( &g_work_lock );
+   pthread_rwlock_unlock( &g_work_lock );
 
    pthread_mutex_lock( &stats_lock );
 
@@ -2037,11 +2034,11 @@ static void stratum_gen_work( struct stratum_ctx *sctx, struct work *g_work )
    else if ( new_job && g_work->job_id )
       applog( LOG_BLUE, "New Work: Block %d, Net diff %.5g, Job %s",
                          sctx->block_height, net_diff, g_work->job_id );
-   else if ( opt_debug )
+   else if ( !opt_quiet )
    {
       unsigned char *xnonce2str = abin2hex( g_work->xnonce2,
                                             g_work->xnonce2_len );
-      applog( LOG_INFO, "Extranonce2 %s, Block %d, Net Diff %.5g",
+      applog( LOG_INFO, "Extranonce %s, Block %d, Net Diff %.5g",
                   xnonce2str, sctx->block_height, net_diff );
       free( xnonce2str );
    }
@@ -2222,24 +2219,24 @@ static void *miner_thread( void *userdata )
           }
           else
           {
-             int scantime = have_longpoll ? LP_SCANTIME : opt_scantime;
-	          pthread_mutex_lock( &g_work_lock );
+             pthread_rwlock_wrlock( &g_work_lock );
 
-             if ( ( ( time(NULL) - g_work_time ) >= scantime )
+             if ( ( ( time(NULL) - g_work_time )
+                 >= ( have_longpoll ? LP_SCANTIME : opt_scantime ) )
                || ( *nonceptr >= end_nonce ) )
              {
                 if ( unlikely( !get_work( mythr, &g_work ) ) )
                 {
-                   pthread_mutex_unlock( &g_work_lock );
+                   pthread_rwlock_unlock( &g_work_lock );
 		             applog( LOG_ERR, "work retrieval failed, exiting "
-		                                      "mining thread %d", thr_id );
+		                              "mining thread %d", thr_id );
 		             goto out;
 	             }
                 g_work_time = time(NULL);
                 restart_threads();
              }
 
-             pthread_mutex_unlock( &g_work_lock );
+             pthread_rwlock_unlock( &g_work_lock );
           }
 
           algo_gate.get_new_work( &work, &g_work, thr_id, &end_nonce );
@@ -2349,10 +2346,10 @@ static void *miner_thread( void *userdata )
           // we can't submit twice a block!
           if unlikely( !have_stratum && !have_longpoll )
           {
-             pthread_mutex_lock( &g_work_lock );
+             pthread_rwlock_wrlock( &g_work_lock );
              // will force getwork
              g_work_time = 0;
-             pthread_mutex_unlock( &g_work_lock );
+             pthread_rwlock_unlock( &g_work_lock );
           }
        }
 
@@ -2384,11 +2381,9 @@ static void *miner_thread( void *userdata )
              if ( use_colors && ( curr_temp >= 70 ) )
              {
                 if ( curr_temp >= 80 )
-                   sprintf( tempstr, "%s%d C%s",
-                                     CL_WHT CL_RED, curr_temp, CL_N );
+                   sprintf( tempstr, "%s%d C%s", CL_RED, curr_temp, CL_WHT );
                 else
-                   sprintf( tempstr, "%s%d C%s",
-                                     CL_WHT CL_YLW, curr_temp, CL_N );
+                   sprintf( tempstr, "%s%d C%s", CL_YLW, curr_temp, CL_WHT );
              }
              else
                 sprintf( tempstr, "%d C", curr_temp );
@@ -2539,7 +2534,8 @@ start:
 	   res = json_object_get(val, "result");
       soval = json_object_get(res, "submitold");
       submit_old = soval ? json_is_true(soval) : false;
-	   pthread_mutex_lock(&g_work_lock);
+
+      pthread_rwlock_wrlock( &g_work_lock );
 
 // This code has been here for a long time even though job_id isn't used.
 // This needs to be changed eventually to test the block height properly
@@ -2573,14 +2569,16 @@ start:
 	     }
       }
       free(start_job_id);
-      pthread_mutex_unlock(&g_work_lock);
+
+      pthread_rwlock_unlock( &g_work_lock );
+
       json_decref(val);
     }
     else   // !val
     {
-       pthread_mutex_lock(&g_work_lock);
-   	 g_work_time -= LP_SCANTIME;
-	    pthread_mutex_unlock(&g_work_lock);
+       pthread_rwlock_wrlock( &g_work_lock );
+       g_work_time -= LP_SCANTIME;
+       pthread_rwlock_unlock( &g_work_lock );
 	    if (err == CURLE_OPERATION_TIMEDOUT)
        {
 	       restart_threads();
@@ -2689,12 +2687,8 @@ void std_build_block_header( struct work* g_work, uint32_t version,
 void std_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
 {
    uchar merkle_tree[64] = { 0 };
-   size_t t;
 
    algo_gate.gen_merkle_root( merkle_tree, sctx );
-   // Increment extranonce2
-   for ( t = 0; t < sctx->xnonce2_size && !( ++sctx->job.xnonce2[t] ); t++ );
-   // Assemble block header
    algo_gate.build_block_header( g_work, le32dec( sctx->job.version ),
           (uint32_t*) sctx->job.prevhash, (uint32_t*) merkle_tree,
           le32dec( sctx->job.ntime ), le32dec(sctx->job.nbits),
@@ -2733,10 +2727,10 @@ static void *stratum_thread(void *userdata )
 
       while ( !stratum.curl )
       {
-         pthread_mutex_lock( &g_work_lock );
+         pthread_rwlock_wrlock( &g_work_lock );
          g_work_time = 0;
-         pthread_mutex_unlock( &g_work_lock );
-         restart_threads();
+         pthread_rwlock_unlock( &g_work_lock );
+//         restart_threads();
          if ( !stratum_connect( &stratum, stratum.url )
               || !stratum_subscribe( &stratum )
               || !stratum_authorize( &stratum, rpc_user, rpc_pass ) )
@@ -2872,167 +2866,180 @@ void parse_arg(int key, char *arg )
 	uint64_t ul;
 	double d;
 
-	switch(key)
-        {
-	   case 'a':
-              get_algo_alias( &arg );
-              for (i = 1; i < ALGO_COUNT; i++)
-              {
-	          v = (int) strlen(algo_names[i]);
-		  if (v && !strncasecmp(arg, algo_names[i], v))
-                  {
-			if (arg[v] == '\0')
-                        {
-				opt_algo = (enum algos) i;
-				break;
-			}
-			if (arg[v] == ':')
-                        {
-				char *ep;
-				v = strtol(arg+v+1, &ep, 10);
-            if (*ep || v < 2)
-					continue;
-				opt_algo = (enum algos) i;
-				opt_param_n = v;
-				break;
-			}
-		  }
+	switch( key )
+   {
+	   case 'a':  // algo
+         get_algo_alias( &arg );
+         for (i = 1; i < ALGO_COUNT; i++)
+         {
+	          v = (int) strlen( algo_names[i] );
+             if ( v && !strncasecmp( arg, algo_names[i], v ) )
+             {
+	             if ( arg[v] == '\0' )
+                {
+		             opt_algo = (enum algos) i;
+			          break;
+		          }
+			       if ( arg[v] == ':' )
+                {
+		             char *ep;
+				       v = strtol( arg+v+1, &ep, 10 );
+                   if ( *ep || v < 2 )
+					       continue;
+				       opt_algo = (enum algos) i;
+				       opt_param_n = v;
+				       break;
+			       }
+		      }
 	      }
-              if (i == ALGO_COUNT)
-              {
-                 applog(LOG_ERR,"Unknown algo: %s",arg);
-                 show_usage_and_exit(1);
-              }
-           break;
+         if ( i == ALGO_COUNT )
+         {
+            applog( LOG_ERR,"Unknown algo: %s",arg );
+            show_usage_and_exit( 1 );
+         }
+      break;
 
-	case 'b':
+	case 'b':  // api-bind
       opt_api_enabled = true;
       p = strstr(arg, ":");
-		if (p) {
+		if ( p )
+      {
 			/* ip:port */
-			if (p - arg > 0) {
+			if ( p - arg > 0 )
+         {
 				opt_api_allow = strdup(arg);
 				opt_api_allow[p - arg] = '\0';
 			}
 			opt_api_listen = atoi(p + 1);
 		}
-		else if (arg && strstr(arg, ".")) {
+		else if ( arg && strstr( arg, "." ) )
+      {
 			/* ip only */
 			free(opt_api_allow);
 			opt_api_allow = strdup(arg);
          opt_api_listen = default_api_listen;
       }
-		else if (arg) {
+		else if ( arg )
+      {
 			/* port or 0 to disable */
          opt_api_allow = default_api_allow;      
          opt_api_listen = atoi(arg);
 		}
       break;
-	case 1030: /* --api-remote */
+	case 1030: // api-remote
 		opt_api_remote = 1;
 		break;
-	case 'B':
+	case 'B':  // background
 		opt_background = true;
 		use_colors = false;
 		break;
-	case 'c': {
+	case 'c': {  // config
 		json_error_t err;
 		json_t *config;
                 
 		if (arg && strstr(arg, "://"))
 			config = json_load_url(arg, &err);
-                else
+      else
 			config = JSON_LOADF(arg, &err);
 		if (!json_is_object(config))
-                {
+      {
 			if (err.line < 0)
 				fprintf(stderr, "%s\n", err.text);
 			else
-				fprintf(stderr, "%s:%d: %s\n",
-					arg, err.line, err.text);
+				fprintf(stderr, "%s:%d: %s\n", arg, err.line, err.text);
 		}
-                else
-                {
+      else
+      {
 			parse_config(config, arg);
 			json_decref(config);
 		}
 		break;
 	}
-	case 'q':
-		opt_quiet = true;
+
+   // debug and protocol override quiet: -q is ignored when either is set
+	case 'q':  // quiet
+		if ( !( opt_debug || opt_protocol ) ) opt_quiet = true;
 		break;
-	case 'D':
+	case 'D':  // debug
 		opt_debug = true;
-		break;
-	case 'p':
+      opt_quiet =	false;
+      break;
+	case 'p':  // pass
 		free(rpc_pass);
 		rpc_pass = strdup(arg);
 		strhide(arg);
 		break;
-	case 'P':
+	case 'P':  // protocol
 		opt_protocol = true;
+      opt_quiet = false;
 		break;
-	case 'r':
+	case 'r':  // retries
 		v = atoi(arg);
 		if (v < -1 || v > 9999) /* sanity check */
 			show_usage_and_exit(1);
 		opt_retries = v;
 		break;
-   case 1025:
+   case 1025:  // retry-pause
       v = atoi(arg);
 		if (v < 1 || v > 9999) /* sanity check */
 			show_usage_and_exit(1);
 		opt_fail_pause = v;
 		break;
-	case 's':
+	case 's':  // scantime
 		v = atoi(arg);
 		if (v < 1 || v > 9999) /* sanity check */
 			show_usage_and_exit(1);
 		opt_scantime = v;
 		break;
-	case 'T':
+	case 'T':  // timeout
 		v = atoi(arg);
 		if (v < 1 || v > 99999) /* sanity check */
 			show_usage_and_exit(1);
 		opt_timeout = v;
 		break;
-	case 't':
+	case 't':  // threads
 		v = atoi(arg);
 		if (v < 0 || v > 9999) /* sanity check */
 			show_usage_and_exit(1);
 		opt_n_threads = v;
 		break;
-	case 'u':
+	case 'u':  // user
 		free(rpc_user);
 		rpc_user = strdup(arg);
 		break;
-	case 'o': {			/* --url */
+	case 'o':  // url
+   {
 		char *ap, *hp;
-		ap = strstr(arg, "://");
+		ap = strstr( arg, "://" );
 		ap = ap ? ap + 3 : arg;
-		hp = strrchr(arg, '@');
-		if (hp) {
+		hp = strrchr( arg, '@' );
+		if ( hp )
+      {
 			*hp = '\0';
-			p = strchr(ap, ':');
-			if (p) {
-				free(rpc_userpass);
-				rpc_userpass = strdup(ap);
-				free(rpc_user);
-				rpc_user = (char*) calloc(p - ap + 1, 1);
-				strncpy(rpc_user, ap, p - ap);
-				free(rpc_pass);
-				rpc_pass = strdup(++p);
-				if (*p) *p++ = 'x';
-				v = (int) strlen(hp + 1) + 1;
-				memmove(p + 1, hp + 1, v);
-				memset(p + v, 0, hp - p);
+			p = strchr( ap, ':' );
+			if ( p )
+         {
+				free( rpc_userpass );
+				rpc_userpass = strdup( ap );
+				free( rpc_user );
+				rpc_user = (char*)calloc( p - ap + 1, 1 );
+				strncpy( rpc_user, ap, p - ap );
+				free( rpc_pass );
+				rpc_pass = strdup( ++p );
+				if ( *p ) *p++ = 'x';
+				v = (int)strlen( hp + 1 ) + 1;
+				memmove( p + 1, hp + 1, v );
+				memset( p + v, 0, hp - p );
 				hp = p;
-			} else {
-				free(rpc_user);
-				rpc_user = strdup(ap);
+			}
+         else
+         {
+				free( rpc_user );
+				rpc_user = strdup( ap );
 			}
 			*hp++ = '@';
-		} else
+		}
+      else
 			hp = ap;
 		if ( ap != arg )
       {
@@ -3048,23 +3055,26 @@ void parse_arg(int key, char *arg )
 			rpc_url = strdup(arg);
 			strcpy(rpc_url + (ap - arg), hp);
 			short_url = &rpc_url[ap - arg];
-		} else {
-			if (*hp == '\0' || *hp == '/') {
-				fprintf(stderr, "invalid URL -- '%s'\n",
-					arg);
-				show_usage_and_exit(1);
+		}
+      else
+      {
+			if ( *hp == '\0' || *hp == '/' )
+         {
+				fprintf( stderr, "invalid URL -- '%s'\n",	arg );
+				show_usage_and_exit( 1 );
 			}
-			free(rpc_url);
+			free( rpc_url );
 			rpc_url = (char*) malloc( strlen(hp) + 15 );
 			sprintf( rpc_url, "stratum+tcp://%s", hp );
 			short_url = &rpc_url[ sizeof("stratum+tcp://") - 1 ];
 		}
-		have_stratum = !opt_benchmark && !strncasecmp(rpc_url, "stratum", 7);
+		have_stratum = !opt_benchmark && !strncasecmp( rpc_url, "stratum", 7 );
 		break;
 	}
-	case 'O':			/* --userpass */
+	case 'O':  // userpass
 		p = strchr(arg, ':');
-		if (!p) {
+		if (!p)
+      {
 			fprintf(stderr, "invalid username:password pair -- '%s'\n", arg);
 			show_usage_and_exit(1);
 		}
@@ -3077,15 +3087,15 @@ void parse_arg(int key, char *arg )
 		rpc_pass = strdup(++p);
 		strhide(p);
 		break;
-	case 'x':			/* --proxy */
-		if (!strncasecmp(arg, "socks4://", 9))
+	case 'x':  // proxy
+		if ( !strncasecmp( arg, "socks4://", 9 ) )
 			opt_proxy_type = CURLPROXY_SOCKS4;
-		else if (!strncasecmp(arg, "socks5://", 9))
+		else if ( !strncasecmp( arg, "socks5://", 9 ) )
 			opt_proxy_type = CURLPROXY_SOCKS5;
 #if LIBCURL_VERSION_NUM >= 0x071200
-		else if (!strncasecmp(arg, "socks4a://", 10))
+		else if ( !strncasecmp( arg, "socks4a://", 10 ) )
 			opt_proxy_type = CURLPROXY_SOCKS4A;
-		else if (!strncasecmp(arg, "socks5h://", 10))
+		else if ( !strncasecmp( arg, "socks5h://", 10 ) )
 			opt_proxy_type = CURLPROXY_SOCKS5_HOSTNAME;
 #endif
 		else
@@ -3093,42 +3103,42 @@ void parse_arg(int key, char *arg )
 		free(opt_proxy);
 		opt_proxy = strdup(arg);
 		break;
-	case 1001:
+	case 1001:  // cert
 		free(opt_cert);
 		opt_cert = strdup(arg);
 		break;
-	case 1002:
+	case 1002:  // no-color
 		use_colors = false;
 		break;
-	case 1003:
+	case 1003:  // no-longpoll
 		want_longpoll = false;
 		break;
-	case 1005:
+	case 1005:  // benchmark
 		opt_benchmark = true;
 		want_longpoll = false;
 		want_stratum = false;
 		have_stratum = false;
 		break;
-	case 1006:
+	case 1006:  // cputest
 //		print_hash_tests();
 		exit(0);
-	case 1007:
+	case 1007:  // no-stratum
 		want_stratum = false;
 		opt_extranonce = false;
 		break;
-	case 1008:
+	case 1008:  // time-limit
 		opt_time_limit = atoi(arg);
 		break;
-	case 1009:
+	case 1009:  // no-redirect
 		opt_redirect = false;
 		break;
-	case 1010:
+	case 1010:  // no-getwork
 		allow_getwork = false;
 		break;
-	case 1011:
+	case 1011:  // no-gbt
 		have_gbt = false;
 		break;
-	case 1012:
+	case 1012:  // no-extranonce
 		opt_extranonce = false;
 		break;
    case 1014:   // hash-meter
@@ -3138,11 +3148,12 @@ void parse_arg(int key, char *arg )
       if ( arg ) coinbase_address = strdup( arg );
 		break;
 	case 1015:			/* --coinbase-sig */
-		if (strlen(arg) + 1 > sizeof(coinbase_sig)) {
-			fprintf(stderr, "coinbase signature too long\n");
-			show_usage_and_exit(1);
+		if ( strlen( arg ) + 1 > sizeof(coinbase_sig) )
+      {
+			fprintf( stderr, "coinbase signature too long\n" );
+			show_usage_and_exit( 1 );
 		}
-		strcpy(coinbase_sig, arg);
+		strcpy( coinbase_sig, arg );
 		break;
 	case 'f':
 		d = atof(arg);
@@ -3156,11 +3167,13 @@ void parse_arg(int key, char *arg )
 			show_usage_and_exit(1);
 		opt_diff_factor = 1.0/d;
 		break;
-	case 'S':
+#ifdef HAVE_SYSLOG_H
+	case 'S':  // syslog
 		use_syslog = true;
 		use_colors = false;
 		break;
-	case 1020:
+#endif
+	case 1020:  // cpu-affinity
 		p = strstr(arg, "0x");
 		if ( p )
 			ul = strtoull( p, NULL, 16 );
@@ -3171,14 +3184,14 @@ void parse_arg(int key, char *arg )
 #if AFFINITY_USES_UINT128
 // replicate the low 64 bits to make a full 128 bit mask if there are more
 // than 64 CPUs, otherwise zero extend the upper half.
-                opt_affinity = (uint128_t)ul;
-                if ( num_cpus > 64 )
-                   opt_affinity = (opt_affinity << 64 ) | opt_affinity;
+         opt_affinity = (uint128_t)ul;
+         if ( num_cpus > 64 )
+            opt_affinity = (opt_affinity << 64 ) | opt_affinity;
 #else
-                   opt_affinity = ul;
+         opt_affinity = ul;
 #endif
 		break;
-	case 1021:
+	case 1021:  // cpu-priority
 		v = atoi(arg);
 		if (v < 0 || v > 5)	/* sanity check */
 			show_usage_and_exit(1);
@@ -3637,7 +3650,7 @@ int main(int argc, char *argv[])
    if ( !check_cpu_capability() ) exit(1);
 
 	pthread_mutex_init( &stats_lock, NULL );
-	pthread_mutex_init( &g_work_lock, NULL );
+   pthread_rwlock_init( &g_work_lock, NULL );
 	pthread_mutex_init( &stratum.sock_lock, NULL );
 	pthread_mutex_init( &stratum.work_lock, NULL );
 
@@ -3797,7 +3810,7 @@ int main(int argc, char *argv[])
 			return 1;
 		}
 	}
-	if (want_stratum)
+	if ( have_stratum )
    {
       if ( opt_debug )
          applog(LOG_INFO,"Creating stratum thread");
diff --git a/miner.h b/miner.h
index 52a3430..626ab09 100644
--- a/miner.h
+++ b/miner.h
@@ -83,6 +83,8 @@ enum {
 };
 #endif
 
+extern bool is_power_of_2( int n );
+
 static inline bool is_windows(void)
 {
 #ifdef WIN32
@@ -378,36 +380,25 @@ void   cpu_brand_string( char* s );
 float cpu_temp( int core );
 */
 
-struct work {
+struct work   // the struct as a whole is 64-byte aligned (see closing attribute)
+{
+   uint32_t target[8] __attribute__ ((aligned (64)));   // declared ahead of data[]; each array is 64-byte aligned
 	uint32_t data[48] __attribute__ ((aligned (64)));
-	uint32_t target[8] __attribute__ ((aligned (64)));
-
 	double targetdiff;
-//	double shareratio;
 	double sharediff;
    double stratum_diff;
-
 	int height;
 	char *txs;
 	char *workid;
-
 	char *job_id;
 	size_t xnonce2_len;
 	unsigned char *xnonce2;
    bool sapling;
    bool stale;
-
-   // x16rt
-   uint32_t merkleroothash[8];
-   uint32_t witmerkleroothash[8];
-   uint32_t denom10[8];
-   uint32_t denom100[8];
-   uint32_t denom1000[8];
-   uint32_t denom10000[8];
-
 } __attribute__ ((aligned (64)));
 
-struct stratum_job {
+struct stratum_job
+{
 	unsigned char prevhash[32];
    unsigned char final_sapling_hash[32];
    char *job_id;
@@ -421,7 +412,7 @@ struct stratum_job {
 	unsigned char ntime[4];
 	double diff;
    bool clean;
-   // for x16rt
+   // for x16rt-veil
    unsigned char extra[64];
    unsigned char denom10[32];
    unsigned char denom100[32];
@@ -756,7 +747,7 @@ extern double opt_diff_factor;
 extern double opt_target_factor;
 extern bool opt_randomize;
 extern bool allow_mininginfo;
-extern pthread_mutex_t g_work_lock;
+extern pthread_rwlock_t g_work_lock;
 extern time_t g_work_time;
 extern bool opt_stratum_stats;
 extern int num_cpus;
diff --git a/simd-utils/simd-512.h b/simd-utils/simd-512.h
index cc1e38c..571c36b 100644
--- a/simd-utils/simd-512.h
+++ b/simd-utils/simd-512.h
@@ -375,10 +375,10 @@ static inline void memcpy_512( __m512i *dst, const __m512i *src, const int n )
 
 // Generic for odd rotations
 #define mm512_ror_x64( v, n )      _mm512_alignr_epi64( v, v, n )
-#define mm512_rol_x64( v, n )      _mm512_alignr_epi64( v, v, 8-n )
+#define mm512_rol_x64( v, n )      _mm512_alignr_epi64( v, v, 8-(n) )
 
 #define mm512_ror_x32( v, n )      _mm512_alignr_epi32( v, v, n )
-#define mm512_rol_x32( v, n )      _mm512_alignr_epi32( v, v, 16-n )
+#define mm512_rol_x32( v, n )      _mm512_alignr_epi32( v, v, 16-(n) )
 
 #define mm512_ror_1x16( v ) \
    _mm512_permutexvar_epi16( m512_const_64( \
diff --git a/util.c b/util.c
index 9a7d7cb..14c7286 100644
--- a/util.c
+++ b/util.c
@@ -81,6 +81,15 @@ struct thread_q {
 	pthread_cond_t		cond;
 };
 
+// Returns true if n is a positive power of 2 (1, 2, 4, 8, ...).
+// Zero and negative values are never powers of 2 and return false.
+bool is_power_of_2( int n )
+{
+  if ( n <= 0 ) return false;
+  // A power of 2 has exactly one bit set, so clearing the lowest set
+  // bit with n & (n - 1) leaves zero only for powers of 2.
+  return ( n & ( n - 1 ) ) == 0;
+}
 
 void applog2( int prio, const char *fmt, ... )
 {
@@ -609,6 +618,8 @@ json_t *json_rpc_call(CURL *curl, const char *url,
 		goto err_out;
 	}
 
+// want_stratum appears to be useless, and so does this code: nothing in
+// hi (e.g. hi.stratum_url, tested below) appears to ever be set.
 	/* If X-Stratum was found, activate Stratum */
 	if (want_stratum && hi.stratum_url &&
 	    !strncasecmp(hi.stratum_url, "stratum+tcp://", 14)) {