Compare commits


4 Commits

Author     SHA1        Message   Date
Jay D Dee  3363d61524  v3.8.4.1  2018-03-22 14:28:03 -04:00
Jay D Dee  20fe05054c  v3.8.4    2018-03-18 12:51:03 -04:00
Jay D Dee  157508bd07  v3.8.3.3  2018-02-25 14:15:07 -05:00
Jay D Dee  c24a4bdbc2  v3.8.3.2  2018-02-24 14:36:19 -05:00
22 changed files with 1706 additions and 1321 deletions


@@ -68,6 +68,7 @@ cpuminer_SOURCES = \
algo/cryptonight/cryptonight.c\
algo/cubehash/sph_cubehash.c \
algo/cubehash/sse2/cubehash_sse2.c\
algo/cubehash/cube-hash-2way.c \
algo/echo/sph_echo.c \
algo/echo/aes_ni/hash.c\
algo/gost/sph_gost.c \
@@ -242,7 +243,7 @@ cpuminer_SOURCES = \
algo/x17/hmq1725.c \
algo/yescrypt/yescrypt.c \
algo/yescrypt/sha256_Y.c \
algo/yescrypt/yescrypt-simd.c
algo/yescrypt/yescrypt-best.c
disable_flags =


@@ -28,11 +28,12 @@ performance.
ARM CPUs are not supported.
2. 64 bit Linux OS. Ubuntu and Fedora based distributions, including Mint and
Centos are known to work and have all dependencies in their repositories.
Others may work but may require more effort.
Centos, are known to work and have all dependencies in their repositories.
Others may work but may require more effort. Older versions such as Centos 6
don't work due to missing features.
64 bit Windows OS is supported with mingw_w64 and msys or pre-built binaries.
MacOS, OSx is not supported.
MacOS, OSx and Android are not supported.
3. Stratum pool. Some algos may work for wallet mining using getwork or GBT. YMMV.
@@ -110,6 +111,7 @@ Supported Algorithms
yescrypt Globalboost-Y (BSTY)
yescryptr8 BitZeny (ZNY)
yescryptr16 Yenten (YTN)
yescryptr32 WAVI
zr5 Ziftr
Errata


@@ -1,4 +1,4 @@
cpuminer-opt now supports HW SHA acceleration available on AMD Ryzen CPUs.
This feature requires recent SW including GCC version 5 or higher and
openssl version 1.1 or higher. It may also require using "-march=znver1"
compile flag.
@@ -90,7 +90,8 @@ Additional optional compile flags, add the following to CFLAGS to activate:
SPH may give slightly better performance on algos that use sha256 when using
openssl 1.0.1 or older. Openssl 1.0.2 adds AVX2 and 1.1 adds SHA and perform
better than SPH.
better than SPH. This option is ignored when 4-way is used, even for CPUs
with SHA.
Start mining.
@@ -159,6 +160,34 @@ Support for even older x86_64 without AES_NI or SSE2 is not available.
Change Log
----------
v3.8.4.1
Fixed sha256t low difficulty rejects.
Fixed compile error on CPUs with AVX512.
v3.8.4
Added yescryptr32 algo for WAVI coin.
Added URL to API data.
Improved detection of __int128 support (Linux only).
Compile support for CPUs without SSSE3 (no binary support).
v3.8.3.3
Integrated getblocktemplate with algo_gate.
Added support for hodl gbt (untested).
Reworked some recent quick fixes.
v3.8.3.2
Reverted gbt changes from v3.8.0 that broke getwork.
Reverted scaled hash rate for API, added HS term in addition to KHS.
Added blocks solved to console display and API.
v3.8.3.1
Fixed regression in v3.8.3 that broke several algos.
v3.8.3
More restoration of lost lyra2 hash.


@@ -119,9 +119,11 @@ void init_algo_gate( algo_gate_t* gate )
gate->gen_merkle_root = (void*)&sha256d_gen_merkle_root;
gate->stratum_gen_work = (void*)&std_stratum_gen_work;
gate->build_stratum_request = (void*)&std_le_build_stratum_request;
gate->malloc_txs_request = (void*)&std_malloc_txs_request;
gate->set_target = (void*)&std_set_target;
gate->work_decode = (void*)&std_le_work_decode;
gate->submit_getwork_result = (void*)&std_le_submit_getwork_result;
gate->build_block_header = (void*)&std_build_block_header;
gate->build_extraheader = (void*)&std_build_extraheader;
gate->set_work_data_endian = (void*)&do_nothing;
gate->calc_network_diff = (void*)&std_calc_network_diff;
@@ -225,6 +227,7 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
case ALGO_YESCRYPT: register_yescrypt_algo ( gate ); break;
case ALGO_YESCRYPTR8: register_yescryptr8_algo ( gate ); break;
case ALGO_YESCRYPTR16: register_yescryptr16_algo ( gate ); break;
case ALGO_YESCRYPTR32: register_yescryptr32_algo ( gate ); break;
case ALGO_ZR5: register_zr5_algo ( gate ); break;
default:
applog(LOG_ERR,"FAIL: algo_gate registration failed, unknown algo %s.\n", algo_names[opt_algo] );
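The hunk above shows cpuminer-opt's algo_gate dispatch pattern: init_algo_gate() fills a table of function pointers with standard defaults (std_* or do_nothing), and each register_*_algo() case overrides only the entries its algorithm needs -- here the new malloc_txs_request and build_block_header slots, plus the yescryptr32 registration. A minimal sketch of the idea (a hypothetical miniature, not the real gate):

// Illustrative only: the default-then-override function table pattern.
#include <stdio.h>

typedef struct
{
   void (*set_target)( double d );
   void (*build_header)( void );
} mini_gate_t;

static void std_set_target( double d ) { printf( "std target %f\n", d ); }
static void std_build_header( void )   { printf( "std header\n" ); }
static void hodl_build_header( void )  { printf( "hodl header\n" ); }

static void init_mini_gate( mini_gate_t *g )
{
   g->set_target   = std_set_target;      // defaults for everything
   g->build_header = std_build_header;
}

static void register_mini_hodl( mini_gate_t *g )
{
   g->build_header = hodl_build_header;   // override only what differs
}

int main()
{
   mini_gate_t gate;
   init_mini_gate( &gate );
   register_mini_hodl( &gate );
   gate.set_target( 1.0 );                // runs the std default
   gate.build_header();                   // runs the hodl override
   return 0;
}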


@@ -127,7 +127,10 @@ void ( *set_target) ( struct work*, double );
bool ( *submit_getwork_result ) ( CURL*, struct work* );
void ( *gen_merkle_root ) ( char*, struct stratum_ctx* );
void ( *build_extraheader ) ( struct work*, struct stratum_ctx* );
void ( *build_block_header ) ( struct work*, uint32_t, uint32_t*,
uint32_t*, uint32_t, uint32_t );
void ( *build_stratum_request ) ( char*, struct work*, struct stratum_ctx* );
char* ( *malloc_txs_request ) ( struct work* );
void ( *set_work_data_endian ) ( struct work* );
double ( *calc_network_diff ) ( struct work* );
bool ( *ready_to_mine ) ( struct work*, struct stratum_ctx*, int );
@@ -228,11 +231,17 @@ void std_le_build_stratum_request( char *req, struct work *work );
void std_be_build_stratum_request( char *req, struct work *work );
void jr2_build_stratum_request ( char *req, struct work *work );
char* std_malloc_txs_request( struct work *work );
// Default is do_nothing (assumed LE)
void set_work_data_big_endian( struct work *work );
double std_calc_network_diff( struct work *work );
void std_build_block_header( struct work* g_work, uint32_t version,
uint32_t *prevhash, uint32_t *merkle_root,
uint32_t ntime, uint32_t nbits );
void std_build_extraheader( struct work *work, struct stratum_ctx *sctx );
json_t* std_longpoll_rpc_call( CURL *curl, int *err, char *lp_url );


@@ -0,0 +1,205 @@
#if defined(__AVX2__)
#include <stdbool.h>
#include <unistd.h>
#include <memory.h>
#include "cube-hash-2way.h"
// 2x128
static void transform_2way( cube_2way_context *sp )
{
int r;
const int rounds = sp->rounds;
__m256i x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3;
x0 = _mm256_load_si256( (__m256i*)sp->h );
x1 = _mm256_load_si256( (__m256i*)sp->h + 1 );
x2 = _mm256_load_si256( (__m256i*)sp->h + 2 );
x3 = _mm256_load_si256( (__m256i*)sp->h + 3 );
x4 = _mm256_load_si256( (__m256i*)sp->h + 4 );
x5 = _mm256_load_si256( (__m256i*)sp->h + 5 );
x6 = _mm256_load_si256( (__m256i*)sp->h + 6 );
x7 = _mm256_load_si256( (__m256i*)sp->h + 7 );
for ( r = 0; r < rounds; ++r )
{
x4 = _mm256_add_epi32( x0, x4 );
x5 = _mm256_add_epi32( x1, x5 );
x6 = _mm256_add_epi32( x2, x6 );
x7 = _mm256_add_epi32( x3, x7 );
y0 = x2;
y1 = x3;
y2 = x0;
y3 = x1;
x0 = _mm256_xor_si256( _mm256_slli_epi32( y0, 7 ),
_mm256_srli_epi32( y0, 25 ) );
x1 = _mm256_xor_si256( _mm256_slli_epi32( y1, 7 ),
_mm256_srli_epi32( y1, 25 ) );
x2 = _mm256_xor_si256( _mm256_slli_epi32( y2, 7 ),
_mm256_srli_epi32( y2, 25 ) );
x3 = _mm256_xor_si256( _mm256_slli_epi32( y3, 7 ),
_mm256_srli_epi32( y3, 25 ) );
x0 = _mm256_xor_si256( x0, x4 );
x1 = _mm256_xor_si256( x1, x5 );
x2 = _mm256_xor_si256( x2, x6 );
x3 = _mm256_xor_si256( x3, x7 );
x4 = mm256_swap128_64( x4 );
x5 = mm256_swap128_64( x5 );
x6 = mm256_swap128_64( x6 );
x7 = mm256_swap128_64( x7 );
x4 = _mm256_add_epi32( x0, x4 );
x5 = _mm256_add_epi32( x1, x5 );
x6 = _mm256_add_epi32( x2, x6 );
x7 = _mm256_add_epi32( x3, x7 );
y0 = x1;
y1 = x0;
y2 = x3;
y3 = x2;
x0 = _mm256_xor_si256( _mm256_slli_epi32( y0, 11 ),
_mm256_srli_epi32( y0, 21 ) );
x1 = _mm256_xor_si256( _mm256_slli_epi32( y1, 11 ),
_mm256_srli_epi32( y1, 21 ) );
x2 = _mm256_xor_si256( _mm256_slli_epi32( y2, 11 ),
_mm256_srli_epi32( y2, 21 ) );
x3 = _mm256_xor_si256( _mm256_slli_epi32( y3, 11 ),
_mm256_srli_epi32( y3, 21 ) );
x0 = _mm256_xor_si256( x0, x4 );
x1 = _mm256_xor_si256( x1, x5 );
x2 = _mm256_xor_si256( x2, x6 );
x3 = _mm256_xor_si256( x3, x7 );
x4 = mm256_swap64_32( x4 );
x5 = mm256_swap64_32( x5 );
x6 = mm256_swap64_32( x6 );
x7 = mm256_swap64_32( x7 );
}
_mm256_store_si256( (__m256i*)sp->h, x0 );
_mm256_store_si256( (__m256i*)sp->h + 1, x1 );
_mm256_store_si256( (__m256i*)sp->h + 2, x2 );
_mm256_store_si256( (__m256i*)sp->h + 3, x3 );
_mm256_store_si256( (__m256i*)sp->h + 4, x4 );
_mm256_store_si256( (__m256i*)sp->h + 5, x5 );
_mm256_store_si256( (__m256i*)sp->h + 6, x6 );
_mm256_store_si256( (__m256i*)sp->h + 7, x7 );
}
cube_2way_context cube_2way_ctx_cache __attribute__ ((aligned (64)));
int cube_2way_reinit( cube_2way_context *sp )
{
memcpy( sp, &cube_2way_ctx_cache, sizeof(cube_2way_context) );
return 0;
}
int cube_2way_init( cube_2way_context *sp, int hashbitlen, int rounds,
int blockbytes )
{
int i;
// all sizes are in units of __m128i
cube_2way_ctx_cache.hashlen = hashbitlen/128;
cube_2way_ctx_cache.blocksize = blockbytes/16;
cube_2way_ctx_cache.rounds = rounds;
cube_2way_ctx_cache.pos = 0;
for ( i = 0; i < 8; ++i )
cube_2way_ctx_cache.h[i] = m256_zero;
cube_2way_ctx_cache.h[0] = _mm256_set_epi32(
0, rounds, blockbytes, hashbitlen / 8,
0, rounds, blockbytes, hashbitlen / 8 );
for ( i = 0; i < 10; ++i )
transform_2way( &cube_2way_ctx_cache );
memcpy( sp, &cube_2way_ctx_cache, sizeof(cube_2way_context) );
return 0;
}
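// The ten transforms above bake the (hashbitlen, rounds, blockbytes)
// parameters into the cached IV exactly once; cube_2way_reinit() can then
// restore a fresh context with a single memcpy instead of repeating them.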
int cube_2way_update( cube_2way_context *sp, const void *data, size_t size )
{
const int len = size / 16;
const __m256i *in = (__m256i*)data;
int i;
// It is assumed data is aligned to 256 bits and is a multiple of 128 bits.
// Current usage: data is either 64 or 80 bytes per lane.
for ( i = 0; i < len; i++ )
{
sp->h[ sp->pos ] = _mm256_xor_si256( sp->h[ sp->pos ], in[i] );
sp->pos++;
if ( sp->pos == sp->blocksize )
{
transform_2way( sp );
sp->pos = 0;
}
}
return 0;
}
int cube_2way_close( cube_2way_context *sp, void *output )
{
__m256i *hash = (__m256i*)output;
int i;
// pos is zero for 64 byte data, 1 for 80 byte data.
sp->h[ sp->pos ] = _mm256_xor_si256( sp->h[ sp->pos ],
_mm256_set_epi8( 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0x80,
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0x80 ) );
transform_2way( sp );
sp->h[7] = _mm256_xor_si256( sp->h[7], _mm256_set_epi32( 1,0,0,0,
1,0,0,0 ) );
for ( i = 0; i < 10; ++i )
transform_2way( sp );
for ( i = 0; i < sp->hashlen; i++ )
hash[i] = sp->h[i];
return 0;
}
int cube_2way_update_close( cube_2way_context *sp, void *output,
const void *data, size_t size )
{
const int len = size / 16;
const __m256i *in = (__m256i*)data;
__m256i *hash = (__m256i*)output;
int i;
for ( i = 0; i < len; i++ )
{
sp->h[ sp->pos ] = _mm256_xor_si256( sp->h[ sp->pos ], in[i] );
sp->pos++;
if ( sp->pos == sp->blocksize )
{
transform_2way( sp );
sp->pos = 0;
}
}
// pos is zero for 64 byte data, 1 for 80 byte data.
sp->h[ sp->pos ] = _mm256_xor_si256( sp->h[ sp->pos ],
_mm256_set_epi8( 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0x80,
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0x80 ) );
transform_2way( sp );
sp->h[7] = _mm256_xor_si256( sp->h[7], _mm256_set_epi32( 1,0,0,0,
1,0,0,0 ) );
for ( i = 0; i < 10; ++i )
transform_2way( sp );
for ( i = 0; i < sp->hashlen; i++ )
hash[i] = sp->h[i];
return 0;
}
#endif


@@ -0,0 +1,36 @@
#ifndef CUBE_HASH_2WAY_H__
#define CUBE_HASH_2WAY_H__
#if defined(__AVX2__)
#include <stdint.h>
#include "avxdefs.h"
// 2x128: two 128-bit (SSE2-width) lanes computed in parallel with AVX2
struct _cube_2way_context
{
int hashlen; // __m128i
int rounds;
int blocksize; // __m128i
int pos; // number of __m128i read into x from current block
__m256i h[8] __attribute__ ((aligned (64)));
};
typedef struct _cube_2way_context cube_2way_context;
int cube_2way_init( cube_2way_context* sp, int hashbitlen, int rounds,
int blockbytes );
// reinitialize context with same parameters, much faster.
int cube_2way_reinit( cube_2way_context *sp );
int cube_2way_update( cube_2way_context *sp, const void *data, size_t size );
int cube_2way_close( cube_2way_context *sp, void *output );
int cube_2way_update_close( cube_2way_context *sp, void *output,
const void *data, size_t size );
#endif
#endif
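For orientation, a usage sketch of this API. Everything below is an assumption spelled out, not repo code: the (512, 16, 32) init parameters are the usual CubeHash512 settings, the size argument counts bytes per lane, and the caller is responsible for interleaving the two messages 128 bits at a time, matching the 2x128 layout transform_2way() operates on.

// Minimal sketch: hash two independent 64-byte inputs in parallel.
#include <stdint.h>
#include <string.h>
#include "cube-hash-2way.h"

void cube512_2way_example( const void *lane0, const void *lane1,
                           void *hash0, void *hash1 )
{
   cube_2way_context ctx __attribute__ ((aligned (64)));
   uint8_t in [128] __attribute__ ((aligned (64)));  // 2 x 64 byte messages
   uint8_t out[128] __attribute__ ((aligned (64)));  // 2 x 64 byte digests
   int i;

   // interleave: 16 bytes of lane0, then 16 bytes of lane1, repeated
   for ( i = 0; i < 4; i++ )
   {
      memcpy( in + 32*i,      (const uint8_t*)lane0 + 16*i, 16 );
      memcpy( in + 32*i + 16, (const uint8_t*)lane1 + 16*i, 16 );
   }

   cube_2way_init( &ctx, 512, 16, 32 );          // assumed CubeHash512 params
   cube_2way_update_close( &ctx, out, in, 64 );  // 64 = bytes per lane

   // de-interleave the two 512-bit results
   for ( i = 0; i < 4; i++ )
   {
      memcpy( (uint8_t*)hash0 + 16*i, out + 32*i,      16 );
      memcpy( (uint8_t*)hash1 + 16*i, out + 32*i + 16, 16 );
   }
}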


@@ -42,15 +42,82 @@ void hodl_le_build_stratum_request( char* req, struct work* work,
free( xnonce2str );
}
void hodl_build_extraheader( struct work* g_work, struct stratum_ctx *sctx )
char* hodl_malloc_txs_request( struct work *work )
{
uchar merkle_root[64] = { 0 };
size_t t;
char* req;
json_t *val;
char data_str[2 * sizeof(work->data) + 1];
int i;
algo_gate.gen_merkle_root( merkle_root, sctx );
for ( i = 0; i < ARRAY_SIZE(work->data); i++ )
be32enc( work->data + i, work->data[i] );
bin2hex( data_str, (unsigned char *)work->data, 88 );
if ( work->workid )
{
char *params;
val = json_object();
json_object_set_new( val, "workid", json_string( work->workid ) );
params = json_dumps( val, 0 );
json_decref( val );
req = malloc( 128 + 2*88 + strlen( work->txs ) + strlen( params ) );
sprintf( req,
"{\"method\": \"submitblock\", \"params\": [\"%s%s\", %s], \"id\":1}\r\n",
data_str, work->txs, params);
free( params );
}
else
{
req = malloc( 128 + 2*88 + strlen(work->txs));
sprintf( req,
"{\"method\": \"submitblock\", \"params\": [\"%s%s\"], \"id\":1}\r\n",
data_str, work->txs);
}
return req;
}
void hodl_build_block_header( struct work* g_work, uint32_t version,
uint32_t *prevhash, uint32_t *merkle_tree,
uint32_t ntime, uint32_t nbits )
{
int i;
memset( g_work->data, 0, sizeof(g_work->data) );
g_work->data[0] = version;
if ( have_stratum )
for ( i = 0; i < 8; i++ )
g_work->data[ 1+i ] = le32dec( prevhash + i );
else
for (i = 0; i < 8; i++)
g_work->data[ 8-i ] = le32dec( prevhash + i );
for ( i = 0; i < 8; i++ )
g_work->data[ 9+i ] = be32dec( merkle_tree + i );
g_work->data[ algo_gate.ntime_index ] = ntime;
g_work->data[ algo_gate.nbits_index ] = nbits;
g_work->data[22] = 0x80000000;
g_work->data[31] = 0x00000280;
}
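// For reference, the g_work->data[] word layout filled in above (ntime and
// nbits land wherever the gate's ntime_index/nbits_index point, 17 and 18
// in the usual bitcoin-style layout):
//
//    [0]        version
//    [1..8]     previous block hash (LE words for stratum, reversed otherwise)
//    [9..16]    merkle root (BE words)
//    [ntime_index]  ntime
//    [nbits_index]  nbits
//    [22]       0x80000000  padding marker (the std layout uses [20])
//    [31]       0x00000280  padded length field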
// hodl_build_extraheader is redundant; hodl can use std_build_extraheader,
// which calls hodl_build_block_header through the gate.
#if 0
void hodl_build_extraheader( struct work* g_work, struct stratum_ctx *sctx )
{
uchar merkle_tree[64] = { 0 };
size_t t;
// int i;
algo_gate.gen_merkle_root( merkle_tree, sctx );
// Increment extranonce2
for ( t = 0; t < sctx->xnonce2_size && !( ++sctx->job.xnonce2[t] ); t++ );
algo_gate.build_block_header( g_work, le32dec( sctx->job.version ),
(uint32_t*) sctx->job.prevhash, (uint32_t*) merkle_tree,
le32dec( sctx->job.ntime ), le32dec( sctx->job.nbits ) );
/*
// Assemble block header
memset( g_work->data, 0, sizeof(g_work->data) );
g_work->data[0] = le32dec( sctx->job.version );
@@ -63,7 +130,9 @@ void hodl_build_extraheader( struct work* g_work, struct stratum_ctx *sctx )
g_work->data[ algo_gate.nbits_index ] = le32dec( sctx->job.nbits );
g_work->data[22] = 0x80000000;
g_work->data[31] = 0x00000280;
*/
}
#endif
// called only by thread 0, saves a backup of g_work
void hodl_get_new_work( struct work* work, struct work* g_work)
@@ -73,6 +142,22 @@ void hodl_get_new_work( struct work* work, struct work* g_work)
hodl_work.data[ algo_gate.nonce_index ] = ( clock() + rand() ) % 9999;
}
json_t *hodl_longpoll_rpc_call( CURL *curl, int *err, char* lp_url )
{
json_t *val;
char *req = NULL;
if ( have_gbt )
{
req = malloc( strlen( gbt_lp_req ) + strlen( lp_id ) + 1 );
sprintf( req, gbt_lp_req, lp_id );
}
val = json_rpc_call( curl, lp_url, rpc_userpass,
req ? req : getwork_req, err, JSON_RPC_LONGPOLL );
free( req );
return val;
}
// called by every thread, copies the backup to each thread's work.
void hodl_resync_threads( struct work* work )
{
@@ -108,17 +193,26 @@ bool register_hodl_algo( algo_gate_t* gate )
applog( LOG_ERR, "Only CPUs with AES are supported, use legacy version.");
return false;
#endif
// if ( TOTAL_CHUNKS % opt_n_threads )
// {
// applog(LOG_ERR,"Thread count must be power of 2.");
// return false;
// }
pthread_barrier_init( &hodl_barrier, NULL, opt_n_threads );
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT;
gate->optimizations = AES_OPT | AVX_OPT | AVX2_OPT;
gate->scanhash = (void*)&hodl_scanhash;
gate->get_new_work = (void*)&hodl_get_new_work;
gate->longpoll_rpc_call = (void*)&hodl_longpoll_rpc_call;
gate->set_target = (void*)&hodl_set_target;
gate->build_stratum_request = (void*)&hodl_le_build_stratum_request;
gate->build_extraheader = (void*)&hodl_build_extraheader;
gate->malloc_txs_request = (void*)&hodl_malloc_txs_request;
gate->build_block_header = (void*)&hodl_build_block_header;
// gate->build_extraheader = (void*)&hodl_build_extraheader;
gate->resync_threads = (void*)&hodl_resync_threads;
gate->do_this_thread = (void*)&hodl_do_this_thread;
gate->work_cmp_size = 76;
hodl_scratchbuf = (unsigned char*)malloc( 1 << 30 );
allow_getwork = false;
return ( hodl_scratchbuf != NULL );
}


@@ -10,8 +10,13 @@
#ifndef NO_AES_NI
void GenerateGarbageCore(CacheEntry *Garbage, int ThreadID, int ThreadCount, void *MidHash)
void GenerateGarbageCore( CacheEntry *Garbage, int ThreadID, int ThreadCount,
void *MidHash )
{
const int Chunk = TOTAL_CHUNKS / ThreadCount;
const uint32_t StartChunk = ThreadID * Chunk;
const uint32_t EndChunk = StartChunk + Chunk;
#ifdef __AVX__
uint64_t* TempBufs[ SHA512_PARALLEL_N ] ;
uint64_t* desination[ SHA512_PARALLEL_N ];
@@ -22,9 +27,7 @@ void GenerateGarbageCore(CacheEntry *Garbage, int ThreadID, int ThreadCount, voi
memcpy( TempBufs[i], MidHash, 32 );
}
uint32_t StartChunk = ThreadID * (TOTAL_CHUNKS / ThreadCount);
for ( uint32_t i = StartChunk;
i < StartChunk + (TOTAL_CHUNKS / ThreadCount); i+= SHA512_PARALLEL_N )
for ( uint32_t i = StartChunk; i < EndChunk; i += SHA512_PARALLEL_N )
{
for ( int j = 0; j < SHA512_PARALLEL_N; ++j )
{
@@ -41,9 +44,7 @@ void GenerateGarbageCore(CacheEntry *Garbage, int ThreadID, int ThreadCount, voi
uint32_t TempBuf[8];
memcpy( TempBuf, MidHash, 32 );
uint32_t StartChunk = ThreadID * (TOTAL_CHUNKS / ThreadCount);
for ( uint32_t i = StartChunk;
i < StartChunk + (TOTAL_CHUNKS / ThreadCount); ++i )
for ( uint32_t i = StartChunk; i < EndChunk; ++i )
{
TempBuf[0] = i;
SHA512( ( uint8_t *)TempBuf, 32,

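The hunk above only hoists the per-thread chunk arithmetic into named constants, which makes the partitioning easy to check. A worked instance with hypothetical numbers:

// TOTAL_CHUNKS = 1024, ThreadCount = 4  =>  Chunk = 256
//   thread 0: chunks [  0, 256)     thread 1: chunks [256,  512)
//   thread 2: chunks [512, 768)     thread 3: chunks [768, 1024)
// The AVX path walks its range SHA512_PARALLEL_N chunks at a time, the
// scalar path one at a time. If ThreadCount does not divide TOTAL_CHUNKS
// the trailing chunks are never generated, which is presumably what the
// commented-out thread-count check in register_hodl_algo guards against.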

@@ -55,23 +55,23 @@ static inline uint64_t rotr64( const uint64_t w, const unsigned c ){
// returns void, updates all args
#define G_4X64(a,b,c,d) \
a = _mm256_add_epi64( a, b ); \
d = mm256_rotr_64( _mm256_xor_si256( d, a), 32 ); \
d = mm256_ror_64( _mm256_xor_si256( d, a), 32 ); \
c = _mm256_add_epi64( c, d ); \
b = mm256_rotr_64( _mm256_xor_si256( b, c ), 24 ); \
b = mm256_ror_64( _mm256_xor_si256( b, c ), 24 ); \
a = _mm256_add_epi64( a, b ); \
d = mm256_rotr_64( _mm256_xor_si256( d, a ), 16 ); \
d = mm256_ror_64( _mm256_xor_si256( d, a ), 16 ); \
c = _mm256_add_epi64( c, d ); \
b = mm256_rotr_64( _mm256_xor_si256( b, c ), 63 );
b = mm256_ror_64( _mm256_xor_si256( b, c ), 63 );
#define LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \
G_4X64( s0, s1, s2, s3 ); \
s1 = mm256_rotr256_1x64( s1); \
s1 = mm256_ror256_1x64( s1); \
s2 = mm256_swap_128( s2 ); \
s3 = mm256_rotl256_1x64( s3 ); \
s3 = mm256_rol256_1x64( s3 ); \
G_4X64( s0, s1, s2, s3 ); \
s1 = mm256_rotl256_1x64( s1 ); \
s1 = mm256_rol256_1x64( s1 ); \
s2 = mm256_swap_128( s2 ); \
s3 = mm256_rotr256_1x64( s3 );
s3 = mm256_ror256_1x64( s3 );
#define LYRA_12_ROUNDS_AVX2( s0, s1, s2, s3 ) \
LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \
@@ -94,25 +94,25 @@ static inline uint64_t rotr64( const uint64_t w, const unsigned c ){
// returns void, all args updated
#define G_2X64(a,b,c,d) \
a = _mm_add_epi64( a, b ); \
d = mm_rotr_64( _mm_xor_si128( d, a), 32 ); \
d = mm_ror_64( _mm_xor_si128( d, a), 32 ); \
c = _mm_add_epi64( c, d ); \
b = mm_rotr_64( _mm_xor_si128( b, c ), 24 ); \
b = mm_ror_64( _mm_xor_si128( b, c ), 24 ); \
a = _mm_add_epi64( a, b ); \
d = mm_rotr_64( _mm_xor_si128( d, a ), 16 ); \
d = mm_ror_64( _mm_xor_si128( d, a ), 16 ); \
c = _mm_add_epi64( c, d ); \
b = mm_rotr_64( _mm_xor_si128( b, c ), 63 );
b = mm_ror_64( _mm_xor_si128( b, c ), 63 );
#define LYRA_ROUND_AVX(s0,s1,s2,s3,s4,s5,s6,s7) \
G_2X64( s0, s2, s4, s6 ); \
G_2X64( s1, s3, s5, s7 ); \
mm_rotl256_1x64( s2, s3 ); \
mm_ror256_1x64( s2, s3 ); \
mm_swap_128( s4, s5 ); \
mm_rotr256_1x64( s6, s7 ); \
mm_rol256_1x64( s6, s7 ); \
G_2X64( s0, s2, s4, s6 ); \
G_2X64( s1, s3, s5, s7 ); \
mm_rotr256_1x64( s2, s3 ); \
mm_rol256_1x64( s2, s3 ); \
mm_swap_128( s4, s5 ); \
mm_rotl256_1x64( s6, s7 );
mm_ror256_1x64( s6, s7 );
#define LYRA_12_ROUNDS_AVX(s0,s1,s2,s3,s4,s5,s6,s7) \
LYRA_ROUND_AVX(s0,s1,s2,s3,s4,s5,s6,s7) \

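The G_4X64 and G_2X64 macros above are four- and two-lane vectorizations of Blake2b's G mixing function; this hunk only renames the rotate helpers (rotr/rotl to ror/rol). For reference, the scalar operation each lane performs, with the same 32/24/16/63 rotation constants:

#include <stdint.h>

static inline uint64_t ror64( uint64_t w, unsigned c )
{ return ( w >> c ) | ( w << ( 64 - c ) ); }

// Scalar Blake2b G: what G_4X64 applies to four lanes at once (AVX2) and
// G_2X64 to two lanes (SSE2/AVX).
static inline void blake2b_g( uint64_t *a, uint64_t *b,
                              uint64_t *c, uint64_t *d )
{
   *a += *b;  *d = ror64( *d ^ *a, 32 );
   *c += *d;  *b = ror64( *b ^ *c, 24 );
   *a += *b;  *d = ror64( *d ^ *a, 16 );
   *c += *d;  *b = ror64( *b ^ *c, 63 );
}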

@@ -40,6 +40,35 @@ void lbry_le_build_stratum_request( char *req, struct work *work,
rpc_user, work->job_id, xnonce2str, ntimestr, noncestr );
free(xnonce2str);
}
// Don't use lbry_build_block_header, it can't handle claim; do it inline
// in lbry_build_extraheader. The side effect is no GBT support for lbry.
void lbry_build_block_header( struct work* g_work, uint32_t version,
uint32_t *prevhash, uint32_t *merkle_root,
uint32_t ntime, uint32_t nbits )
{
int i;
memset( g_work->data, 0, sizeof(g_work->data) );
g_work->data[0] = version;
if ( have_stratum )
for ( i = 0; i < 8; i++ )
g_work->data[1 + i] = le32dec( prevhash + i );
else
for (i = 0; i < 8; i++)
g_work->data[ 8-i ] = le32dec( prevhash + i );
for ( i = 0; i < 8; i++ )
g_work->data[9 + i] = be32dec( merkle_root + i );
// for ( int i = 0; i < 8; i++ )
// g_work->data[17 + i] = claim[i];
g_work->data[ LBRY_NTIME_INDEX ] = ntime;
g_work->data[ LBRY_NBITS_INDEX ] = nbits;
g_work->data[28] = 0x80000000;
}
void lbry_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
{
unsigned char merkle_root[64] = { 0 };
@@ -50,14 +79,23 @@ void lbry_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
// Increment extranonce2
for ( t = 0; t < sctx->xnonce2_size && !( ++sctx->job.xnonce2[t] ); t++ );
// Assemble block header
// algo_gate.build_block_header( g_work, le32dec( sctx->job.version ),
// (uint32_t*) sctx->job.prevhash, (uint32_t*) merkle_root,
// le32dec( sctx->job.ntime ), le32dec( sctx->job.nbits ) );
memset( g_work->data, 0, sizeof(g_work->data) );
g_work->data[0] = le32dec( sctx->job.version );
for ( i = 0; i < 8; i++ )
g_work->data[1 + i] = le32dec( (uint32_t *) sctx->job.prevhash + i );
for ( i = 0; i < 8; i++ )
g_work->data[9 + i] = be32dec( (uint32_t *) merkle_root + i );
for ( int i = 0; i < 8; i++ )
g_work->data[17 + i] = ((uint32_t*)sctx->job.claim)[i];
g_work->data[ LBRY_NTIME_INDEX ] = le32dec(sctx->job.ntime);
g_work->data[ LBRY_NBITS_INDEX ] = le32dec(sctx->job.nbits);
g_work->data[28] = 0x80000000;
@@ -86,6 +124,7 @@ bool register_lbry_algo( algo_gate_t* gate )
gate->calc_network_diff = (void*)&lbry_calc_network_diff;
gate->get_max64 = (void*)&lbry_get_max64;
gate->build_stratum_request = (void*)&lbry_le_build_stratum_request;
// gate->build_block_header = (void*)&build_block_header;
gate->build_extraheader = (void*)&lbry_build_extraheader;
gate->set_target = (void*)&lbry_set_target;
gate->ntime_index = LBRY_NTIME_INDEX;


@@ -155,7 +155,7 @@ bool register_sha256t_algo( algo_gate_t* gate )
gate->optimizations = SSE2_OPT | AVX_OPT | AVX2_OPT | SHA_OPT;
gate->scanhash = (void*)&scanhash_sha256t;
gate->hash = (void*)&sha256t_hash;
gate->set_target = (void*)&sha256t_set_target;
// gate->set_target = (void*)&sha256t_set_target;
gate->get_max64 = (void*)&get_max64_0x3ffff;
return true;
}


@@ -52,21 +52,6 @@ extern "C"{
#define C32 SPH_C32
/*
* As of round 2 of the SHA-3 competition, the published reference
* implementation and test vectors are wrong, because they use
* big-endian AES tables while the internal decoding uses little-endian.
* The code below follows the specification. To turn it into a code
* which follows the reference implementation (the one called "BugFix"
* on the SHAvite-3 web site, published on Nov 23rd, 2009), comment out
* the code below (from the '#define AES_BIG_ENDIAN...' to the definition
* of the AES_ROUND_NOKEY macro) and replace it with the version which
* is commented out afterwards.
*/
#define AES_BIG_ENDIAN 0
#include "algo/sha/aes_helper.c"
static const sph_u32 IV512[] = {
C32(0x72FCCDD8), C32(0x79CA4727), C32(0x128A077B), C32(0x40D55AEC),
C32(0xD1901A06), C32(0x430AE307), C32(0xB29F5CD1), C32(0xDF07FBFC),
@@ -74,210 +59,19 @@ static const sph_u32 IV512[] = {
C32(0xE275EADE), C32(0x502D9FCD), C32(0xB9357178), C32(0x022A4B9A)
};
// Return hi 128 bits with elements shifted one lane with vacated lane filled
// with data rotated from lo.
// Partially rotate elements in two 128 bit vectors as one 256 bit vector
// and return the rotated high 128 bits.
// Similar to mm_rotr256_1x32 but only a partial rotation as lo is not
// completed. It's faster than a full rotation.
#if defined(__SSSE3__)
static inline __m128i mm_rotr256hi_1x32( __m128i hi, __m128i lo, int n )
{ return _mm_or_si128( _mm_srli_si128( hi, n<<2 ),
_mm_slli_si128( lo, 16 - (n<<2) ) );
}
#define mm_rotr256hi_1x32( hi, lo ) _mm_alignr_epi8( lo, hi, 4 )
#define AES_ROUND_NOKEY(x0, x1, x2, x3) do { \
sph_u32 t0 = (x0); \
sph_u32 t1 = (x1); \
sph_u32 t2 = (x2); \
sph_u32 t3 = (x3); \
AES_ROUND_NOKEY_LE(t0, t1, t2, t3, x0, x1, x2, x3); \
} while (0)
#else // SSE2
#define mm_rotr256hi_1x32( hi, lo ) \
_mm_or_si128( _mm_srli_si128( hi, 4 ), \
_mm_slli_si128( lo, 12 ) )
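// Aside, not part of this file: the SSSE3 and SSE2 definitions above are
// equivalent. Viewing (lo:hi) as one 256-bit value, both return the low
// 128 bits of (lo:hi) >> 32, i.e. (hi >> 32 bits) | (lo << 96 bits).
// A standalone check (compile with -mssse3):
#include <stdio.h>
#include <tmmintrin.h>

int main()
{
   __m128i hi = _mm_set_epi32( 8, 7, 6, 5 );  // words, low to high: 5 6 7 8
   __m128i lo = _mm_set_epi32( 4, 3, 2, 1 );  // words, low to high: 1 2 3 4
   __m128i a = _mm_alignr_epi8( lo, hi, 4 );              // SSSE3 version
   __m128i b = _mm_or_si128( _mm_srli_si128( hi, 4 ),
                             _mm_slli_si128( lo, 12 ) );  // SSE2 version
   int r[4];
   _mm_storeu_si128( (__m128i*)r, a );
   printf( "%d %d %d %d\n", r[0], r[1], r[2], r[3] );     // 6 7 8 1
   _mm_storeu_si128( (__m128i*)r, b );
   printf( "%d %d %d %d\n", r[0], r[1], r[2], r[3] );     // 6 7 8 1
   return 0;
}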
#define KEY_EXPAND_ELT(k0, k1, k2, k3) do { \
sph_u32 kt; \
AES_ROUND_NOKEY(k1, k2, k3, k0); \
kt = (k0); \
(k0) = (k1); \
(k1) = (k2); \
(k2) = (k3); \
(k3) = kt; \
} while (0)
#if SPH_SMALL_FOOTPRINT_SHAVITE
/*
* This function assumes that "msg" is aligned for 32-bit access.
*/
static void
c512(sph_shavite_big_context *sc, const void *msg)
{
sph_u32 p0, p1, p2, p3, p4, p5, p6, p7;
sph_u32 p8, p9, pA, pB, pC, pD, pE, pF;
sph_u32 rk[448];
size_t u;
int r, s;
#if SPH_LITTLE_ENDIAN
memcpy(rk, msg, 128);
#else
for (u = 0; u < 32; u += 4) {
rk[u + 0] = sph_dec32le_aligned(
(const unsigned char *)msg + (u << 2) + 0);
rk[u + 1] = sph_dec32le_aligned(
(const unsigned char *)msg + (u << 2) + 4);
rk[u + 2] = sph_dec32le_aligned(
(const unsigned char *)msg + (u << 2) + 8);
rk[u + 3] = sph_dec32le_aligned(
(const unsigned char *)msg + (u << 2) + 12);
}
#endif
u = 32;
for (;;) {
for (s = 0; s < 4; s ++) {
sph_u32 x0, x1, x2, x3;
x0 = rk[u - 31];
x1 = rk[u - 30];
x2 = rk[u - 29];
x3 = rk[u - 32];
AES_ROUND_NOKEY(x0, x1, x2, x3);
rk[u + 0] = x0 ^ rk[u - 4];
rk[u + 1] = x1 ^ rk[u - 3];
rk[u + 2] = x2 ^ rk[u - 2];
rk[u + 3] = x3 ^ rk[u - 1];
if (u == 32) {
rk[ 32] ^= sc->count0;
rk[ 33] ^= sc->count1;
rk[ 34] ^= sc->count2;
rk[ 35] ^= SPH_T32(~sc->count3);
} else if (u == 440) {
rk[440] ^= sc->count1;
rk[441] ^= sc->count0;
rk[442] ^= sc->count3;
rk[443] ^= SPH_T32(~sc->count2);
}
u += 4;
x0 = rk[u - 31];
x1 = rk[u - 30];
x2 = rk[u - 29];
x3 = rk[u - 32];
AES_ROUND_NOKEY(x0, x1, x2, x3);
rk[u + 0] = x0 ^ rk[u - 4];
rk[u + 1] = x1 ^ rk[u - 3];
rk[u + 2] = x2 ^ rk[u - 2];
rk[u + 3] = x3 ^ rk[u - 1];
if (u == 164) {
rk[164] ^= sc->count3;
rk[165] ^= sc->count2;
rk[166] ^= sc->count1;
rk[167] ^= SPH_T32(~sc->count0);
} else if (u == 316) {
rk[316] ^= sc->count2;
rk[317] ^= sc->count3;
rk[318] ^= sc->count0;
rk[319] ^= SPH_T32(~sc->count1);
}
u += 4;
}
if (u == 448)
break;
for (s = 0; s < 8; s ++) {
rk[u + 0] = rk[u - 32] ^ rk[u - 7];
rk[u + 1] = rk[u - 31] ^ rk[u - 6];
rk[u + 2] = rk[u - 30] ^ rk[u - 5];
rk[u + 3] = rk[u - 29] ^ rk[u - 4];
u += 4;
}
}
p0 = sc->h[0x0];
p1 = sc->h[0x1];
p2 = sc->h[0x2];
p3 = sc->h[0x3];
p4 = sc->h[0x4];
p5 = sc->h[0x5];
p6 = sc->h[0x6];
p7 = sc->h[0x7];
p8 = sc->h[0x8];
p9 = sc->h[0x9];
pA = sc->h[0xA];
pB = sc->h[0xB];
pC = sc->h[0xC];
pD = sc->h[0xD];
pE = sc->h[0xE];
pF = sc->h[0xF];
u = 0;
for (r = 0; r < 14; r ++) {
#define C512_ELT(l0, l1, l2, l3, r0, r1, r2, r3) do { \
sph_u32 x0, x1, x2, x3; \
x0 = r0 ^ rk[u ++]; \
x1 = r1 ^ rk[u ++]; \
x2 = r2 ^ rk[u ++]; \
x3 = r3 ^ rk[u ++]; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
x0 ^= rk[u ++]; \
x1 ^= rk[u ++]; \
x2 ^= rk[u ++]; \
x3 ^= rk[u ++]; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
x0 ^= rk[u ++]; \
x1 ^= rk[u ++]; \
x2 ^= rk[u ++]; \
x3 ^= rk[u ++]; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
x0 ^= rk[u ++]; \
x1 ^= rk[u ++]; \
x2 ^= rk[u ++]; \
x3 ^= rk[u ++]; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
l0 ^= x0; \
l1 ^= x1; \
l2 ^= x2; \
l3 ^= x3; \
} while (0)
#define WROT(a, b, c, d) do { \
sph_u32 t = d; \
d = c; \
c = b; \
b = a; \
a = t; \
} while (0)
C512_ELT(p0, p1, p2, p3, p4, p5, p6, p7);
C512_ELT(p8, p9, pA, pB, pC, pD, pE, pF);
WROT(p0, p4, p8, pC);
WROT(p1, p5, p9, pD);
WROT(p2, p6, pA, pE);
WROT(p3, p7, pB, pF);
#undef C512_ELT
#undef WROT
}
sc->h[0x0] ^= p0;
sc->h[0x1] ^= p1;
sc->h[0x2] ^= p2;
sc->h[0x3] ^= p3;
sc->h[0x4] ^= p4;
sc->h[0x5] ^= p5;
sc->h[0x6] ^= p6;
sc->h[0x7] ^= p7;
sc->h[0x8] ^= p8;
sc->h[0x9] ^= p9;
sc->h[0xA] ^= pA;
sc->h[0xB] ^= pB;
sc->h[0xC] ^= pC;
sc->h[0xD] ^= pD;
sc->h[0xE] ^= pE;
sc->h[0xF] ^= pF;
}
#else
static void
c512( sph_shavite_big_context *sc, const void *msg )
@@ -331,7 +125,7 @@ c512( sph_shavite_big_context *sc, const void *msg )
for ( r = 0; r < 3; r ++ )
{
// round 1, 5, 9
k00 = mm_rotr_1x32( _mm_aesenc_si128( k00, m128_zero ) );
k00 = mm_ror_1x32( _mm_aesenc_si128( k00, m128_zero ) );
k00 = _mm_xor_si128( k00, k13 );
if ( r == 0 )
@@ -340,7 +134,7 @@ c512( sph_shavite_big_context *sc, const void *msg )
x = _mm_xor_si128( p0, k00 );
x = _mm_aesenc_si128( x, m128_zero );
k01 = mm_rotr_1x32( _mm_aesenc_si128( k01, m128_zero ) );
k01 = mm_ror_1x32( _mm_aesenc_si128( k01, m128_zero ) );
k01 = _mm_xor_si128( k01, k00 );
if ( r == 1 )
@@ -349,33 +143,33 @@ c512( sph_shavite_big_context *sc, const void *msg )
x = _mm_xor_si128( x, k01 );
x = _mm_aesenc_si128( x, m128_zero );
k02 = mm_rotr_1x32( _mm_aesenc_si128( k02, m128_zero ) );
k02 = mm_ror_1x32( _mm_aesenc_si128( k02, m128_zero ) );
k02 = _mm_xor_si128( k02, k01 );
x = _mm_xor_si128( x, k02 );
x = _mm_aesenc_si128( x, m128_zero );
k03 = mm_rotr_1x32( _mm_aesenc_si128( k03, m128_zero ) );
k03 = mm_ror_1x32( _mm_aesenc_si128( k03, m128_zero ) );
k03 = _mm_xor_si128( k03, k02 );
x = _mm_xor_si128( x, k03 );
x = _mm_aesenc_si128( x, m128_zero );
p3 = _mm_xor_si128( p3, x );
k10 = mm_rotr_1x32( _mm_aesenc_si128( k10, m128_zero ) );
k10 = mm_ror_1x32( _mm_aesenc_si128( k10, m128_zero ) );
k10 = _mm_xor_si128( k10, k03 );
x = _mm_xor_si128( p2, k10 );
x = _mm_aesenc_si128( x, m128_zero );
k11 = mm_rotr_1x32( _mm_aesenc_si128( k11, m128_zero ) );
k11 = mm_ror_1x32( _mm_aesenc_si128( k11, m128_zero ) );
k11 = _mm_xor_si128( k11, k10 );
x = _mm_xor_si128( x, k11 );
x = _mm_aesenc_si128( x, m128_zero );
k12 = mm_rotr_1x32( _mm_aesenc_si128( k12, m128_zero ) );
k12 = mm_ror_1x32( _mm_aesenc_si128( k12, m128_zero ) );
k12 = _mm_xor_si128( k12, k11 );
x = _mm_xor_si128( x, k12 );
x = _mm_aesenc_si128( x, m128_zero );
k13 = mm_rotr_1x32( _mm_aesenc_si128( k13, m128_zero ) );
k13 = mm_ror_1x32( _mm_aesenc_si128( k13, m128_zero ) );
k13 = _mm_xor_si128( k13, k12 );
if ( r == 2 )
@@ -388,80 +182,80 @@ c512( sph_shavite_big_context *sc, const void *msg )
// round 2, 6, 10
k00 = _mm_xor_si128( k00, mm_rotr256hi_1x32( k12, k13, 1 ) );
k00 = _mm_xor_si128( k00, mm_rotr256hi_1x32( k12, k13 ) );
x = _mm_xor_si128( p3, k00 );
x = _mm_aesenc_si128( x, m128_zero );
k01 = _mm_xor_si128( k01, mm_rotr256hi_1x32( k13, k00, 1 ) );
k01 = _mm_xor_si128( k01, mm_rotr256hi_1x32( k13, k00 ) );
x = _mm_xor_si128( x, k01 );
x = _mm_aesenc_si128( x, m128_zero );
k02 = _mm_xor_si128( k02, mm_rotr256hi_1x32( k00, k01, 1 ) );
k02 = _mm_xor_si128( k02, mm_rotr256hi_1x32( k00, k01 ) );
x = _mm_xor_si128( x, k02 );
x = _mm_aesenc_si128( x, m128_zero );
k03 = _mm_xor_si128( k03, mm_rotr256hi_1x32( k01, k02, 1 ) );
k03 = _mm_xor_si128( k03, mm_rotr256hi_1x32( k01, k02 ) );
x = _mm_xor_si128( x, k03 );
x = _mm_aesenc_si128( x, m128_zero );
p2 = _mm_xor_si128( p2, x );
k10 = _mm_xor_si128( k10, mm_rotr256hi_1x32( k02, k03, 1 ) );
k10 = _mm_xor_si128( k10, mm_rotr256hi_1x32( k02, k03 ) );
x = _mm_xor_si128( p1, k10 );
x = _mm_aesenc_si128( x, m128_zero );
k11 = _mm_xor_si128( k11, mm_rotr256hi_1x32( k03, k10, 1 ) );
k11 = _mm_xor_si128( k11, mm_rotr256hi_1x32( k03, k10 ) );
x = _mm_xor_si128( x, k11 );
x = _mm_aesenc_si128( x, m128_zero );
k12 = _mm_xor_si128( k12, mm_rotr256hi_1x32( k10, k11, 1 ) );
k12 = _mm_xor_si128( k12, mm_rotr256hi_1x32( k10, k11 ) );
x = _mm_xor_si128( x, k12 );
x = _mm_aesenc_si128( x, m128_zero );
k13 = _mm_xor_si128( k13, mm_rotr256hi_1x32( k11, k12, 1 ) );
k13 = _mm_xor_si128( k13, mm_rotr256hi_1x32( k11, k12 ) );
x = _mm_xor_si128( x, k13 );
x = _mm_aesenc_si128( x, m128_zero );
p0 = _mm_xor_si128( p0, x );
// round 3, 7, 11
k00 = mm_rotr_1x32( _mm_aesenc_si128( k00, m128_zero ) );
k00 = mm_ror_1x32( _mm_aesenc_si128( k00, m128_zero ) );
k00 = _mm_xor_si128( k00, k13 );
x = _mm_xor_si128( p2, k00 );
x = _mm_aesenc_si128( x, m128_zero );
k01 = mm_rotr_1x32( _mm_aesenc_si128( k01, m128_zero ) );
k01 = mm_ror_1x32( _mm_aesenc_si128( k01, m128_zero ) );
k01 = _mm_xor_si128( k01, k00 );
x = _mm_xor_si128( x, k01 );
x = _mm_aesenc_si128( x, m128_zero );
k02 = mm_rotr_1x32( _mm_aesenc_si128( k02, m128_zero ) );
k02 = mm_ror_1x32( _mm_aesenc_si128( k02, m128_zero ) );
k02 = _mm_xor_si128( k02, k01 );
x = _mm_xor_si128( x, k02 );
x = _mm_aesenc_si128( x, m128_zero );
k03 = mm_rotr_1x32( _mm_aesenc_si128( k03, m128_zero ) );
k03 = mm_ror_1x32( _mm_aesenc_si128( k03, m128_zero ) );
k03 = _mm_xor_si128( k03, k02 );
x = _mm_xor_si128( x, k03 );
x = _mm_aesenc_si128( x, m128_zero );
p1 = _mm_xor_si128( p1, x );
k10 = mm_rotr_1x32( _mm_aesenc_si128( k10, m128_zero ) );
k10 = mm_ror_1x32( _mm_aesenc_si128( k10, m128_zero ) );
k10 = _mm_xor_si128( k10, k03 );
x = _mm_xor_si128( p0, k10 );
x = _mm_aesenc_si128( x, m128_zero );
k11 = mm_rotr_1x32( _mm_aesenc_si128( k11, m128_zero ) );
k11 = mm_ror_1x32( _mm_aesenc_si128( k11, m128_zero ) );
k11 = _mm_xor_si128( k11, k10 );
x = _mm_xor_si128( x, k11 );
x = _mm_aesenc_si128( x, m128_zero );
k12 = mm_rotr_1x32( _mm_aesenc_si128( k12, m128_zero ) );
k12 = mm_ror_1x32( _mm_aesenc_si128( k12, m128_zero ) );
k12 = _mm_xor_si128( k12, k11 );
x = _mm_xor_si128( x, k12 );
x = _mm_aesenc_si128( x, m128_zero );
k13 = mm_rotr_1x32( _mm_aesenc_si128( k13, m128_zero ) );
k13 = mm_ror_1x32( _mm_aesenc_si128( k13, m128_zero ) );
k13 = _mm_xor_si128( k13, k12 );
x = _mm_xor_si128( x, k13 );
@@ -470,36 +264,36 @@ c512( sph_shavite_big_context *sc, const void *msg )
// round 4, 8, 12
k00 = _mm_xor_si128( k00, mm_rotr256hi_1x32( k12, k13, 1 ) );
k00 = _mm_xor_si128( k00, mm_rotr256hi_1x32( k12, k13 ) );
x = _mm_xor_si128( p1, k00 );
x = _mm_aesenc_si128( x, m128_zero );
k01 = _mm_xor_si128( k01, mm_rotr256hi_1x32( k13, k00, 1 ) );
k01 = _mm_xor_si128( k01, mm_rotr256hi_1x32( k13, k00 ) );
x = _mm_xor_si128( x, k01 );
x = _mm_aesenc_si128( x, m128_zero );
k02 = _mm_xor_si128( k02, mm_rotr256hi_1x32( k00, k01, 1 ) );
k02 = _mm_xor_si128( k02, mm_rotr256hi_1x32( k00, k01 ) );
x = _mm_xor_si128( x, k02 );
x = _mm_aesenc_si128( x, m128_zero );
k03 = _mm_xor_si128( k03, mm_rotr256hi_1x32( k01, k02, 1 ) );
k03 = _mm_xor_si128( k03, mm_rotr256hi_1x32( k01, k02 ) );
x = _mm_xor_si128( x, k03 );
x = _mm_aesenc_si128( x, m128_zero );
p0 = _mm_xor_si128( p0, x );
k10 = _mm_xor_si128( k10, mm_rotr256hi_1x32( k02, k03, 1 ) );
k10 = _mm_xor_si128( k10, mm_rotr256hi_1x32( k02, k03 ) );
x = _mm_xor_si128( p3, k10 );
x = _mm_aesenc_si128( x, m128_zero );
k11 = _mm_xor_si128( k11, mm_rotr256hi_1x32( k03, k10, 1 ) );
k11 = _mm_xor_si128( k11, mm_rotr256hi_1x32( k03, k10 ) );
x = _mm_xor_si128( x, k11 );
x = _mm_aesenc_si128( x, m128_zero );
k12 = _mm_xor_si128( k12, mm_rotr256hi_1x32( k10, k11, 1 ) );
k12 = _mm_xor_si128( k12, mm_rotr256hi_1x32( k10, k11 ) );
x = _mm_xor_si128( x, k12 );
x = _mm_aesenc_si128( x, m128_zero );
k13 = _mm_xor_si128( k13, mm_rotr256hi_1x32( k11, k12, 1 ) );
k13 = _mm_xor_si128( k13, mm_rotr256hi_1x32( k11, k12 ) );
x = _mm_xor_si128( x, k13 );
x = _mm_aesenc_si128( x, m128_zero );
@@ -508,44 +302,44 @@ c512( sph_shavite_big_context *sc, const void *msg )
// round 13
k00 = mm_rotr_1x32( _mm_aesenc_si128( k00, m128_zero ) );
k00 = mm_ror_1x32( _mm_aesenc_si128( k00, m128_zero ) );
k00 = _mm_xor_si128( k00, k13 );
x = _mm_xor_si128( p0, k00 );
x = _mm_aesenc_si128( x, m128_zero );
k01 = mm_rotr_1x32( _mm_aesenc_si128( k01, m128_zero ) );
k01 = mm_ror_1x32( _mm_aesenc_si128( k01, m128_zero ) );
k01 = _mm_xor_si128( k01, k00 );
x = _mm_xor_si128( x, k01 );
x = _mm_aesenc_si128( x, m128_zero );
k02 = mm_rotr_1x32( _mm_aesenc_si128( k02, m128_zero ) );
k02 = mm_ror_1x32( _mm_aesenc_si128( k02, m128_zero ) );
k02 = _mm_xor_si128( k02, k01 );
x = _mm_xor_si128( x, k02 );
x = _mm_aesenc_si128( x, m128_zero );
k03 = mm_rotr_1x32( _mm_aesenc_si128( k03, m128_zero ) );
k03 = mm_ror_1x32( _mm_aesenc_si128( k03, m128_zero ) );
k03 = _mm_xor_si128( k03, k02 );
x = _mm_xor_si128( x, k03 );
x = _mm_aesenc_si128( x, m128_zero );
p3 = _mm_xor_si128( p3, x );
k10 = mm_rotr_1x32( _mm_aesenc_si128( k10, m128_zero ) );
k10 = mm_ror_1x32( _mm_aesenc_si128( k10, m128_zero ) );
k10 = _mm_xor_si128( k10, k03 );
x = _mm_xor_si128( p2, k10 );
x = _mm_aesenc_si128( x, m128_zero );
k11 = mm_rotr_1x32( _mm_aesenc_si128( k11, m128_zero ) );
k11 = mm_ror_1x32( _mm_aesenc_si128( k11, m128_zero ) );
k11 = _mm_xor_si128( k11, k10 );
x = _mm_xor_si128( x, k11 );
x = _mm_aesenc_si128( x, m128_zero );
k12 = mm_rotr_1x32( _mm_aesenc_si128( k12, m128_zero ) );
k12 = mm_ror_1x32( _mm_aesenc_si128( k12, m128_zero ) );
k12 = _mm_xor_si128( k12, _mm_xor_si128( k11, _mm_set_epi32(
~sc->count2, sc->count3, sc->count0, sc->count1 ) ) );
x = _mm_xor_si128( x, k12 );
x = _mm_aesenc_si128( x, m128_zero );
k13 = mm_rotr_1x32( _mm_aesenc_si128( k13, m128_zero ) );
k13 = mm_ror_1x32( _mm_aesenc_si128( k13, m128_zero ) );
k13 = _mm_xor_si128( k13, k12 );
x = _mm_xor_si128( x, k13 );
@@ -558,7 +352,6 @@ c512( sph_shavite_big_context *sc, const void *msg )
h[3] = _mm_xor_si128( h[3], p1 );
}
#endif
static void
shavite_big_aesni_init( sph_shavite_big_context *sc, const sph_u32 *iv )


@@ -1363,10 +1363,11 @@ yescrypt_kdf(const yescrypt_shared_t * shared, yescrypt_local_t * local,
{
HMAC_SHA256_CTX ctx;
HMAC_SHA256_Init(&ctx, buf, buflen);
if ( client_key_hack ) // GlobalBoost-Y buggy yescrypt
if ( yescrypt_client_key )
HMAC_SHA256_Update( &ctx, (uint8_t*)yescrypt_client_key,
yescrypt_client_key_len );
else
HMAC_SHA256_Update( &ctx, salt, saltlen );
else // Proper yescrypt
HMAC_SHA256_Update(&ctx, "Client Key", 10);
HMAC_SHA256_Final(sha256, &ctx);
}
/* Compute StoredKey */


@@ -25,7 +25,7 @@
#include "compat.h"
#include "yescrypt.h"
#include "sha256_Y.h"
#include "algo-gate-api.h"
#define BYTES2CHARS(bytes) \
@@ -366,7 +366,8 @@ static int yescrypt_bsty(const uint8_t * passwd, size_t passwdlen,
uint64_t YESCRYPT_N;
uint32_t YESCRYPT_R;
uint32_t YESCRYPT_P;
bool client_key_hack;
char *yescrypt_client_key = NULL;
int yescrypt_client_key_len = 0;
/* main hash 80 bytes input */
void yescrypt_hash( const char *input, char *output, uint32_t len )
@@ -436,7 +437,8 @@ bool register_yescrypt_algo( algo_gate_t* gate )
{
yescrypt_gate_base( gate );
gate->get_max64 = (void*)&yescrypt_get_max64;
client_key_hack = true;
yescrypt_client_key = NULL;
yescrypt_client_key_len = 0;
YESCRYPT_N = 2048;
YESCRYPT_R = 8;
YESCRYPT_P = 1;
@@ -447,7 +449,8 @@ bool register_yescryptr8_algo( algo_gate_t* gate )
{
yescrypt_gate_base( gate );
gate->get_max64 = (void*)&yescrypt_get_max64;
client_key_hack = false;
yescrypt_client_key = "Client Key";
yescrypt_client_key_len = 10;
YESCRYPT_N = 2048;
YESCRYPT_R = 8;
YESCRYPT_P = 1;
@@ -458,10 +461,23 @@ bool register_yescryptr16_algo( algo_gate_t* gate )
{
yescrypt_gate_base( gate );
gate->get_max64 = (void*)&yescryptr16_get_max64;
client_key_hack = false;
yescrypt_client_key = "Client Key";
yescrypt_client_key_len = 10;
YESCRYPT_N = 4096;
YESCRYPT_R = 16;
YESCRYPT_P = 1;
return true;
}
bool register_yescryptr32_algo( algo_gate_t* gate )
{
yescrypt_gate_base( gate );
gate->get_max64 = (void*)&yescryptr16_get_max64;
yescrypt_client_key = "WaviBanana";
yescrypt_client_key_len = 10;
YESCRYPT_N = 4096;
YESCRYPT_R = 32;
YESCRYPT_P = 1;
return true;
}
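Read together with the yescrypt_kdf hunk further up, this change replaces the client_key_hack boolean with an explicit per-variant HMAC personalization key: a NULL yescrypt_client_key feeds the salt to HMAC_SHA256 (the buggy GlobalBoost-Y behaviour), any other value feeds the named string. The registered variants line up as:

algo          client key       N     R   coin
yescrypt      (NULL -> salt)   2048   8  Globalboost-Y (BSTY)
yescryptr8    "Client Key"     2048   8  BitZeny (ZNY)
yescryptr16   "Client Key"     4096  16  Yenten (YTN)
yescryptr32   "WaviBanana"     4096  32  WAVI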


@@ -108,7 +108,8 @@ typedef enum {
__YESCRYPT_INIT_SHARED = 0x30000
} yescrypt_flags_t;
extern bool client_key_hack; // true for GlobalBoost-Y
extern char *yescrypt_client_key;
extern int yescrypt_client_key_len;
#define YESCRYPT_KNOWN_FLAGS \

api.c

@@ -98,6 +98,7 @@ extern int opt_api_remote;
extern double global_hashrate;
extern uint32_t accepted_count;
extern uint32_t rejected_count;
extern uint32_t solved_count;
#define cpu_threads opt_n_threads
@@ -138,8 +139,7 @@ static char *getsummary( char *params )
double accps = (60.0 * accepted_count) / (uptime ? uptime : 1.0);
double diff = net_diff > 0. ? net_diff : stratum_diff;
char diff_str[16];
double hashrate = (double)global_hashrate;
char units[4] = {0};
double hrate = (double)global_hashrate;
struct cpu_info cpu = { 0 };
#ifdef USE_MONITORING
cpu.has_monitoring = true;
@@ -157,16 +157,15 @@ static char *getsummary( char *params )
sprintf( diff_str, "%.6f", diff);
*buffer = '\0';
scale_hash_for_display ( &hashrate, units );
sprintf( buffer, "NAME=%s;VER=%s;API=%s;"
"ALGO=%s;CPUS=%d;%sH/s=%.2f;ACC=%d;REJ=%d;"
"ALGO=%s;CPUS=%d;URL=%s;"
"HS=%.2f;KHS=%.2f;ACC=%d;REJ=%d;SOL=%d;"
"ACCMN=%.3f;DIFF=%s;TEMP=%.1f;FAN=%d;FREQ=%d;"
"UPTIME=%.0f;TS=%u|",
PACKAGE_NAME, PACKAGE_VERSION, APIVERSION,
algo, opt_n_threads, units, hashrate,
accepted_count, rejected_count, accps, diff_str,
cpu.cpu_temp, cpu.cpu_fan, cpu.cpu_clock,
algo, opt_n_threads, short_url, hrate, hrate/1000.0,
accepted_count, rejected_count, solved_count,
accps, diff_str, cpu.cpu_temp, cpu.cpu_fan, cpu.cpu_clock,
uptime, (uint32_t) ts);
return buffer;
}
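The summary reply therefore gains the pool URL, a raw H/s figure alongside KH/s, and the solved-block count. For illustration only, with every value made up (including the API version string):

NAME=cpuminer-opt;VER=3.8.4.1;API=1.0;ALGO=yescryptr32;CPUS=4;URL=pool.example.com:3333;HS=1250.00;KHS=1.25;ACC=25;REJ=1;SOL=0;ACCMN=1.500;DIFF=0.001520;TEMP=0.0;FAN=0;FREQ=0;UPTIME=1000;TS=1521741000|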

avxdefs.h

File diff suppressed because it is too large.

configure

@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.8.4.
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.8.4.1.
#
#
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
@@ -577,8 +577,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='cpuminer-opt'
PACKAGE_TARNAME='cpuminer-opt'
PACKAGE_VERSION='3.8.4'
PACKAGE_STRING='cpuminer-opt 3.8.4'
PACKAGE_VERSION='3.8.4.1'
PACKAGE_STRING='cpuminer-opt 3.8.4.1'
PACKAGE_BUGREPORT=''
PACKAGE_URL=''
@@ -1321,7 +1321,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
\`configure' configures cpuminer-opt 3.8.4 to adapt to many kinds of systems.
\`configure' configures cpuminer-opt 3.8.4.1 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1392,7 +1392,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
short | recursive ) echo "Configuration of cpuminer-opt 3.8.4:";;
short | recursive ) echo "Configuration of cpuminer-opt 3.8.4.1:";;
esac
cat <<\_ACEOF
@@ -1497,7 +1497,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
cpuminer-opt configure 3.8.4
cpuminer-opt configure 3.8.4.1
generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2000,7 +2000,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
It was created by cpuminer-opt $as_me 3.8.4, which was
It was created by cpuminer-opt $as_me 3.8.4.1, which was
generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@
@@ -2981,7 +2981,7 @@ fi
# Define the identity of the package.
PACKAGE='cpuminer-opt'
VERSION='3.8.4'
VERSION='3.8.4.1'
cat >>confdefs.h <<_ACEOF
@@ -6677,7 +6677,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
This file was extended by cpuminer-opt $as_me 3.8.4, which was
This file was extended by cpuminer-opt $as_me 3.8.4.1, which was
generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -6743,7 +6743,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
cpuminer-opt config.status 3.8.4
cpuminer-opt config.status 3.8.4.1
configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\"


@@ -1,4 +1,4 @@
AC_INIT([cpuminer-opt], [3.8.3.1])
AC_INIT([cpuminer-opt], [3.8.4.1])
AC_PREREQ([2.59c])
AC_CANONICAL_SYSTEM


@@ -103,7 +103,7 @@ enum algos opt_algo = ALGO_NULL;
int opt_scrypt_n = 0;
int opt_pluck_n = 128;
int opt_n_threads = 0;
#ifdef __GNUC__
#if ( __GNUC__ > 4 ) || ( ( __GNUC__ == 4 ) && ( __GNUC_MINOR__ >= 8 ) )
__int128_t opt_affinity = -1LL;
#else
int64_t opt_affinity = -1LL;
@@ -177,7 +177,7 @@ static struct work g_work = {{ 0 }};
time_t g_work_time = 0;
static pthread_mutex_t g_work_lock;
static bool submit_old = false;
static char* lp_id;
char* lp_id;
static void workio_cmd_free(struct workio_cmd *wc);
@@ -200,7 +200,7 @@ static inline void drop_policy(void)
#define pthread_setaffinity_np(tid,sz,s) {} /* only do process affinity */
#endif
#ifdef __GNUC__
#if ( __GNUC__ > 4 ) || ( ( __GNUC__ == 4 ) && ( __GNUC_MINOR__ >= 8 ) )
static void affine_to_cpu_mask( int id, unsigned __int128 mask )
#else
static void affine_to_cpu_mask( int id, unsigned long long mask )
@@ -213,7 +213,7 @@ static void affine_to_cpu_mask( int id, unsigned long long mask )
for ( uint8_t i = 0; i < ncpus; i++ )
{
// cpu mask
#ifdef __GNUC__
#if ( __GNUC__ > 4 ) || ( ( __GNUC__ == 4 ) && ( __GNUC_MINOR__ >= 8 ) )
if( ( mask & ( (unsigned __int128)1ULL << i ) ) ) CPU_SET( i, &set );
#else
if( (ncpus > 64) || ( mask & (1ULL << i) ) ) CPU_SET( i, &set );
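The point of the 128-bit type: the affinity mask carries one bit per logical CPU, so a 64-bit integer tops out at 64 CPUs (the fallback path above simply pins to every CPU when ncpus > 64). A sketch, assuming a GCC >= 4.8 build where unsigned __int128 is available:

// One-hot affinity mask for logical CPU 77; (1ULL << 77) would overflow
// a 64-bit mask, the 128-bit type covers machines with up to 128 CPUs.
unsigned __int128 mask = (unsigned __int128)1 << 77;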
@@ -433,6 +433,8 @@ static bool get_mininginfo(CURL *curl, struct work *work)
return true;
}
// hodl needs 4 but leave it at 3 until gbt better understood
//#define BLOCK_VERSION_CURRENT 3
#define BLOCK_VERSION_CURRENT 4
static bool gbt_work_decode( const json_t *val, struct work *work )
@@ -454,25 +456,24 @@ static bool gbt_work_decode(const json_t *val, struct work *work)
bool rc = false;
tmp = json_object_get( val, "mutable" );
if (tmp && json_is_array(tmp)) {
if ( tmp && json_is_array( tmp ) )
{
n = (int) json_array_size( tmp );
for (i = 0; i < n; i++) {
for ( i = 0; i < n; i++ )
{
const char *s = json_string_value( json_array_get( tmp, i ) );
if ( !s )
continue;
if (!strcmp(s, "coinbase/append"))
coinbase_append = true;
else if (!strcmp(s, "submit/coinbase"))
submit_coinbase = true;
else if (!strcmp(s, "version/force"))
version_force = true;
else if (!strcmp(s, "version/reduce"))
version_reduce = true;
if ( !strcmp( s, "coinbase/append" ) ) coinbase_append = true;
else if ( !strcmp( s, "submit/coinbase" ) ) submit_coinbase = true;
else if ( !strcmp( s, "version/force" ) ) version_force = true;
else if ( !strcmp( s, "version/reduce" ) ) version_reduce = true;
}
}
tmp = json_object_get( val, "height" );
if (!tmp || !json_is_integer(tmp)) {
if ( !tmp || !json_is_integer( tmp ) )
{
applog( LOG_ERR, "JSON invalid height" );
goto out;
}
@@ -480,20 +481,26 @@ static bool gbt_work_decode(const json_t *val, struct work *work)
applog( LOG_BLUE, "Current block is %d", work->height );
tmp = json_object_get(val, "version");
if (!tmp || !json_is_integer(tmp)) {
if ( !tmp || !json_is_integer( tmp ) )
{
applog( LOG_ERR, "JSON invalid version" );
goto out;
}
version = (uint32_t) json_integer_value( tmp );
if ((version & 0xffU) > BLOCK_VERSION_CURRENT) {
if (version_reduce) {
if ( (version & 0xffU) > BLOCK_VERSION_CURRENT )
{
if ( version_reduce )
{
version = ( version & ~0xffU ) | BLOCK_VERSION_CURRENT;
} else if (have_gbt && allow_getwork && !version_force) {
applog(LOG_DEBUG, "Switching to getwork, gbt version %d",
version);
}
else if ( have_gbt && allow_getwork && !version_force )
{
applog( LOG_DEBUG, "Switching to getwork, gbt version %d", version );
have_gbt = false;
goto out;
} else if (!version_force) {
}
else if ( !version_force )
{
applog(LOG_ERR, "Unrecognized block version: %u", version);
goto out;
}
@@ -507,29 +514,34 @@ static bool gbt_work_decode(const json_t *val, struct work *work)
}
tmp = json_object_get( val, "curtime" );
if (!tmp || !json_is_integer(tmp)) {
if ( !tmp || !json_is_integer( tmp ) )
{
applog( LOG_ERR, "JSON invalid curtime" );
goto out;
}
curtime = (uint32_t) json_integer_value(tmp);
if (unlikely(!jobj_binary(val, "bits", &bits, sizeof(bits)))) {
if ( unlikely( !jobj_binary( val, "bits", &bits, sizeof(bits) ) ) )
{
applog(LOG_ERR, "JSON invalid bits");
goto out;
}
/* find count and size of transactions */
txa = json_object_get(val, "transactions" );
if (!txa || !json_is_array(txa)) {
if ( !txa || !json_is_array( txa ) )
{
applog( LOG_ERR, "JSON invalid transactions" );
goto out;
}
tx_count = (int) json_array_size( txa );
tx_size = 0;
for (i = 0; i < tx_count; i++) {
for ( i = 0; i < tx_count; i++ )
{
const json_t *tx = json_array_get( txa, i );
const char *tx_hex = json_string_value( json_object_get( tx, "data" ) );
if (!tx_hex) {
if ( !tx_hex )
{
applog( LOG_ERR, "JSON invalid transactions" );
goto out;
}
@@ -538,30 +550,39 @@ static bool gbt_work_decode(const json_t *val, struct work *work)
/* build coinbase transaction */
tmp = json_object_get( val, "coinbasetxn" );
if (tmp) {
if ( tmp )
{
const char *cbtx_hex = json_string_value( json_object_get( tmp, "data" ));
cbtx_size = cbtx_hex ? (int) strlen( cbtx_hex ) / 2 : 0;
cbtx = (uchar*) malloc( cbtx_size + 100 );
if (cbtx_size < 60 || !hex2bin(cbtx, cbtx_hex, cbtx_size)) {
if ( cbtx_size < 60 || !hex2bin( cbtx, cbtx_hex, cbtx_size ) )
{
applog( LOG_ERR, "JSON invalid coinbasetxn" );
goto out;
}
} else {
}
else
{
int64_t cbvalue;
if (!pk_script_size) {
if (allow_getwork) {
if ( !pk_script_size )
{
if ( allow_getwork )
{
applog( LOG_INFO, "No payout address provided, switching to getwork");
have_gbt = false;
} else
}
else
applog( LOG_ERR, "No payout address provided" );
goto out;
}
tmp = json_object_get( val, "coinbasevalue" );
if (!tmp || !json_is_number(tmp)) {
if ( !tmp || !json_is_number( tmp ) )
{
applog( LOG_ERR, "JSON invalid coinbasevalue" );
goto out;
}
cbvalue = (int64_t) (json_is_integer(tmp) ? json_integer_value(tmp) : json_number_value(tmp));
cbvalue = (int64_t) ( json_is_integer( tmp ) ? json_integer_value( tmp )
: json_number_value( tmp ) );
cbtx = (uchar*) malloc(256);
le32enc( (uint32_t *)cbtx, 1 ); /* version */
cbtx[4] = 1; /* in-counter */
@@ -590,13 +611,18 @@ static bool gbt_work_decode(const json_t *val, struct work *work)
{
unsigned char xsig[100];
int xsig_len = 0;
if (*coinbase_sig) {
if ( *coinbase_sig )
{
n = (int) strlen( coinbase_sig );
if (cbtx[41] + xsig_len + n <= 100) {
if ( cbtx[41] + xsig_len + n <= 100 )
{
memcpy( xsig+xsig_len, coinbase_sig, n );
xsig_len += n;
} else {
applog(LOG_WARNING, "Signature does not fit in coinbase, skipping");
}
else
{
applog( LOG_WARNING,
"Signature does not fit in coinbase, skipping" );
}
}
tmp = json_object_get( val, "coinbaseaux" );
@@ -608,11 +634,13 @@ static bool gbt_work_decode(const json_t *val, struct work *work)
unsigned char buf[100];
const char *s = json_string_value( json_object_iter_value( iter ) );
n = s ? (int) ( strlen(s) / 2 ) : 0;
if (!s || n > 100 || !hex2bin(buf, s, n)) {
if ( !s || n > 100 || !hex2bin( buf, s, n ) )
{
applog(LOG_ERR, "JSON invalid coinbaseaux");
break;
}
if (cbtx[41] + xsig_len + n <= 100) {
if ( cbtx[41] + xsig_len + n <= 100 )
{
memcpy( xsig+xsig_len, buf, n );
xsig_len += n;
}
@@ -674,16 +702,9 @@ static bool gbt_work_decode(const json_t *val, struct work *work)
}
/* assemble block header */
work->data[0] = swab32(version);
for (i = 0; i < 8; i++)
work->data[8 - i] = le32dec(prevhash + i);
for (i = 0; i < 8; i++)
work->data[9 + i] = be32dec((uint32_t *)merkle_tree[0] + i);
work->data[17] = swab32(curtime);
work->data[18] = le32dec(&bits);
memset(work->data + 19, 0x00, 52);
work->data[20] = 0x80000000;
work->data[31] = 0x00000280;
algo_gate.build_block_header( work, swab32( version ),
(uint32_t*) prevhash, (uint32_t*) merkle_tree,
swab32( curtime ), le32dec( &bits ) );
if ( unlikely( !jobj_binary(val, "target", target, sizeof(target)) ) )
{
@@ -696,7 +717,8 @@ static bool gbt_work_decode(const json_t *val, struct work *work)
tmp = json_object_get( val, "workid" );
if ( tmp )
{
if (!json_is_string(tmp)) {
if ( !json_is_string( tmp ) )
{
applog( LOG_ERR, "JSON invalid workid" );
goto out;
}
@@ -715,7 +737,8 @@ out:
{
char *lp_uri;
tmp = json_object_get( val, "longpolluri" );
lp_uri = json_is_string(tmp) ? strdup(json_string_value(tmp)) : rpc_url;
lp_uri = json_is_string( tmp ) ? strdup( json_string_value( tmp ) )
: rpc_url;
have_longpoll = true;
tq_push(thr_info[longpoll_thr_id].q, lp_uri);
}
@@ -781,7 +804,7 @@ static int share_result( int result, struct work *work, const char *reason )
hashrate += thr_hashrates[i];
}
result ? accepted_count++ : rejected_count++;
/*
if ( solved )
{
solved_count++;
@@ -790,7 +813,7 @@ static int share_result( int result, struct work *work, const char *reason )
else
sprintf( sol, " Solved %d", solved_count );
}
*/
pthread_mutex_unlock(&stats_lock);
global_hashcount = hashcount;
global_hashrate = hashrate;
@@ -1049,45 +1072,12 @@ bool jr2_submit_getwork_result( CURL *curl, struct work *work )
return true;
}
static bool submit_upstream_work( CURL *curl, struct work *work )
char* std_malloc_txs_request( struct work *work )
{
json_t *val, *res;
char req[JSON_BUF_LEN];
int i;
/* pass if the previous hash is not the current previous hash */
if ( !submit_old && memcmp( &work->data[1], &g_work.data[1], 32 ) )
{
if (opt_debug)
applog(LOG_DEBUG, "DEBUG: stale work detected, discarding");
return true;
}
if ( !have_stratum && allow_mininginfo )
{
struct work wheight;
get_mininginfo( curl, &wheight );
if ( work->height && work->height <= net_blocks )
{
if (opt_debug)
applog(LOG_WARNING, "block %u was already solved", work->height);
return true;
}
}
if ( have_stratum )
{
stratum.sharediff = work->sharediff;
algo_gate.build_stratum_request( req, work, &stratum );
if ( unlikely( !stratum_send_line( &stratum, req ) ) )
{
applog(LOG_ERR, "submit_upstream_work stratum_send_line failed");
return false;
}
return true;
}
else if (work->txs)
{
char data_str[2 * sizeof(work->data) + 1];
char *req;
json_t *val;
char data_str[2 * sizeof(work->data) + 1];
int i;
for ( i = 0; i < ARRAY_SIZE(work->data); i++ )
be32enc( work->data + i, work->data[i] );
@@ -1099,7 +1089,8 @@ static bool submit_upstream_work( CURL *curl, struct work *work )
json_object_set_new( val, "workid", json_string( work->workid ) );
params = json_dumps( val, 0 );
json_decref( val );
req = (char*) malloc(128 + 2 * 80 + strlen(work->txs) + strlen(params));
req = (char*) malloc( 128 + 2 * 80 + strlen( work->txs )
+ strlen( params ) );
sprintf( req,
"{\"method\": \"submitblock\", \"params\": [\"%s%s\", %s], \"id\":4}\r\n",
data_str, work->txs, params );
@@ -1112,9 +1103,52 @@ static bool submit_upstream_work( CURL *curl, struct work *work )
"{\"method\": \"submitblock\", \"params\": [\"%s%s\"], \"id\":4}\r\n",
data_str, work->txs);
}
return req;
}
static bool submit_upstream_work( CURL *curl, struct work *work )
{
/* pass if the previous hash is not the current previous hash */
if ( !submit_old && memcmp( &work->data[1], &g_work.data[1], 32 ) )
{
if (opt_debug)
applog(LOG_DEBUG, "DEBUG: stale work detected, discarding");
return true;
}
if ( !have_stratum && allow_mininginfo )
{
struct work wheight;
get_mininginfo( curl, &wheight );
if ( work->height && work->height <= net_blocks )
{
if (opt_debug)
applog(LOG_WARNING, "block %u was already solved", work->height);
return true;
}
}
if ( have_stratum )
{
char req[JSON_BUF_LEN];
stratum.sharediff = work->sharediff;
algo_gate.build_stratum_request( req, work, &stratum );
if ( unlikely( !stratum_send_line( &stratum, req ) ) )
{
applog(LOG_ERR, "submit_upstream_work stratum_send_line failed");
return false;
}
return true;
}
else if ( work->txs )
{
char *req = NULL;
json_t *val, *res;
req = algo_gate.malloc_txs_request( work );
val = json_rpc_call( curl, rpc_url, rpc_userpass, req, NULL, 0 );
free( req );
if ( unlikely( !val ) )
{
applog( LOG_ERR, "submit_upstream_work json_rpc_call failed" );
@@ -1148,7 +1182,7 @@ static bool submit_upstream_work( CURL *curl, struct work *work )
return algo_gate.submit_getwork_result( curl, work );
}
static const char *getwork_req =
const char *getwork_req =
"{\"method\": \"getwork\", \"params\": [], \"id\":0}\r\n";
#define GBT_CAPABILITIES "[\"coinbasetxn\", \"coinbasevalue\", \"longpoll\", \"workid\"]"
@@ -1156,7 +1190,7 @@ static const char *getwork_req =
static const char *gbt_req =
"{\"method\": \"getblocktemplate\", \"params\": [{\"capabilities\": "
GBT_CAPABILITIES "}], \"id\":0}\r\n";
static const char *gbt_lp_req =
const char *gbt_lp_req =
"{\"method\": \"getblocktemplate\", \"params\": [{\"capabilities\": "
GBT_CAPABILITIES ", \"longpollid\": \"%s\"}], \"id\":0}\r\n";
@@ -1179,8 +1213,8 @@ start:
else
{
val = json_rpc_call( curl, rpc_url, rpc_userpass,
have_gbt ? gbt_req : getwork_req,
&err, have_gbt ? JSON_RPC_QUIET_404 : 0);
have_gbt ? gbt_req : getwork_req, &err,
have_gbt ? JSON_RPC_QUIET_404 : 0);
}
gettimeofday( &tv_end, NULL );
@@ -1626,16 +1660,18 @@ void std_get_new_work( struct work* work, struct work* g_work, int thr_id,
{
uint32_t *nonceptr = algo_gate.get_nonceptr( work->data );
// This logic depends on expression short circuiting to prevent tripping
// over NULL job_id pointers when benchmarking.
if ( ( memcmp( work->data, g_work->data, algo_gate.work_cmp_size )
&& clean_job )
|| ( *nonceptr >= *end_nonce_ptr )
|| ( !opt_benchmark && strcmp( work->job_id, g_work->job_id ) ) )
{
if ( *nonceptr >= *end_nonce_ptr )
algo_gate.stratum_gen_work( &stratum, g_work );
// the job_id check doesn't work as intended, it's a char pointer!
// For stratum the pointers can be dereferenced and the strings compared,
// benchmark not, getwork & gbt unsure.
// || ( have_stratum && strcmp( work->job_id, g_work->job_id ) ) )
// or
// || ( !benchmark && strcmp( work->job_id, g_work->job_id ) ) ) )
// For now leave it as is, it seems stable.
if ( memcmp( work->data, g_work->data, algo_gate.work_cmp_size )
&& ( clean_job || ( *nonceptr >= *end_nonce_ptr )
|| ( work->job_id != g_work->job_id ) ) )
{
work_free( work );
work_copy( work, g_work );
*nonceptr = 0xffffffffU / opt_n_threads * thr_id;
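The comment block above points at a real C footgun: work->job_id is a char*, so the != test compares pointer addresses, not job strings. A standalone illustration:

#include <stdio.h>
#include <string.h>

int main()
{
   const char *a = "job-1";
   char b[8];
   strcpy( b, "job-1" );
   printf( "%d\n", a != b );          // 1: distinct addresses look "changed"
   printf( "%d\n", strcmp( a, b ) );  // 0: the contents are actually equal
   return 0;
}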
@@ -1756,7 +1792,7 @@ static void *miner_thread( void *userdata )
if (opt_debug)
applog( LOG_DEBUG, "Binding thread %d to cpu %d (mask %x)",
thr_id, thr_id % num_cpus, ( 1ULL << (thr_id % num_cpus) ) );
#ifdef __GNUC__
#if ( __GNUC__ > 4 ) || ( ( __GNUC__ == 4 ) && ( __GNUC_MINOR__ >= 8 ) )
affine_to_cpu_mask( thr_id,
(unsigned __int128)1LL << (thr_id % num_cpus) );
#else
@@ -1791,6 +1827,8 @@ static void *miner_thread( void *userdata )
{
algo_gate.wait_for_diff( &stratum );
pthread_mutex_lock( &g_work_lock );
if ( *algo_gate.get_nonceptr( work.data ) >= end_nonce )
algo_gate.stratum_gen_work( &stratum, &g_work );
algo_gate.get_new_work( &work, &g_work, thr_id, &end_nonce,
stratum.job.clean );
pthread_mutex_unlock( &g_work_lock );
@@ -1876,7 +1914,7 @@ static void *miner_thread( void *userdata )
gettimeofday( (struct timeval *) &tv_start, NULL );
// Scan for nonce
nonce_found = (bool) algo_gate.scanhash( thr_id, &work, max_nonce,
nonce_found = algo_gate.scanhash( thr_id, &work, max_nonce,
&hashes_done );
// record scanhash elapsed time
@@ -1891,37 +1929,31 @@ static void *miner_thread( void *userdata )
pthread_mutex_unlock( &stats_lock );
}
// if nonce(s) submit work
// if nonce(s) found submit work
if ( nonce_found && !opt_benchmark )
{
/*
int num_submitted = 0;
{ // 4 way with multiple nonces, copy individually to work and submit.
if ( nonce_found > 1 )
for ( int n = 0; n < nonce_found; n++ )
{
*algo_gate.get_nonceptr( work.data ) = work.nonces[n];
if ( submit_work( mythr, &work ) )
{
applog( LOG_NOTICE, "Share submitted." );
num_submitted++;
}
else
{
applog( LOG_WARNING, "Failed to submit share." );
break;
}
}
// must be a one way algo, nonce is already in work data
if ( !num_submitted )
{
*/
else
{ // only 1 nonce, in work ready to submit.
if ( !submit_work( mythr, &work ) )
{
applog( LOG_WARNING, "Failed to submit share." );
break;
}
applog( LOG_NOTICE, "Share submitted." );
// }
}
// prevent stale work in solo
// we can't submit twice a block!
@@ -2006,18 +2038,14 @@ json_t *std_longpoll_rpc_call( CURL *curl, int *err, char* lp_url )
{
json_t *val;
char *req = NULL;
// if (have_gbt)
// {
if (have_gbt)
{
req = (char*) malloc( strlen(gbt_lp_req) + strlen(lp_id) + 1 );
sprintf( req, gbt_lp_req, lp_id );
// }
//TODO this code makes no sense, this first call should be removed.
// also remove conditional expression in second call, no getwork.
// val = json_rpc_call( curl, rpc_url, rpc_userpass, getwork_req, err,
// JSON_RPC_LONGPOLL );
// val = json_rpc_call( curl, lp_url, rpc_userpass, req ? req : getwork_req,
// err, JSON_RPC_LONGPOLL);
val = json_rpc_call( curl, lp_url, rpc_userpass, req,
}
val = json_rpc_call( curl, rpc_url, rpc_userpass, getwork_req, err,
JSON_RPC_LONGPOLL );
val = json_rpc_call( curl, lp_url, rpc_userpass, req ? req : getwork_req,
err, JSON_RPC_LONGPOLL);
free(req);
return val;
@@ -2235,27 +2263,45 @@ out:
return ret;
}
void std_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
// used by stratum and gbt
void std_build_block_header( struct work* g_work, uint32_t version,
uint32_t *prevhash, uint32_t *merkle_tree,
uint32_t ntime, uint32_t nbits )
{
uchar merkle_root[64] = { 0 };
size_t t;
int i;
algo_gate.gen_merkle_root( merkle_root, sctx );
memset( g_work->data, 0, sizeof(g_work->data) );
g_work->data[0] = version;
if ( have_stratum )
for ( i = 0; i < 8; i++ )
g_work->data[ 1+i ] = le32dec( prevhash + i );
else
for (i = 0; i < 8; i++)
g_work->data[ 8-i ] = le32dec( prevhash + i );
for ( i = 0; i < 8; i++ )
g_work->data[ 9+i ] = be32dec( merkle_tree + i );
g_work->data[ algo_gate.ntime_index ] = ntime;
g_work->data[ algo_gate.nbits_index ] = nbits;
g_work->data[20] = 0x80000000;
g_work->data[31] = 0x00000280;
}
void std_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
{
uchar merkle_tree[64] = { 0 };
size_t t;
algo_gate.gen_merkle_root( merkle_tree, sctx );
// Increment extranonce2
for ( t = 0; t < sctx->xnonce2_size && !( ++sctx->job.xnonce2[t] ); t++ );
// Assemble block header
memset( g_work->data, 0, sizeof(g_work->data) );
g_work->data[0] = le32dec( sctx->job.version );
for ( i = 0; i < 8; i++ )
g_work->data[1 + i] = le32dec( (uint32_t *) sctx->job.prevhash + i );
for ( i = 0; i < 8; i++ )
g_work->data[9 + i] = be32dec( (uint32_t *) merkle_root + i );
g_work->data[ algo_gate.ntime_index ] = le32dec(sctx->job.ntime);
g_work->data[ algo_gate.nbits_index ] = le32dec(sctx->job.nbits);
g_work->data[20] = 0x80000000;
g_work->data[31] = 0x00000280;
algo_gate.build_block_header( g_work, le32dec( sctx->job.version ),
(uint32_t*) sctx->job.prevhash, (uint32_t*) merkle_tree,
le32dec( sctx->job.ntime ), le32dec(sctx->job.nbits) );
}
void std_stratum_gen_work( struct stratum_ctx *sctx, struct work *g_work )


@@ -424,7 +424,7 @@ extern size_t rpc2_bloblen;
extern uint32_t rpc2_target;
extern char *rpc2_job_id;
extern char *rpc_user;
extern char *short_url;
json_t *json_rpc2_call(CURL *curl, const char *url, const char *userpass, const char *rpc_req, int *curl_err, int flags);
bool rpc2_login(CURL *curl);
@@ -553,6 +553,7 @@ enum algos {
ALGO_YESCRYPT,
ALGO_YESCRYPTR8,
ALGO_YESCRYPTR16,
ALGO_YESCRYPTR32,
ALGO_ZR5,
ALGO_COUNT
};
@@ -629,6 +630,7 @@ static const char* const algo_names[] = {
"yescrypt",
"yescryptr8",
"yescryptr16",
"yescryptr32",
"zr5",
"\0"
};
@@ -648,6 +650,10 @@ extern int opt_timeout;
extern bool want_longpoll;
extern bool have_longpoll;
extern bool have_gbt;
extern char* lp_id;
extern char *rpc_userpass;
extern const char *gbt_lp_req;
extern const char *getwork_req;
extern bool allow_getwork;
extern bool want_stratum;
extern bool have_stratum;
@@ -760,6 +766,7 @@ Options:\n\
yescrypt Globalboost-Y (BSTY)\n\
yescryptr8 BitZeny (ZNY)\n\
yescryptr16 Yenten (YTN)\n\
yescryptr32 WAVI\n\
zr5 Ziftr\n\
-o, --url=URL URL of mining server\n\
-O, --userpass=U:P username:password pair for mining server\n\