diff --git a/README.md b/README.md index efe55e9..ac1c0db 100644 --- a/README.md +++ b/README.md @@ -12,14 +12,23 @@ a false positive, they are flagged simply because they are cryptocurrency miners. The source code is open for anyone to inspect. If you don't trust the software, don't use it. + +New thread: + +https://bitcointalk.org/index.php?topic=5226770.msg53865575#msg53865575 + +Old thread: + https://bitcointalk.org/index.php?topic=1326803.0 mailto://jayddee246@gmail.com +This note is to confirm that bitcointalk users JayDDee and joblo are the +same person. I created a new BCT user JayDDee to match my github user id. -Over timme all discussion will be migrated using the new user. -This note is to confirm JayDDee is the sae person as joblo. +The old thread has been locked but still contains useful information for +reading. See file RELEASE_NOTES for change log and INSTALL_LINUX or INSTALL_WINDOWS for compile instructions. diff --git a/RELEASE_NOTES b/RELEASE_NOTES index c91cf69..3da8ee8 100644 --- a/RELEASE_NOTES +++ b/RELEASE_NOTES @@ -65,9 +65,15 @@ If not what makes it happen or not happen? Change Log ---------- +v3.12.4 + +Issue #244: Change longpoll to ignore job id. + +Lyra2rev2 AVX2 +3%, AVX512 +6%. + v3.12.3.1 -Issue #241: Fixed regression that broke GBT in v3.11.2. +Issue #241: Fixed regression that broke coinbase address in v3.11.7. 
v3.12.3 diff --git a/algo/lyra2/lyra2-gate.c b/algo/lyra2/lyra2-gate.c index 5ec0551..6e42778 100644 --- a/algo/lyra2/lyra2-gate.c +++ b/algo/lyra2/lyra2-gate.c @@ -94,12 +94,12 @@ bool lyra2rev2_thread_init() const int64_t ROW_LEN_BYTES = ROW_LEN_INT64 * 8; int size = (int64_t)ROW_LEN_BYTES * 4; // nRows; -#if defined (LYRA2REV2_8WAY) +#if defined (LYRA2REV2_16WAY) l2v2_wholeMatrix = _mm_malloc( 2 * size, 64 ); // 2 way - init_lyra2rev2_8way_ctx();; -#elif defined (LYRA2REV2_4WAY) + init_lyra2rev2_16way_ctx();; +#elif defined (LYRA2REV2_8WAY) l2v2_wholeMatrix = _mm_malloc( size, 64 ); - init_lyra2rev2_4way_ctx();; + init_lyra2rev2_8way_ctx();; #else l2v2_wholeMatrix = _mm_malloc( size, 64 ); init_lyra2rev2_ctx(); @@ -109,12 +109,12 @@ bool lyra2rev2_thread_init() bool register_lyra2rev2_algo( algo_gate_t* gate ) { -#if defined (LYRA2REV2_8WAY) +#if defined (LYRA2REV2_16WAY) + gate->scanhash = (void*)&scanhash_lyra2rev2_16way; + gate->hash = (void*)&lyra2rev2_16way_hash; +#elif defined (LYRA2REV2_8WAY) gate->scanhash = (void*)&scanhash_lyra2rev2_8way; gate->hash = (void*)&lyra2rev2_8way_hash; -#elif defined (LYRA2REV2_4WAY) - gate->scanhash = (void*)&scanhash_lyra2rev2_4way; - gate->hash = (void*)&lyra2rev2_4way_hash; #else gate->scanhash = (void*)&scanhash_lyra2rev2; gate->hash = (void*)&lyra2rev2_hash; diff --git a/algo/lyra2/lyra2-gate.h b/algo/lyra2/lyra2-gate.h index 2f952e6..138d7bb 100644 --- a/algo/lyra2/lyra2-gate.h +++ b/algo/lyra2/lyra2-gate.h @@ -51,30 +51,32 @@ bool init_lyra2rev3_ctx(); ////////////////////////////////// #if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__) - #define LYRA2REV2_8WAY 1 + #define LYRA2REV2_16WAY 1 #elif defined(__AVX2__) - #define LYRA2REV2_4WAY 1 + #define LYRA2REV2_8WAY 1 #endif extern __thread uint64_t* l2v2_wholeMatrix; bool register_lyra2rev2_algo( algo_gate_t* gate ); -#if defined(LYRA2REV2_8WAY) +#if defined(LYRA2REV2_16WAY) + +void lyra2rev2_16way_hash( void *state, 
const void *input ); +int scanhash_lyra2rev2_16way( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ); +bool init_lyra2rev2_16way_ctx(); + +#elif defined(LYRA2REV2_8WAY) void lyra2rev2_8way_hash( void *state, const void *input ); int scanhash_lyra2rev2_8way( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ); bool init_lyra2rev2_8way_ctx(); -#elif defined(LYRA2REV2_4WAY) - -void lyra2rev2_4way_hash( void *state, const void *input ); -int scanhash_lyra2rev2_4way( struct work *work, uint32_t max_nonce, - uint64_t *hashes_done, struct thr_info *mythr ); -bool init_lyra2rev2_4way_ctx(); #else + void lyra2rev2_hash( void *state, const void *input ); int scanhash_lyra2rev2( struct work *work, uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ); diff --git a/algo/lyra2/lyra2rev2-4way.c b/algo/lyra2/lyra2rev2-4way.c index fb0301f..603c34d 100644 --- a/algo/lyra2/lyra2rev2-4way.c +++ b/algo/lyra2/lyra2rev2-4way.c @@ -8,12 +8,30 @@ #include "algo/cubehash/cube-hash-2way.h" -#if 0 -void lyra2rev2_8way_hash( void *state, const void *input ) +#if defined (LYRA2REV2_16WAY) + +typedef struct { + blake256_16way_context blake; + keccak256_8way_context keccak; + cubehashParam cube; + skein256_8way_context skein; + bmw256_16way_context bmw; +} lyra2v2_16way_ctx_holder __attribute__ ((aligned (64))); + +static lyra2v2_16way_ctx_holder l2v2_16way_ctx; + +bool init_lyra2rev2_16way_ctx() { - uint32_t vhash[8*8] __attribute__ ((aligned (128))); - uint32_t vhashA[8*8] __attribute__ ((aligned (64))); - uint32_t vhashB[8*8] __attribute__ ((aligned (64))); + keccak256_8way_init( &l2v2_16way_ctx.keccak ); + cubehashInit( &l2v2_16way_ctx.cube, 256, 16, 32 ); + skein256_8way_init( &l2v2_16way_ctx.skein ); + bmw256_16way_init( &l2v2_16way_ctx.bmw ); + return true; +} + +void lyra2rev2_16way_hash( void *state, const void *input ) +{ + uint32_t vhash[8*16] __attribute__ ((aligned (128))); uint32_t hash0[8] 
__attribute__ ((aligned (64))); uint32_t hash1[8] __attribute__ ((aligned (64))); uint32_t hash2[8] __attribute__ ((aligned (64))); @@ -22,35 +40,60 @@ void lyra2rev2_8way_hash( void *state, const void *input ) uint32_t hash5[8] __attribute__ ((aligned (64))); uint32_t hash6[8] __attribute__ ((aligned (64))); uint32_t hash7[8] __attribute__ ((aligned (64))); - lyra2v2_8way_ctx_holder ctx __attribute__ ((aligned (64))); - memcpy( &ctx, &l2v2_8way_ctx, sizeof(l2v2_8way_ctx) ); + uint32_t hash8[8] __attribute__ ((aligned (64))); + uint32_t hash9[8] __attribute__ ((aligned (64))); + uint32_t hash10[8] __attribute__ ((aligned (64))); + uint32_t hash11[8] __attribute__ ((aligned (64))); + uint32_t hash12[8] __attribute__ ((aligned (64))); + uint32_t hash13[8] __attribute__ ((aligned (64))); + uint32_t hash14[8] __attribute__ ((aligned (64))); + uint32_t hash15[8] __attribute__ ((aligned (64))); + lyra2v2_16way_ctx_holder ctx __attribute__ ((aligned (64))); + memcpy( &ctx, &l2v2_16way_ctx, sizeof(l2v2_16way_ctx) ); - blake256_8way_update( &ctx.blake, input + (64<<3), 16 ); - blake256_8way_close( &ctx.blake, vhash ); + blake256_16way_update( &ctx.blake, input + (64<<4), 16 ); + blake256_16way_close( &ctx.blake, vhash ); - rintrlv_8x32_8x64( vhashA, vhash, 256 ); + dintrlv_16x32( hash0, hash1, hash2, hash3, + hash4, hash5, hash6, hash7, + hash8, hash9, hash10, hash11, + hash12, hash13, hash14, hash15, vhash, 256 ); - keccak256_8way_update( &ctx.keccak, vhashA, 32 ); + intrlv_8x64( vhash, hash0, hash1, hash2, hash3, + hash4, hash5, hash6, hash7, 256 ); + + keccak256_8way_update( &ctx.keccak, vhash, 32 ); keccak256_8way_close( &ctx.keccak, vhash ); dintrlv_8x64( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7, vhash, 256 ); + intrlv_8x64( vhash, hash8, hash9, hash10, hash11, + hash12, hash13, hash14, hash15, 256 ); - cubehash_full( &ctx.cube, (byte*) hash0, 256, (const byte*) hash0, 32 ); - cubehash_full( &ctx.cube, (byte*) hash1, 256, (const byte*) hash1, 32 ); - 
cubehash_full( &ctx.cube, (byte*) hash2, 256, (const byte*) hash2, 32 ); - cubehash_full( &ctx.cube, (byte*) hash3, 256, (const byte*) hash3, 32 ); - cubehash_full( &ctx.cube, (byte*) hash4, 256, (const byte*) hash4, 32 ); - cubehash_full( &ctx.cube, (byte*) hash5, 256, (const byte*) hash5, 32 ); - cubehash_full( &ctx.cube, (byte*) hash6, 256, (const byte*) hash6, 32 ); - cubehash_full( &ctx.cube, (byte*) hash7, 256, (const byte*) hash7, 32 ); + keccak256_8way_init( &ctx.keccak ); + keccak256_8way_update( &ctx.keccak, vhash, 32 ); + keccak256_8way_close( &ctx.keccak, vhash ); + + dintrlv_8x64( hash8, hash9, hash10, hash11, + hash12, hash13, hash14, hash15, vhash, 256 ); + + cubehash_full( &ctx.cube, (byte*) hash0, 256, (const byte*) hash0, 32 ); + cubehash_full( &ctx.cube, (byte*) hash1, 256, (const byte*) hash1, 32 ); + cubehash_full( &ctx.cube, (byte*) hash2, 256, (const byte*) hash2, 32 ); + cubehash_full( &ctx.cube, (byte*) hash3, 256, (const byte*) hash3, 32 ); + cubehash_full( &ctx.cube, (byte*) hash4, 256, (const byte*) hash4, 32 ); + cubehash_full( &ctx.cube, (byte*) hash5, 256, (const byte*) hash5, 32 ); + cubehash_full( &ctx.cube, (byte*) hash6, 256, (const byte*) hash6, 32 ); + cubehash_full( &ctx.cube, (byte*) hash7, 256, (const byte*) hash7, 32 ); + cubehash_full( &ctx.cube, (byte*) hash8, 256, (const byte*) hash8, 32 ); + cubehash_full( &ctx.cube, (byte*) hash9, 256, (const byte*) hash9, 32 ); + cubehash_full( &ctx.cube, (byte*) hash10, 256, (const byte*) hash10, 32 ); + cubehash_full( &ctx.cube, (byte*) hash11, 256, (const byte*) hash11, 32 ); + cubehash_full( &ctx.cube, (byte*) hash12, 256, (const byte*) hash12, 32 ); + cubehash_full( &ctx.cube, (byte*) hash13, 256, (const byte*) hash13, 32 ); + cubehash_full( &ctx.cube, (byte*) hash14, 256, (const byte*) hash14, 32 ); + cubehash_full( &ctx.cube, (byte*) hash15, 256, (const byte*) hash15, 32 ); -// cube_4way_update_close( &ctx.cube, vhashA, vhashA, 32 ); -// cube_4way_init( &ctx.cube, 256, 16, 32 ); 
-// cube_4way_update_close( &ctx.cube, vhashB, vhashB, 32 ); -// -// dintrlv_4x128( hash0, hash1, hash2, hash3, vhashA, 256 ); -// dintrlv_4x128( hash4, hash5, hash6, hash7, vhashB, 256 ); intrlv_2x256( vhash, hash0, hash1, 256 ); LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 ); @@ -64,61 +107,127 @@ void lyra2rev2_8way_hash( void *state, const void *input ) intrlv_2x256( vhash, hash6, hash7, 256 ); LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 ); dintrlv_2x256( hash6, hash7, vhash, 256 ); + intrlv_2x256( vhash, hash8, hash9, 256 ); + LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 ); + dintrlv_2x256( hash8, hash9, vhash, 256 ); + intrlv_2x256( vhash, hash10, hash11, 256 ); + LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 ); + dintrlv_2x256( hash10, hash11, vhash, 256 ); + intrlv_2x256( vhash, hash12, hash13, 256 ); + LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 ); + dintrlv_2x256( hash12, hash13, vhash, 256 ); + intrlv_2x256( vhash, hash14, hash15, 256 ); + LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 ); + dintrlv_2x256( hash14, hash15, vhash, 256 ); - intrlv_8x64( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6, - hash7, 256 ); - + intrlv_8x64( vhash, hash0, hash1, hash2, hash3, + hash4, hash5, hash6, hash7, 256 ); skein256_8way_update( &ctx.skein, vhash, 32 ); skein256_8way_close( &ctx.skein, vhash ); dintrlv_8x64( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7, vhash, 256 ); + intrlv_8x64( vhash, hash8, hash9, hash10, hash11, hash12, + hash13, hash14, hash15, 256 ); - cubehash_full( &ctx.cube, (byte*) hash0, 256, (const byte*) hash0, 32 ); - cubehash_full( &ctx.cube, (byte*) hash1, 256, (const byte*) hash1, 32 ); - cubehash_full( &ctx.cube, (byte*) hash2, 256, (const byte*) hash2, 32 ); - cubehash_full( &ctx.cube, (byte*) hash3, 256, (const byte*) hash3, 32 ); - cubehash_full( &ctx.cube, (byte*) hash4, 256, (const byte*) hash4, 32 ); - 
cubehash_full( &ctx.cube, (byte*) hash5, 256, (const byte*) hash5, 32 ); - cubehash_full( &ctx.cube, (byte*) hash6, 256, (const byte*) hash6, 32 ); - cubehash_full( &ctx.cube, (byte*) hash7, 256, (const byte*) hash7, 32 ); + skein256_8way_init( &ctx.skein ); + skein256_8way_update( &ctx.skein, vhash, 32 ); + skein256_8way_close( &ctx.skein, vhash ); -// cube_4way_init( &ctx.cube, 256, 16, 32 ); -// cube_4way_update_close( &ctx.cube, vhashA, vhashA, 32 ); -// cube_4way_init( &ctx.cube, 256, 16, 32 ); -// cube_4way_update_close( &ctx.cube, vhashB, vhashB, 32 ); -// -// dintrlv_4x128( hash0, hash1, hash2, hash3, vhashA, 256 ); -// dintrlv_4x128( hash4, hash5, hash6, hash7, vhashB, 256 ); + dintrlv_8x64( hash8, hash9, hash10, hash11, + hash12, hash13, hash14, hash15, vhash, 256 ); - intrlv_8x32( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6, - hash7, 256 ); + + cubehash_full( &ctx.cube, (byte*) hash0, 256, (const byte*) hash0, 32 ); + cubehash_full( &ctx.cube, (byte*) hash1, 256, (const byte*) hash1, 32 ); + cubehash_full( &ctx.cube, (byte*) hash2, 256, (const byte*) hash2, 32 ); + cubehash_full( &ctx.cube, (byte*) hash3, 256, (const byte*) hash3, 32 ); + cubehash_full( &ctx.cube, (byte*) hash4, 256, (const byte*) hash4, 32 ); + cubehash_full( &ctx.cube, (byte*) hash5, 256, (const byte*) hash5, 32 ); + cubehash_full( &ctx.cube, (byte*) hash6, 256, (const byte*) hash6, 32 ); + cubehash_full( &ctx.cube, (byte*) hash7, 256, (const byte*) hash7, 32 ); + cubehash_full( &ctx.cube, (byte*) hash8, 256, (const byte*) hash8, 32 ); + cubehash_full( &ctx.cube, (byte*) hash9, 256, (const byte*) hash9, 32 ); + cubehash_full( &ctx.cube, (byte*) hash10, 256, (const byte*) hash10, 32 ); + cubehash_full( &ctx.cube, (byte*) hash11, 256, (const byte*) hash11, 32 ); + cubehash_full( &ctx.cube, (byte*) hash12, 256, (const byte*) hash12, 32 ); + cubehash_full( &ctx.cube, (byte*) hash13, 256, (const byte*) hash13, 32 ); + cubehash_full( &ctx.cube, (byte*) hash14, 256, (const byte*) 
hash14, 32 ); + cubehash_full( &ctx.cube, (byte*) hash15, 256, (const byte*) hash15, 32 ); - bmw256_8way_update( &ctx.bmw, vhash, 32 ); - bmw256_8way_close( &ctx.bmw, state ); + intrlv_16x32( vhash, hash0, hash1, hash2, hash3, + hash4, hash5, hash6, hash7, + hash8, hash9, hash10, hash11, + hash12, hash13, hash14, hash15, 256 ); + + bmw256_16way_update( &ctx.bmw, vhash, 32 ); + bmw256_16way_close( &ctx.bmw, state ); } -#endif +int scanhash_lyra2rev2_16way( struct work *work, const uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ) +{ + uint32_t hash[8*16] __attribute__ ((aligned (128))); + uint32_t vdata[20*16] __attribute__ ((aligned (64))); + uint32_t *hashd7 = &hash[7*16]; + uint32_t lane_hash[8] __attribute__ ((aligned (32))); + uint32_t *pdata = work->data; + uint32_t *ptarget = work->target; + const uint32_t first_nonce = pdata[19]; + const uint32_t last_nonce = max_nonce - 16; + uint32_t n = first_nonce; + const uint32_t targ32 = ptarget[7]; + __m512i *noncev = (__m512i*)vdata + 19; + const int thr_id = mythr->id; + const bool bench = opt_benchmark; + if ( bench ) ptarget[7] = 0x0000ff; + mm512_bswap32_intrlv80_16x32( vdata, pdata ); + *noncev = _mm512_set_epi32( n+15, n+14, n+13, n+12, n+11, n+10, n+ 9, n+ 8, + n+ 7, n+ 6, n+ 5, n+ 4, n+ 3, n+ 2, n+ 1, n ); + blake256_16way_init( &l2v2_16way_ctx.blake ); + blake256_16way_update( &l2v2_16way_ctx.blake, vdata, 64 ); -#if defined (LYRA2REV2_8WAY) + do + { + lyra2rev2_16way_hash( hash, vdata ); + + for ( int lane = 0; lane < 16; lane++ ) + if ( unlikely( hashd7[lane] <= targ32 ) ) + { + extr_lane_16x32( lane_hash, hash, lane, 256 ); + if ( likely( valid_hash( lane_hash, ptarget ) && !bench ) ) + { + pdata[19] = bswap_32( n + lane ); + submit_lane_solution( work, lane_hash, mythr, lane ); + } + } + *noncev = _mm512_add_epi32( *noncev, m512_const1_32( 16 ) ); + n += 16; + } while ( likely( (n < last_nonce) && !work_restart[thr_id].restart ) ); + pdata[19] = n; + *hashes_done = n - first_nonce; + 
return 0; +} + +#elif defined (LYRA2REV2_8WAY) typedef struct { blake256_8way_context blake; - keccak256_8way_context keccak; + keccak256_4way_context keccak; cubehashParam cube; - skein256_8way_context skein; - bmw256_8way_context bmw; + skein256_4way_context skein; + bmw256_8way_context bmw; } lyra2v2_8way_ctx_holder __attribute__ ((aligned (64))); static lyra2v2_8way_ctx_holder l2v2_8way_ctx; bool init_lyra2rev2_8way_ctx() { - keccak256_8way_init( &l2v2_8way_ctx.keccak ); + keccak256_4way_init( &l2v2_8way_ctx.keccak ); cubehashInit( &l2v2_8way_ctx.cube, 256, 16, 32 ); - skein256_8way_init( &l2v2_8way_ctx.skein ); + skein256_4way_init( &l2v2_8way_ctx.skein ); bmw256_8way_init( &l2v2_8way_ctx.bmw ); return true; } @@ -126,7 +235,6 @@ bool init_lyra2rev2_8way_ctx() void lyra2rev2_8way_hash( void *state, const void *input ) { uint32_t vhash[8*8] __attribute__ ((aligned (128))); - uint32_t vhashA[8*8] __attribute__ ((aligned (64))); uint32_t hash0[8] __attribute__ ((aligned (64))); uint32_t hash1[8] __attribute__ ((aligned (64))); uint32_t hash2[8] __attribute__ ((aligned (64))); @@ -141,14 +249,19 @@ void lyra2rev2_8way_hash( void *state, const void *input ) blake256_8way_update( &ctx.blake, input + (64<<3), 16 ); blake256_8way_close( &ctx.blake, vhash ); - rintrlv_8x32_8x64( vhashA, vhash, 256 ); - - keccak256_8way_update( &ctx.keccak, vhashA, 32 ); - keccak256_8way_close( &ctx.keccak, vhash ); - - dintrlv_8x64( hash0, hash1, hash2, hash3, + dintrlv_8x32( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7, vhash, 256 ); + intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 256 ); + keccak256_4way_update( &ctx.keccak, vhash, 32 ); + keccak256_4way_close( &ctx.keccak, vhash ); + dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 256 ); + intrlv_4x64( vhash, hash4, hash5, hash6, hash7, 256 ); + keccak256_4way_init( &ctx.keccak ); + keccak256_4way_update( &ctx.keccak, vhash, 32 ); + keccak256_4way_close( &ctx.keccak, vhash ); + dintrlv_4x64( hash4, hash5, hash6, hash7, 
vhash, 256 ); + cubehash_full( &ctx.cube, (byte*) hash0, 256, (const byte*) hash0, 32 ); cubehash_full( &ctx.cube, (byte*) hash1, 256, (const byte*) hash1, 32 ); cubehash_full( &ctx.cube, (byte*) hash2, 256, (const byte*) hash2, 32 ); @@ -158,27 +271,25 @@ void lyra2rev2_8way_hash( void *state, const void *input ) cubehash_full( &ctx.cube, (byte*) hash6, 256, (const byte*) hash6, 32 ); cubehash_full( &ctx.cube, (byte*) hash7, 256, (const byte*) hash7, 32 ); - intrlv_2x256( vhash, hash0, hash1, 256 ); - LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 ); - dintrlv_2x256( hash0, hash1, vhash, 256 ); - intrlv_2x256( vhash, hash2, hash3, 256 ); - LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 ); - dintrlv_2x256( hash2, hash3, vhash, 256 ); - intrlv_2x256( vhash, hash4, hash5, 256 ); - LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 ); - dintrlv_2x256( hash4, hash5, vhash, 256 ); - intrlv_2x256( vhash, hash6, hash7, 256 ); - LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 ); - dintrlv_2x256( hash6, hash7, vhash, 256 ); - intrlv_8x64( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6, - hash7, 256 ); - - skein256_8way_update( &ctx.skein, vhash, 32 ); - skein256_8way_close( &ctx.skein, vhash ); - - dintrlv_8x64( hash0, hash1, hash2, hash3, - hash4, hash5, hash6, hash7, vhash, 256 ); + LYRA2REV2( l2v2_wholeMatrix, hash0, 32, hash0, 32, hash0, 32, 1, 4, 4 ); + LYRA2REV2( l2v2_wholeMatrix, hash1, 32, hash1, 32, hash1, 32, 1, 4, 4 ); + LYRA2REV2( l2v2_wholeMatrix, hash2, 32, hash2, 32, hash2, 32, 1, 4, 4 ); + LYRA2REV2( l2v2_wholeMatrix, hash3, 32, hash3, 32, hash3, 32, 1, 4, 4 ); + LYRA2REV2( l2v2_wholeMatrix, hash4, 32, hash4, 32, hash4, 32, 1, 4, 4 ); + LYRA2REV2( l2v2_wholeMatrix, hash5, 32, hash5, 32, hash5, 32, 1, 4, 4 ); + LYRA2REV2( l2v2_wholeMatrix, hash6, 32, hash6, 32, hash6, 32, 1, 4, 4 ); + LYRA2REV2( l2v2_wholeMatrix, hash7, 32, hash7, 32, hash7, 32, 1, 4, 4 ); + + intrlv_4x64( vhash, hash0, 
hash1, hash2, hash3, 256 ); + skein256_4way_update( &ctx.skein, vhash, 32 ); + skein256_4way_close( &ctx.skein, vhash ); + dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 256 ); + intrlv_4x64( vhash, hash4, hash5, hash6, hash7, 256 ); + skein256_4way_init( &ctx.skein ); + skein256_4way_update( &ctx.skein, vhash, 32 ); + skein256_4way_close( &ctx.skein, vhash ); + dintrlv_4x64( hash4, hash5, hash6, hash7, vhash, 256 ); cubehash_full( &ctx.cube, (byte*) hash0, 256, (const byte*) hash0, 32 ); cubehash_full( &ctx.cube, (byte*) hash1, 256, (const byte*) hash1, 32 ); @@ -189,8 +300,8 @@ void lyra2rev2_8way_hash( void *state, const void *input ) cubehash_full( &ctx.cube, (byte*) hash6, 256, (const byte*) hash6, 32 ); cubehash_full( &ctx.cube, (byte*) hash7, 256, (const byte*) hash7, 32 ); - intrlv_8x32( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6, - hash7, 256 ); + intrlv_8x32( vhash, hash0, hash1, hash2, hash3, + hash4, hash5, hash6, hash7, 256 ); bmw256_8way_update( &ctx.bmw, vhash, 32 ); bmw256_8way_close( &ctx.bmw, state ); @@ -223,7 +334,6 @@ int scanhash_lyra2rev2_8way( struct work *work, const uint32_t max_nonce, do { lyra2rev2_8way_hash( hash, vdata ); - pdata[19] = n; for ( int lane = 0; lane < 8; lane++ ) if ( unlikely( hashd7[lane] <= targ32 ) ) @@ -243,6 +353,9 @@ int scanhash_lyra2rev2_8way( struct work *work, const uint32_t max_nonce, return 0; } +#endif + +/* #elif defined (LYRA2REV2_4WAY) typedef struct { @@ -367,3 +480,4 @@ int scanhash_lyra2rev2_4way( struct work *work, uint32_t max_nonce, } #endif +*/ diff --git a/configure b/configure index 782e291..477f673 100755 --- a/configure +++ b/configure @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.12.3.1. +# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.12.4. # # # Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc. @@ -577,8 +577,8 @@ MAKEFLAGS= # Identity of this package. 
PACKAGE_NAME='cpuminer-opt' PACKAGE_TARNAME='cpuminer-opt' -PACKAGE_VERSION='3.12.3.1' -PACKAGE_STRING='cpuminer-opt 3.12.3.1' +PACKAGE_VERSION='3.12.4' +PACKAGE_STRING='cpuminer-opt 3.12.4' PACKAGE_BUGREPORT='' PACKAGE_URL='' @@ -1332,7 +1332,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures cpuminer-opt 3.12.3.1 to adapt to many kinds of systems. +\`configure' configures cpuminer-opt 3.12.4 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1404,7 +1404,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of cpuminer-opt 3.12.3.1:";; + short | recursive ) echo "Configuration of cpuminer-opt 3.12.4:";; esac cat <<\_ACEOF @@ -1509,7 +1509,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -cpuminer-opt configure 3.12.3.1 +cpuminer-opt configure 3.12.4 generated by GNU Autoconf 2.69 Copyright (C) 2012 Free Software Foundation, Inc. @@ -2012,7 +2012,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by cpuminer-opt $as_me 3.12.3.1, which was +It was created by cpuminer-opt $as_me 3.12.4, which was generated by GNU Autoconf 2.69. Invocation command line was $ $0 $@ @@ -2993,7 +2993,7 @@ fi # Define the identity of the package. PACKAGE='cpuminer-opt' - VERSION='3.12.3.1' + VERSION='3.12.4' cat >>confdefs.h <<_ACEOF @@ -6690,7 +6690,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by cpuminer-opt $as_me 3.12.3.1, which was +This file was extended by cpuminer-opt $as_me 3.12.4, which was generated by GNU Autoconf 2.69. 
Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -6756,7 +6756,7 @@ _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" ac_cs_version="\\ -cpuminer-opt config.status 3.12.3.1 +cpuminer-opt config.status 3.12.4 configured by $0, generated by GNU Autoconf 2.69, with options \\"\$ac_cs_config\\" diff --git a/configure.ac b/configure.ac index 838a7e7..e3b7f58 100644 --- a/configure.ac +++ b/configure.ac @@ -1,4 +1,4 @@ -AC_INIT([cpuminer-opt], [3.12.3.1]) +AC_INIT([cpuminer-opt], [3.12.4]) AC_PREREQ([2.59c]) AC_CANONICAL_SYSTEM diff --git a/cpu-miner.c b/cpu-miner.c index 7541cd1..ef80492 100644 --- a/cpu-miner.c +++ b/cpu-miner.c @@ -1152,10 +1152,25 @@ static int share_result( int result, struct work *null_work, my_stats.share_count, acol, ares, scol, sres, rcol, rres, bcol, bres, share_time, latency ); +// purge job id when solo, diff is good + if ( !opt_quiet ) + { + if ( have_stratum ) + applog2( LOG_NOTICE, "Diff %.5g (%.3g%), %sBlock %d, %sJob %s" CL_WHT, + my_stats.share_diff, share_ratio, bcol, stratum.block_height, + scol, my_stats.job_id ); + else + applog2( LOG_NOTICE, "Diff %.5g (%.3g%), %sBlock %d" CL_WHT, + my_stats.share_diff, share_ratio, bcol, stratum.block_height, + scol ); + } + +/* if ( have_stratum && !opt_quiet ) applog2( LOG_NOTICE, "Diff %.5g (%.3g%), %sBlock %d, %sJob %s" CL_WHT, my_stats.share_diff, share_ratio, bcol, stratum.block_height, scol, my_stats.job_id ); +*/ if ( unlikely( reason && !result ) ) { @@ -1698,8 +1713,12 @@ void work_set_target_ratio( struct work* work, const void *hash ) share_stats[ s_put_ptr ].net_diff = net_diff; share_stats[ s_put_ptr ].stratum_diff = stratum_diff; share_stats[ s_put_ptr ].target_diff = work->targetdiff; - ( (uint64_t*)share_stats[ s_put_ptr ].job_id )[3] = 0; - strncpy( share_stats[ s_put_ptr ].job_id, work->job_id, 30 ); + + +//purge job id when solo + if ( have_stratum ) + strncpy( share_stats[ 
s_put_ptr ].job_id, work->job_id, 30 ); + s_put_ptr = stats_ptr_incr( s_put_ptr ); pthread_mutex_unlock( &stats_lock ); @@ -1712,9 +1731,16 @@ bool submit_solution( struct work *work, const void *hash, { submitted_share_count++; work_set_target_ratio( work, hash ); +//purge job id when solo if ( !opt_quiet ) - applog( LOG_NOTICE, "%d submitted by thread %d, job %s", - submitted_share_count, thr->id, work->job_id ); + { + if ( have_stratum ) + applog( LOG_NOTICE, "%d submitted by thread %d, job %s", + submitted_share_count, thr->id, work->job_id ); + else + applog( LOG_NOTICE, "%d submitted by thread %d", + submitted_share_count, thr->id ); + } if ( lowdiff_debug ) { @@ -1740,9 +1766,16 @@ bool submit_lane_solution( struct work *work, const void *hash, { submitted_share_count++; work_set_target_ratio( work, hash ); +//purge job id when solo if ( !opt_quiet ) - applog( LOG_NOTICE, "%d submitted by thread %d, lane %d, job %s", - submitted_share_count, thr->id, lane, work->job_id ); + { + if ( have_stratum ) + applog( LOG_NOTICE, "%d submitted by thread %d, lane %d, job %s", + submitted_share_count, thr->id, lane, work->job_id ); + else + applog( LOG_NOTICE, "%d submitted by thread %d, lane %d", + submitted_share_count, thr->id, lane ); + } if ( lowdiff_debug ) { @@ -1854,9 +1887,14 @@ void std_get_new_work( struct work* work, struct work* g_work, int thr_id, { uint32_t *nonceptr = work->data + algo_gate.nonce_index; - bool force_new_work = work->job_id ? strtoul( work->job_id, NULL, 16 ) != - strtoul( g_work->job_id, NULL, 16 ) - : true; +//purge job id when solo + bool force_new_work; + if ( have_stratum ) + force_new_work = work->job_id ? strtoul( work->job_id, NULL, 16 ) + != strtoul( g_work->job_id, NULL, 16 ) + : true; + else + force_new_work = false; if ( force_new_work || *nonceptr >= *end_nonce_ptr ) { @@ -2292,13 +2330,18 @@ start: soval = json_object_get(res, "submitold"); submit_old = soval ? 
json_is_true(soval) : false; pthread_mutex_lock(&g_work_lock); - start_job_id = g_work.job_id ? strdup(g_work.job_id) : NULL; + +// This code has been here for a long time even though job_id isn't used. +// This needs to be changed eventually to test the block height properly +// using g_work.block_height . + start_job_id = g_work.job_id ? strdup(g_work.job_id) : NULL; if (have_gbt) rc = gbt_work_decode(res, &g_work); else rc = work_decode(res, &g_work); if (rc) { +// purge job id from solo mining bool newblock = g_work.job_id && strcmp(start_job_id, g_work.job_id); newblock |= (start_diff != net_diff); // the best is the height but... longpoll... if (newblock)