diff --git a/Makefile.am b/Makefile.am
index 736297a..7ae1d4a 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -163,6 +163,7 @@ cpuminer_SOURCES = \
   algo/sha/sha256-hash-4way.c \
   algo/sha/sha512-hash-4way.c \
   algo/sha/hmac-sha256-hash.c \
+  algo/sha/hmac-sha256-hash-4way.c \
   algo/sha/sha2.c \
   algo/sha/sha256t-gate.c \
   algo/sha/sha256t-4way.c \
diff --git a/RELEASE_NOTES b/RELEASE_NOTES
index 6d0e1e1..505d88e 100644
--- a/RELEASE_NOTES
+++ b/RELEASE_NOTES
@@ -65,6 +65,13 @@ If not what makes it happen or not happen?
 Change Log
 ----------
 
+v3.12.7
+
+Issue #257: fixed a file descriptor leak which caused the CPU temperature
+and frequency query to report zeros after mining for a couple of hours.
+
+Issue #253: stale share reduction for yescrypt, sonoa.
+
 v3.12.6.1
 
 Issue #252: Fixed SSL mining (stratum+tcps://)
@@ -106,7 +113,7 @@ a specific algo name.
 v3.12.4.6
 
 Issue #246: fixed getwork repeated new block logs with same height. New work
-for the same block is now reported as "New work" instead of New block".
+for the same block is now reported as "New work" instead of "New block".
 Also added a check that work is new before generating "New work" log.
 Added target diff to getwork new block log.
 
diff --git a/algo-gate-api.c b/algo-gate-api.c
index b934529..5f75f1e 100644
--- a/algo-gate-api.c
+++ b/algo-gate-api.c
@@ -232,11 +232,6 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
      case ALGO_X22I:         register_x22i_algo         ( gate ); break;
      case ALGO_X25X:         register_x25x_algo         ( gate ); break;
      case ALGO_XEVAN:        register_xevan_algo        ( gate ); break;
-/*   case ALGO_YESCRYPT:     register_yescrypt_05_algo    ( gate ); break;
-     case ALGO_YESCRYPTR8:   register_yescryptr8_05_algo  ( gate ); break;
-     case ALGO_YESCRYPTR16:  register_yescryptr16_05_algo ( gate ); break;
-     case ALGO_YESCRYPTR32:  register_yescryptr32_05_algo ( gate ); break;
-*/
      case ALGO_YESCRYPT:     register_yescrypt_algo     ( gate ); break;
      case ALGO_YESCRYPTR8:   register_yescryptr8_algo   ( gate ); break;
      case ALGO_YESCRYPTR8G:  register_yescryptr8g_algo  ( gate ); break;
diff --git a/algo/sha/hmac-sha256-hash-4way.c b/algo/sha/hmac-sha256-hash-4way.c
new file mode 100644
index 0000000..43fa272
--- /dev/null
+++ b/algo/sha/hmac-sha256-hash-4way.c
@@ -0,0 +1,440 @@
+/*-
+ * Copyright 2005,2007,2009 Colin Percival
+ * Copyright 2020 JayDDee246@gmail.com
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include <stdint.h>
+#include <string.h>
+#include "hmac-sha256-hash-4way.h"
+#include "compat.h"
+
+// HMAC 4-way SSE2
+
+/**
+ * HMAC_SHA256_Buf(K, Klen, in, len, digest):
+ * Compute the HMAC-SHA256 of ${len} bytes from ${in} using the key ${K} of
+ * length ${Klen}, and write the result to ${digest}.
+ */
+void
+hmac_sha256_4way_full( void *digest, const void *K, size_t Klen,
+                       const void *in, size_t len )
+{
+   hmac_sha256_4way_context ctx;
+   hmac_sha256_4way_init( &ctx, K, Klen );
+   hmac_sha256_4way_update( &ctx, in, len );
+   hmac_sha256_4way_close( &ctx, digest );
+}
+
+/* Initialize an HMAC-SHA256 operation with the given key. */
+void
+hmac_sha256_4way_init( hmac_sha256_4way_context *ctx, const void *_K,
+                       size_t Klen )
+{
+   unsigned char pad[64*4] __attribute__ ((aligned (64)));
+   unsigned char khash[32*4] __attribute__ ((aligned (64)));
+   const unsigned char * K = _K;
+   size_t i;
+
+   /* If Klen > 64, the key is really SHA256(K). */
+   if ( Klen > 64 )
+   {
+      sha256_4way_init( &ctx->ictx );
+      sha256_4way_update( &ctx->ictx, K, Klen );
+      sha256_4way_close( &ctx->ictx, khash );
+      K = khash;
+      Klen = 32;
+   }
+
+   /* Inner SHA256 operation is SHA256(K xor [block of 0x36] || data). */
+   sha256_4way_init( &ctx->ictx );
+   memset( pad, 0x36, 64*4 );
+
+   for ( i = 0; i < Klen/4; i++ )
+      casti_m128i( pad, i ) = _mm_xor_si128( casti_m128i( pad, i ),
+                                             casti_m128i( K, i ) );
+
+   sha256_4way_update( &ctx->ictx, pad, 64 );
+
+   /* Outer SHA256 operation is SHA256(K xor [block of 0x5c] || hash). */
+   sha256_4way_init( &ctx->octx );
+   memset( pad, 0x5c, 64*4 );
+   for ( i = 0; i < Klen/4; i++ )
+      casti_m128i( pad, i ) = _mm_xor_si128( casti_m128i( pad, i ),
+                                             casti_m128i( K, i ) );
+   sha256_4way_update( &ctx->octx, pad, 64 );
+}
+
+/* Add bytes to the HMAC-SHA256 operation. */
+void
+hmac_sha256_4way_update( hmac_sha256_4way_context *ctx, const void *in,
+                         size_t len )
+{
+   /* Feed data to the inner SHA256 operation. */
+   sha256_4way_update( &ctx->ictx, in, len );
+}
+
+/* Finish an HMAC-SHA256 operation. */
+void
+hmac_sha256_4way_close( hmac_sha256_4way_context *ctx, void *digest )
+{
+   unsigned char ihash[32*4] __attribute__ ((aligned (64)));
+
+   /* Finish the inner SHA256 operation. */
+   sha256_4way_close( &ctx->ictx, ihash );
+
+   /* Feed the inner hash to the outer SHA256 operation. */
+   sha256_4way_update( &ctx->octx, ihash, 32 );
+
+   /* Finish the outer SHA256 operation. */
+   sha256_4way_close( &ctx->octx, digest );
+}
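The 4-way routines compute four independent HMACs in lock step: key and message buffers are interleaved at 32-bit granularity, so word w of lane l sits at uint32_t index 4*w + l, and every length argument counts bytes per lane. A minimal sketch of feeding four independent messages through a single call; the helper interleave_4x32 and the example function are illustrative, not part of this patch:

#include <stdint.h>
#include <stddef.h>
#include "algo/sha/hmac-sha256-hash-4way.h"

// Interleave four equal-length byte streams word by word (hypothetical
// helper for illustration). len is the per-lane length in bytes and
// must be a multiple of 4.
static void interleave_4x32( uint32_t *dst, const uint32_t *src[4],
                             size_t len )
{
   for ( size_t w = 0; w < len/4; w++ )
      for ( int l = 0; l < 4; l++ )
         dst[ 4*w + l ] = src[l][w];
}

// Four 32-byte keys and four 64-byte messages give four 32-byte MACs,
// all computed by one 4-way call; mac4 is interleaved the same way.
static void hmac4_example( const uint32_t *key[4], const uint32_t *msg[4],
                           void *mac4 )
{
   uint32_t K[ 8*4 ]  __attribute__ ((aligned (64)));   // 32 bytes x 4 lanes
   uint32_t M[ 16*4 ] __attribute__ ((aligned (64)));   // 64 bytes x 4 lanes
   interleave_4x32( K, key, 32 );
   interleave_4x32( M, msg, 64 );
   hmac_sha256_4way_full( mac4, K, 32, M, 64 );         // per-lane lengths
}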
+/**
+ * PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, c, buf, dkLen):
+ * Compute PBKDF2(passwd, salt, c, dkLen) using HMAC-SHA256 as the PRF, and
+ * write the output to buf. The value dkLen must be at most 32 * (2^32 - 1).
+ */
+void
+pbkdf2_sha256_4way( uint8_t *buf, size_t dkLen,
+                    const uint8_t *passwd, size_t passwdlen,
+                    const uint8_t *salt, size_t saltlen, uint64_t c )
+{
+   hmac_sha256_4way_context PShctx, hctx;
+   uint8_t _ALIGN(128) T[32*4];
+   uint8_t _ALIGN(128) U[32*4];
+   __m128i ivec;
+   size_t i, clen;
+   uint64_t j;
+   int k;
+
+   /* Compute HMAC state after processing P and S. */
+   hmac_sha256_4way_init( &PShctx, passwd, passwdlen );
+   hmac_sha256_4way_update( &PShctx, salt, saltlen );
+
+   /* Iterate through the blocks. */
+   for ( i = 0; i * 32 < dkLen; i++ )
+   {
+      /* Generate INT(i + 1). */
+      ivec = _mm_set1_epi32( bswap_32( i+1 ) );
+
+      /* Compute U_1 = PRF(P, S || INT(i)). */
+      memcpy( &hctx, &PShctx, sizeof(hmac_sha256_4way_context) );
+      hmac_sha256_4way_update( &hctx, &ivec, 4 );
+      hmac_sha256_4way_close( &hctx, U );
+
+      /* T_i = U_1 ... */
+      memcpy( T, U, 32*4 );
+
+      for ( j = 2; j <= c; j++ )
+      {
+         /* Compute U_j. */
+         hmac_sha256_4way_init( &hctx, passwd, passwdlen );
+         hmac_sha256_4way_update( &hctx, U, 32 );
+         hmac_sha256_4way_close( &hctx, U );
+
+         /* ... xor U_j ... */
+         for ( k = 0; k < 8; k++ )
+            casti_m128i( T, k ) = _mm_xor_si128( casti_m128i( T, k ),
+                                                 casti_m128i( U, k ) );
+      }
+
+      /* Copy as many bytes as necessary into buf. */
+      clen = dkLen - i * 32;
+      if ( clen > 32 )
+         clen = 32;
+      memcpy( &buf[ i*32*4 ], T, clen*4 );
+   }
+}
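pbkdf2_sha256_4way writes its output interleaved as well: each 32-byte block T is stored as 128 bytes at buf[i*32*4], still one 32-bit word per lane. A sketch of pulling one lane's derived key back out of the vector buffer; the helper name is illustrative and dkLen is assumed to be a multiple of 4:

#include <stdint.h>
#include <stddef.h>

// Extract lane 'lane' (0..3) from the interleaved output of
// pbkdf2_sha256_4way: 32-byte blocks, 8 words per lane per block.
static void extract_lane_4x32( uint8_t *dst, const uint8_t *buf4,
                               int lane, size_t dkLen )
{
   const uint32_t *s = (const uint32_t*)buf4;
   uint32_t *d = (uint32_t*)dst;
   for ( size_t w = 0; w < dkLen/4; w++ )        // per-lane 32-bit words
      d[w] = s[ (w/8)*32 + 4*(w % 8) + lane ];   // block, word, lane
}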
+#if defined(__AVX2__)
+
+// HMAC 8-way AVX2
+
+void
+hmac_sha256_8way_full( void *digest, const void *K, size_t Klen,
+                       const void *in, size_t len )
+{
+   hmac_sha256_8way_context ctx;
+   hmac_sha256_8way_init( &ctx, K, Klen );
+   hmac_sha256_8way_update( &ctx, in, len );
+   hmac_sha256_8way_close( &ctx, digest );
+}
+
+/* Initialize an HMAC-SHA256 operation with the given key. */
+void
+hmac_sha256_8way_init( hmac_sha256_8way_context *ctx, const void *_K,
+                       size_t Klen )
+{
+   unsigned char pad[64*8] __attribute__ ((aligned (128)));
+   unsigned char khash[32*8] __attribute__ ((aligned (128)));
+   const unsigned char * K = _K;
+   size_t i;
+
+   /* If Klen > 64, the key is really SHA256(K). */
+   if ( Klen > 64 )
+   {
+      sha256_8way_init( &ctx->ictx );
+      sha256_8way_update( &ctx->ictx, K, Klen );
+      sha256_8way_close( &ctx->ictx, khash );
+      K = khash;
+      Klen = 32;
+   }
+
+   /* Inner SHA256 operation is SHA256(K xor [block of 0x36] || data). */
+   sha256_8way_init( &ctx->ictx );
+   memset( pad, 0x36, 64*8 );
+
+   for ( i = 0; i < Klen/4; i++ )
+      casti_m256i( pad, i ) = _mm256_xor_si256( casti_m256i( pad, i ),
+                                                casti_m256i( K, i ) );
+
+   sha256_8way_update( &ctx->ictx, pad, 64 );
+
+   /* Outer SHA256 operation is SHA256(K xor [block of 0x5c] || hash). */
+   sha256_8way_init( &ctx->octx );
+   memset( pad, 0x5c, 64*8 );
+   for ( i = 0; i < Klen/4; i++ )
+      casti_m256i( pad, i ) = _mm256_xor_si256( casti_m256i( pad, i ),
+                                                casti_m256i( K, i ) );
+   sha256_8way_update( &ctx->octx, pad, 64 );
+}
+
+void
+hmac_sha256_8way_update( hmac_sha256_8way_context *ctx, const void *in,
+                         size_t len )
+{
+   /* Feed data to the inner SHA256 operation. */
+   sha256_8way_update( &ctx->ictx, in, len );
+}
+
+/* Finish an HMAC-SHA256 operation. */
+void
+hmac_sha256_8way_close( hmac_sha256_8way_context *ctx, void *digest )
+{
+   unsigned char ihash[32*8] __attribute__ ((aligned (128)));
+
+   /* Finish the inner SHA256 operation. */
+   sha256_8way_close( &ctx->ictx, ihash );
+
+   /* Feed the inner hash to the outer SHA256 operation. */
+   sha256_8way_update( &ctx->octx, ihash, 32 );
+
+   /* Finish the outer SHA256 operation. */
+   sha256_8way_close( &ctx->octx, digest );
+}
+
+/**
+ * PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, c, buf, dkLen):
+ * Compute PBKDF2(passwd, salt, c, dkLen) using HMAC-SHA256 as the PRF, and
+ * write the output to buf. The value dkLen must be at most 32 * (2^32 - 1).
+ */
+void
+pbkdf2_sha256_8way( uint8_t *buf, size_t dkLen, const uint8_t *passwd,
+                    size_t passwdlen, const uint8_t *salt, size_t saltlen,
+                    uint64_t c )
+{
+   hmac_sha256_8way_context PShctx, hctx;
+   uint8_t _ALIGN(128) T[32*8];
+   uint8_t _ALIGN(128) U[32*8];
+   size_t i, clen;
+   uint64_t j;
+   int k;
+
+   /* Compute HMAC state after processing P and S. */
+   hmac_sha256_8way_init( &PShctx, passwd, passwdlen );
+
+// saltlen can be an odd number of bytes
+   hmac_sha256_8way_update( &PShctx, salt, saltlen );
+
+   /* Iterate through the blocks. */
+   for ( i = 0; i * 32 < dkLen; i++ )
+   {
+      __m256i ivec = _mm256_set1_epi32( bswap_32( i+1 ) );
+
+      /* Compute U_1 = PRF(P, S || INT(i)). */
+      memcpy( &hctx, &PShctx, sizeof(hmac_sha256_8way_context) );
+      hmac_sha256_8way_update( &hctx, &ivec, 4 );
+      hmac_sha256_8way_close( &hctx, U );
+
+      /* T_i = U_1 ... */
+      memcpy( T, U, 32*8 );
+
+      for ( j = 2; j <= c; j++ )
+      {
+         /* Compute U_j. */
+         hmac_sha256_8way_init( &hctx, passwd, passwdlen );
+         hmac_sha256_8way_update( &hctx, U, 32 );
+         hmac_sha256_8way_close( &hctx, U );
+
+         /* ... xor U_j ... */
+         for ( k = 0; k < 8; k++ )
+            casti_m256i( T, k ) = _mm256_xor_si256( casti_m256i( T, k ),
+                                                    casti_m256i( U, k ) );
+      }
+
+      /* Copy as many bytes as necessary into buf. */
+      clen = dkLen - i * 32;
+      if ( clen > 32 )
+         clen = 32;
+      memcpy( &buf[ i*32*8 ], T, clen*8 );
+   }
+}
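Both vector PBKDF2 loops above parallelize the standard per-block function F(P, S, c, i) = U_1 xor ... xor U_c, where U_1 = PRF(P, S || INT(i)) and INT(i) is the block index as a big-endian 32-bit integer, which is exactly what the 4-byte ivec update appends to each lane. For reference, one block of the scalar computation; this assumes the usual Init/Update/Final trio declared in hmac-sha256-hash.h:

// Reference sketch of PBKDF2 block i (RFC 2898 F function); the 8-way
// code above runs eight of these in lock step, one per lane.
static void pbkdf2_block_ref( uint8_t T[32],
            const uint8_t *passwd, size_t passwdlen,
            const uint8_t *salt, size_t saltlen, uint64_t c, uint32_t i )
{
   HMAC_SHA256_CTX hctx;
   uint8_t U[32];
   uint8_t ivec[4] = { (uint8_t)(i >> 24), (uint8_t)(i >> 16),
                       (uint8_t)(i >> 8),  (uint8_t)i };    // INT(i)

   HMAC_SHA256_Init( &hctx, passwd, passwdlen );
   HMAC_SHA256_Update( &hctx, salt, saltlen );
   HMAC_SHA256_Update( &hctx, ivec, 4 );
   HMAC_SHA256_Final( U, &hctx );                           // U_1
   memcpy( T, U, 32 );
   for ( uint64_t j = 2; j <= c; j++ )
   {
      HMAC_SHA256_Init( &hctx, passwd, passwdlen );
      HMAC_SHA256_Update( &hctx, U, 32 );
      HMAC_SHA256_Final( U, &hctx );                        // U_j
      for ( int k = 0; k < 32; k++ ) T[k] ^= U[k];          // T_i ^= U_j
   }
}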
+#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
+
+// HMAC 16-way AVX512
+
+void
+hmac_sha256_16way_full( void *digest, const void *K, size_t Klen,
+                        const void *in, size_t len )
+{
+   hmac_sha256_16way_context ctx;
+   hmac_sha256_16way_init( &ctx, K, Klen );
+   hmac_sha256_16way_update( &ctx, in, len );
+   hmac_sha256_16way_close( &ctx, digest );
+}
+
+void
+hmac_sha256_16way_init( hmac_sha256_16way_context *ctx, const void *_K,
+                        size_t Klen )
+{
+   unsigned char pad[64*16] __attribute__ ((aligned (128)));
+   unsigned char khash[32*16] __attribute__ ((aligned (128)));
+   const unsigned char * K = _K;
+   size_t i;
+
+   /* If Klen > 64, the key is really SHA256(K). */
+   if ( Klen > 64 )
+   {
+      sha256_16way_init( &ctx->ictx );
+      sha256_16way_update( &ctx->ictx, K, Klen );
+      sha256_16way_close( &ctx->ictx, khash );
+      K = khash;
+      Klen = 32;
+   }
+
+   /* Inner SHA256 operation is SHA256(K xor [block of 0x36] || data). */
+   sha256_16way_init( &ctx->ictx );
+   memset( pad, 0x36, 64*16 );
+
+   for ( i = 0; i < Klen/4; i++ )
+      casti_m512i( pad, i ) = _mm512_xor_si512( casti_m512i( pad, i ),
+                                                casti_m512i( K, i ) );
+   sha256_16way_update( &ctx->ictx, pad, 64 );
+
+   /* Outer SHA256 operation is SHA256(K xor [block of 0x5c] || hash). */
+   sha256_16way_init( &ctx->octx );
+   memset( pad, 0x5c, 64*16 );
+   for ( i = 0; i < Klen/4; i++ )
+      casti_m512i( pad, i ) = _mm512_xor_si512( casti_m512i( pad, i ),
+                                                casti_m512i( K, i ) );
+   sha256_16way_update( &ctx->octx, pad, 64 );
+}
+
+void
+hmac_sha256_16way_update( hmac_sha256_16way_context *ctx, const void *in,
+                          size_t len )
+{
+   /* Feed data to the inner SHA256 operation. */
+   sha256_16way_update( &ctx->ictx, in, len );
+}
+
+/* Finish an HMAC-SHA256 operation. */
+void
+hmac_sha256_16way_close( hmac_sha256_16way_context *ctx, void *digest )
+{
+   unsigned char ihash[32*16] __attribute__ ((aligned (128)));
+
+   /* Finish the inner SHA256 operation. */
+   sha256_16way_close( &ctx->ictx, ihash );
+
+   /* Feed the inner hash to the outer SHA256 operation. */
+   sha256_16way_update( &ctx->octx, ihash, 32 );
+
+   /* Finish the outer SHA256 operation. */
+   sha256_16way_close( &ctx->octx, digest );
+}
+
+/**
+ * PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, c, buf, dkLen):
+ * Compute PBKDF2(passwd, salt, c, dkLen) using HMAC-SHA256 as the PRF, and
+ * write the output to buf. The value dkLen must be at most 32 * (2^32 - 1).
+ */
+void
+pbkdf2_sha256_16way( uint8_t *buf, size_t dkLen,
+                     const uint8_t *passwd, size_t passwdlen,
+                     const uint8_t *salt, size_t saltlen, uint64_t c )
+{
+   hmac_sha256_16way_context PShctx, hctx;
+   uint8_t _ALIGN(128) T[32*16];
+   uint8_t _ALIGN(128) U[32*16];
+   __m512i ivec;
+   size_t i, clen;
+   uint64_t j;
+   int k;
+
+   /* Compute HMAC state after processing P and S. */
+   hmac_sha256_16way_init( &PShctx, passwd, passwdlen );
+   hmac_sha256_16way_update( &PShctx, salt, saltlen );
+
+   /* Iterate through the blocks. */
+   for ( i = 0; i * 32 < dkLen; i++ )
+   {
+      /* Generate INT(i + 1). */
+      ivec = _mm512_set1_epi32( bswap_32( i+1 ) );
+
+      /* Compute U_1 = PRF(P, S || INT(i)). */
+      memcpy( &hctx, &PShctx, sizeof(hmac_sha256_16way_context) );
+      hmac_sha256_16way_update( &hctx, &ivec, 4 );
+      hmac_sha256_16way_close( &hctx, U );
+
+      /* T_i = U_1 ... */
+      memcpy( T, U, 32*16 );
+
+      for ( j = 2; j <= c; j++ )
+      {
+         /* Compute U_j. */
+         hmac_sha256_16way_init( &hctx, passwd, passwdlen );
+         hmac_sha256_16way_update( &hctx, U, 32 );
+         hmac_sha256_16way_close( &hctx, U );
+
+         /* ... xor U_j ... */
+         for ( k = 0; k < 8; k++ )
+            casti_m512i( T, k ) = _mm512_xor_si512( casti_m512i( T, k ),
+                                                    casti_m512i( U, k ) );
+      }
+
+      /* Copy as many bytes as necessary into buf. */
+      clen = dkLen - i * 32;
+      if ( clen > 32 )
+         clen = 32;
+      memcpy( &buf[ i*32*16 ], T, clen*16 );
+   }
+}
+
+#endif // AVX512
+#endif // AVX2
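Each lane width in the header that follows sits behind the same feature-test macros as its implementation, so a caller can resolve the widest available variant once at compile time. An illustrative selection, mirroring the guards used below:

// Illustrative compile-time selection of the widest available PBKDF2
// variant; the macro names mirror the guards in hmac-sha256-hash-4way.h.
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
  #define PBKDF2_SHA256_NWAY  pbkdf2_sha256_16way
  #define PBKDF2_LANES        16
#elif defined(__AVX2__)
  #define PBKDF2_SHA256_NWAY  pbkdf2_sha256_8way
  #define PBKDF2_LANES        8
#else   // SSE2 baseline
  #define PBKDF2_SHA256_NWAY  pbkdf2_sha256_4way
  #define PBKDF2_LANES        4
#endif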
diff --git a/algo/sha/hmac-sha256-hash-4way.h b/algo/sha/hmac-sha256-hash-4way.h
new file mode 100644
index 0000000..f33fa23
--- /dev/null
+++ b/algo/sha/hmac-sha256-hash-4way.h
@@ -0,0 +1,107 @@
+/*-
+ * Copyright 2005,2007,2009 Colin Percival
+ * Copyright 2020 JayDDee@gmail.com
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: src/lib/libmd/sha256_Y.h,v 1.2 2006/01/17 15:35:56 phk Exp $
+ */
+
+#ifndef HMAC_SHA256_4WAY_H__
+#define HMAC_SHA256_4WAY_H__
+
+// Tested only 8-way with null pers
+
+#include <sys/types.h>
+#include <stdint.h>
+#include "simd-utils.h"
+#include "sha-hash-4way.h"
+
+typedef struct _hmac_sha256_4way_context
+{
+   sha256_4way_context ictx;
+   sha256_4way_context octx;
+} hmac_sha256_4way_context;
+
+//void SHA256_Buf( const void *, size_t len, uint8_t digest[32] );
+void hmac_sha256_4way_init( hmac_sha256_4way_context *, const void *, size_t );
+void hmac_sha256_4way_update( hmac_sha256_4way_context *, const void *,
+                              size_t );
+void hmac_sha256_4way_close( hmac_sha256_4way_context *, void* );
+void hmac_sha256_4way_full( void*, const void *, size_t Klen, const void *,
+                            size_t len );
+
+/**
+ * PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, c, buf, dkLen):
+ * Compute PBKDF2(passwd, salt, c, dkLen) using HMAC-SHA256 as the PRF, and
+ * write the output to buf. The value dkLen must be at most 32 * (2^32 - 1).
+ */
+void pbkdf2_sha256_4way( uint8_t *, size_t, const uint8_t *, size_t,
+                         const uint8_t *, size_t, uint64_t );
+
+#if defined(__AVX2__)
+
+typedef struct _hmac_sha256_8way_context
+{
+   sha256_8way_context ictx;
+   sha256_8way_context octx;
+} hmac_sha256_8way_context;
+
+//void SHA256_Buf( const void *, size_t len, uint8_t digest[32] );
+void hmac_sha256_8way_init( hmac_sha256_8way_context *, const void *, size_t );
+void hmac_sha256_8way_update( hmac_sha256_8way_context *, const void *,
+                              size_t );
+void hmac_sha256_8way_close( hmac_sha256_8way_context *, void* );
+void hmac_sha256_8way_full( void*, const void *, size_t Klen, const void *,
+                            size_t len );
+
+void pbkdf2_sha256_8way( uint8_t *, size_t, const uint8_t *, size_t,
+                         const uint8_t *, size_t, uint64_t );
+
+#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
+
+typedef struct _hmac_sha256_16way_context
+{
+   sha256_16way_context ictx;
+   sha256_16way_context octx;
+} hmac_sha256_16way_context;
+
+//void SHA256_Buf( const void *, size_t len, uint8_t digest[32] );
+void hmac_sha256_16way_init( hmac_sha256_16way_context *,
+                             const void *, size_t );
+void hmac_sha256_16way_update( hmac_sha256_16way_context *, const void *,
+                               size_t );
+void hmac_sha256_16way_close( hmac_sha256_16way_context *, void* );
+void hmac_sha256_16way_full( void*, const void *, size_t Klen, const void *,
+                             size_t len );
+
+void pbkdf2_sha256_16way( uint8_t *, size_t, const uint8_t *, size_t,
+                          const uint8_t *, size_t, uint64_t );
+
+#endif // AVX512
+#endif // AVX2
+
+#endif // HMAC_SHA256_4WAY_H__
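The first change to the scalar file below rebuilds each HMAC pad in a single pass: xor the key into the first Klen bytes, then fill the remaining 64 - Klen bytes with the pad constant, instead of writing all 64 bytes and xoring over them in a second pass. The two constructions are byte-for-byte identical for any Klen <= 64; a self-contained sketch of the equivalence:

#include <stdint.h>
#include <string.h>
#include <assert.h>

// Both constructions yield the same pad; the second, used in the
// rewrite below, touches each byte exactly once.
static void pad_check( const uint8_t *K, size_t Klen )
{
   uint8_t a[64], b[64];

   memset( a, 0x36, 64 );                                   // old: fill...
   for ( size_t i = 0; i < Klen; i++ ) a[i] ^= K[i];        // ...then xor

   for ( size_t i = 0; i < Klen; i++ ) b[i] = K[i] ^ 0x36;  // new: one pass
   memset( b + Klen, 0x36, 64 - Klen );

   assert( memcmp( a, b, 64 ) == 0 );
}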
diff --git a/algo/sha/hmac-sha256-hash.c b/algo/sha/hmac-sha256-hash.c
index 291b122..99b68d8 100644
--- a/algo/sha/hmac-sha256-hash.c
+++ b/algo/sha/hmac-sha256-hash.c
@@ -81,16 +81,17 @@ HMAC_SHA256_Init( HMAC_SHA256_CTX *ctx, const void *_K, size_t Klen )
 
    /* Inner SHA256 operation is SHA256(K xor [block of 0x36] || data). */
    SHA256_Init( &ctx->ictx );
-   memset( pad, 0x36, 64 );
-   for ( i = 0; i < Klen; i++ )
-      pad[i] ^= K[i];
+
+
+   for ( i = 0; i < Klen; i++ )  pad[i] = K[i] ^ 0x36;
+   memset( pad + Klen, 0x36, 64 - Klen );
    SHA256_Update( &ctx->ictx, pad, 64 );
 
    /* Outer SHA256 operation is SHA256(K xor [block of 0x5c] || hash). */
    SHA256_Init( &ctx->octx );
-   memset(pad, 0x5c, 64);
-   for ( i = 0; i < Klen; i++ )
-      pad[i] ^= K[i];
+
+   for ( i = 0; i < Klen; i++ )  pad[i] = K[i] ^ 0x5c;
+   memset( pad + Klen, 0x5c, 64 - Klen );
    SHA256_Update( &ctx->octx, pad, 64 );
 }
 
@@ -161,7 +162,13 @@ PBKDF2_SHA256( const uint8_t *passwd, size_t passwdlen, const uint8_t *salt,
          HMAC_SHA256_Final( U, &hctx );
 
          /* ... xor U_j ... */
-         for ( k = 0; k < 32; k++ )
+//       _mm256_xor_si256( *(__m256i*)T, *(__m256i*)U );
+//       _mm_xor_si128( ((__m128i*)T)[0], ((__m128i*)U)[0] );
+//       _mm_xor_si128( ((__m128i*)T)[1], ((__m128i*)U)[1] );
+
+//       for ( k = 0; k < 4; k++ )  T[k] ^= U[k];
+
+         for ( k = 0; k < 32; k++ )
             T[k] ^= U[k];
       }
 
diff --git a/algo/sha/sha-hash-4way.h b/algo/sha/sha-hash-4way.h
index 3635dd9..3a0c61b 100644
--- a/algo/sha/sha-hash-4way.h
+++ b/algo/sha/sha-hash-4way.h
@@ -58,6 +58,7 @@ void sha256_4way_init( sha256_4way_context *sc );
 void sha256_4way_update( sha256_4way_context *sc, const void *data,
                          size_t len );
 void sha256_4way_close( sha256_4way_context *sc, void *dst );
+void sha256_4way_full( void *dst, const void *data, size_t len );
 
 #endif  // SSE2
 
@@ -75,6 +76,7 @@ typedef struct {
 void sha256_8way_init( sha256_8way_context *sc );
 void sha256_8way_update( sha256_8way_context *sc, const void *data,
                          size_t len );
 void sha256_8way_close( sha256_8way_context *sc, void *dst );
+void sha256_8way_full( void *dst, const void *data, size_t len );
 
 #endif  // AVX2
 
@@ -92,6 +94,7 @@ typedef struct {
 void sha256_16way_init( sha256_16way_context *sc );
 void sha256_16way_update( sha256_16way_context *sc, const void *data,
                           size_t len );
 void sha256_16way_close( sha256_16way_context *sc, void *dst );
+void sha256_16way_full( void *dst, const void *data, size_t len );
 
 #endif  // AVX512
 
@@ -110,6 +113,7 @@ void sha512_4way_init( sha512_4way_context *sc);
 void sha512_4way_update( sha512_4way_context *sc, const void *data,
                          size_t len );
 void sha512_4way_close( sha512_4way_context *sc, void *dst );
+void sha512_4way_full( void *dst, const void *data, size_t len );
 
 #endif  // AVX2
 
@@ -128,6 +132,7 @@ void sha512_8way_init( sha512_8way_context *sc);
 void sha512_8way_update( sha512_8way_context *sc, const void *data,
                          size_t len );
 void sha512_8way_close( sha512_8way_context *sc, void *dst );
+void sha512_8way_full( void *dst, const void *data, size_t len );
 
 #endif  // AVX512
 
diff --git a/algo/sha/sha256-hash-4way.c b/algo/sha/sha256-hash-4way.c
index ed10673..d9fb503 100644
--- a/algo/sha/sha256-hash-4way.c
+++ b/algo/sha/sha256-hash-4way.c
@@ -330,6 +330,14 @@ void sha256_4way_close( sha256_4way_context *sc, void *dst )
    mm128_block_bswap_32( dst, sc->val );
 }
 
+void sha256_4way_full( void *dst, const void *data, size_t len )
+{
+   sha256_4way_context ctx;
+   sha256_4way_init( &ctx );
+   sha256_4way_update( &ctx, data, len );
+   sha256_4way_close( &ctx, dst );
+}
+
 #if defined(__AVX2__)
 
 // SHA-256 8 way
@@ -498,6 +506,10 @@ void sha256_8way_init( sha256_8way_context *sc )
 */
 }
 
+
+// Need to handle odd byte length for yespower.
+// Assume only the last update is odd.
+
 void sha256_8way_update( sha256_8way_context *sc, const void *data,
                          size_t len )
 {
    __m256i *vdata = (__m256i*)data;
@@ -564,6 +576,13 @@ void sha256_8way_close( sha256_8way_context *sc, void *dst )
    mm256_block_bswap_32( dst, sc->val );
 }
 
+void sha256_8way_full( void *dst, const void *data, size_t len )
+{
+   sha256_8way_context ctx;
+   sha256_8way_init( &ctx );
+   sha256_8way_update( &ctx, data, len );
+   sha256_8way_close( &ctx, dst );
+}
 
 #if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
 
@@ -791,6 +810,14 @@ void sha256_16way_close( sha256_16way_context *sc, void *dst )
    mm512_block_bswap_32( dst, sc->val );
 }
 
+void sha256_16way_full( void *dst, const void *data, size_t len )
+{
+   sha256_16way_context ctx;
+   sha256_16way_init( &ctx );
+   sha256_16way_update( &ctx, data, len );
+   sha256_16way_close( &ctx, dst );
+}
+
 #endif  // AVX512
 #endif  // __AVX2__
 #endif  // __SSE2__
diff --git a/algo/x17/sonoa-4way.c b/algo/x17/sonoa-4way.c
index 9b9380b..28ddd7e 100644
--- a/algo/x17/sonoa-4way.c
+++ b/algo/x17/sonoa-4way.c
@@ -58,7 +58,7 @@ union _sonoa_8way_context_overlay
 
 typedef union _sonoa_8way_context_overlay sonoa_8way_context_overlay;
 
-void sonoa_8way_hash( void *state, const void *input )
+int sonoa_8way_hash( void *state, const void *input, int thrid )
 {
    uint64_t vhash[8*8] __attribute__ ((aligned (128)));
    uint64_t vhashA[8*8] __attribute__ ((aligned (64)));
@@ -186,6 +186,7 @@ void sonoa_8way_hash( void *state, const void *input )
 
 #endif
 
+   if ( work_restart[thrid].restart ) return 0;
 // 2
 
    bmw512_8way_full( &ctx.bmw, vhash, vhash, 64 );
@@ -301,6 +302,7 @@ void sonoa_8way_hash( void *state, const void *input )
    hamsi512_8way_update( &ctx.hamsi, vhash, 64 );
    hamsi512_8way_close( &ctx.hamsi, vhash );
 
+   if ( work_restart[thrid].restart ) return 0;
 // 3
 
    bmw512_8way_full( &ctx.bmw, vhash, vhash, 64 );
@@ -430,6 +432,7 @@ void sonoa_8way_hash( void *state, const void *input )
    sph_fugue512_full( &ctx.fugue, hash6, hash6, 64 );
    sph_fugue512_full( &ctx.fugue, hash7, hash7, 64 );
 
+   if ( work_restart[thrid].restart ) return 0;
 // 4
 
    intrlv_8x64_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
@@ -627,6 +630,7 @@ void sonoa_8way_hash( void *state, const void *input )
 
 #endif
 
+   if ( work_restart[thrid].restart ) return 0;
 // 5
 
    bmw512_8way_full( &ctx.bmw, vhash, vhash, 64 );
@@ -779,6 +783,7 @@ void sonoa_8way_hash( void *state, const void *input )
    sph_whirlpool512_full( &ctx.whirlpool, hash6, hash6, 64 );
    sph_whirlpool512_full( &ctx.whirlpool, hash7, hash7, 64 );
 
+   if ( work_restart[thrid].restart ) return 0;
 // 6
 
    intrlv_8x64_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
@@ -947,6 +952,7 @@ void sonoa_8way_hash( void *state, const void *input )
    sph_whirlpool512_full( &ctx.whirlpool, hash6, hash6, 64 );
    sph_whirlpool512_full( &ctx.whirlpool, hash7, hash7, 64 );
 
+   if ( work_restart[thrid].restart ) return 0;
 // 7
 
    intrlv_8x64_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
@@ -1108,6 +1114,8 @@ void sonoa_8way_hash( void *state, const void *input )
    haval256_5_8way_init( &ctx.haval );
    haval256_5_8way_update( &ctx.haval, vhashA, 64 );
    haval256_5_8way_close( &ctx.haval, state );
+
+   return 1;
 }
 
 int scanhash_sonoa_8way( struct work *work, uint32_t max_nonce,
                          uint64_t *hashes_done, struct thr_info *mythr )
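The scan loops below rely on a cooperative-abort contract: sonoa chains seven rounds of roughly seventeen hash algorithms, so the per-thread restart flag is polled between rounds and the hash function returns 0 as soon as the work goes stale, 1 when it completes; the scanner only inspects lane results after a completed hash. A self-contained sketch of the shape of that contract, with illustrative stand-in names:

#include <stdbool.h>
#include <stdint.h>

static volatile bool restart_flag;        // stands in for work_restart[thrid].restart

// Returns 0 if the work went stale mid-computation, 1 if out[] is valid.
static int long_hash( uint32_t *out, const uint32_t *in )
{
   for ( int round = 0; round < 7; round++ )
   {
      // ... the many per-round hash algorithms are elided here ...
      if ( restart_flag ) return 0;       // abandon stale work early
   }
   return 1;
}

static void scan( uint32_t *hash, const uint32_t *data )
{
   while ( !restart_flag )
   {
      if ( long_hash( hash, data ) )
      {  /* compare hash against the target, submit if valid */  }
      // on 0 the partial hash is simply discarded; advance the nonce
   }
}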
@@ -1133,8 +1141,7 @@ int scanhash_sonoa_8way( struct work *work, uint32_t max_nonce,
 
    do
    {
-      sonoa_8way_hash( hash, vdata );
-
+      if ( sonoa_8way_hash( hash, vdata, thr_id ) )
       for ( int lane = 0; lane < 8; lane++ )
       if unlikely( ( hashd7[ lane ] <= targ32 ) )
       {
@@ -1142,7 +1149,7 @@ int scanhash_sonoa_8way( struct work *work, uint32_t max_nonce,
          if ( likely( valid_hash( lane_hash, ptarget ) && !opt_benchmark ) )
          {
             pdata[19] = bswap_32( n + lane );
-            submit_lane_solution( work, lane_hash, mythr, lane );
+            submit_solution( work, lane_hash, mythr );
          }
       }
       *noncev = _mm512_add_epi32( *noncev,
@@ -1179,7 +1186,7 @@ union _sonoa_4way_context_overlay
 
 typedef union _sonoa_4way_context_overlay sonoa_4way_context_overlay;
 
-void sonoa_4way_hash( void *state, const void *input )
+int sonoa_4way_hash( void *state, const void *input, int thrid )
 {
    uint64_t hash0[8] __attribute__ ((aligned (64)));
    uint64_t hash1[8] __attribute__ ((aligned (64)));
@@ -1243,6 +1250,7 @@ void sonoa_4way_hash( void *state, const void *input )
    echo_full( &ctx.echo, (BitSequence *)hash3, 512,
               (const BitSequence *)hash3, 64 );
 
+   if ( work_restart[thrid].restart ) return 0;
 // 2
 
    intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
@@ -1302,6 +1310,7 @@ void sonoa_4way_hash( void *state, const void *input )
    hamsi512_4way_update( &ctx.hamsi, vhash, 64 );
    hamsi512_4way_close( &ctx.hamsi, vhash );
 
+   if ( work_restart[thrid].restart ) return 0;
 // 3
 
    bmw512_4way_init( &ctx.bmw );
@@ -1366,6 +1375,7 @@ void sonoa_4way_hash( void *state, const void *input )
    sph_fugue512_full( &ctx.fugue, hash2, hash2, 64 );
    sph_fugue512_full( &ctx.fugue, hash3, hash3, 64 );
 
+   if ( work_restart[thrid].restart ) return 0;
 // 4
 
    intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
@@ -1462,6 +1472,7 @@ void sonoa_4way_hash( void *state, const void *input )
    shavite512_2way_init( &ctx.shavite );
    shavite512_2way_update_close( &ctx.shavite, vhashB, vhashB, 64 );
 
+   if ( work_restart[thrid].restart ) return 0;
 // 5
 
    rintrlv_2x128_4x64( vhash, vhashA, vhashB, 512 );
@@ -1546,6 +1557,7 @@ void sonoa_4way_hash( void *state, const void *input )
    sph_whirlpool512_full( &ctx.whirlpool, hash2, hash2, 64 );
    sph_whirlpool512_full( &ctx.whirlpool, hash3, hash3, 64 );
 
+   if ( work_restart[thrid].restart ) return 0;
 // 6
 
    intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
@@ -1638,6 +1650,7 @@ void sonoa_4way_hash( void *state, const void *input )
    sph_whirlpool512_full( &ctx.whirlpool, hash2, hash2, 64 );
    sph_whirlpool512_full( &ctx.whirlpool, hash3, hash3, 64 );
 
+   if ( work_restart[thrid].restart ) return 0;
 // 7
 
    intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
@@ -1728,6 +1741,8 @@ void sonoa_4way_hash( void *state, const void *input )
    haval256_5_4way_init( &ctx.haval );
    haval256_5_4way_update( &ctx.haval, vhashB, 64 );
    haval256_5_4way_close( &ctx.haval, state );
+
+   return 1;
 }
 
 int scanhash_sonoa_4way( struct work *work, const uint32_t max_nonce,
@@ -1752,8 +1767,7 @@ int scanhash_sonoa_4way( struct work *work, const uint32_t max_nonce,
 
    do
    {
-      sonoa_4way_hash( hash, vdata );
-
+      if ( sonoa_4way_hash( hash, vdata, thr_id ) )
      for ( int lane = 0; lane < 4; lane++ )
      if ( unlikely( hashd7[ lane ] <= targ32 ) )
      {
@@ -1761,7 +1775,7 @@ int scanhash_sonoa_4way( struct work *work, const uint32_t max_nonce,
         if ( likely( valid_hash( lane_hash, ptarget ) && !opt_benchmark ) )
        {
            pdata[19] = bswap_32( n + lane );
-           submit_lane_solution( work, lane_hash, mythr, lane );
+           submit_solution( work, lane_hash, mythr );
        }
      }
      *noncev = _mm256_add_epi32( *noncev,
diff --git a/algo/x17/sonoa-gate.c b/algo/x17/sonoa-gate.c
index 7dce68f..dfd8a70 100644
--- a/algo/x17/sonoa-gate.c
+++ b/algo/x17/sonoa-gate.c
@@ -4,14 +4,14 @@ bool register_sonoa_algo( algo_gate_t* gate )
 {
 #if defined (SONOA_8WAY)
   gate->scanhash  = (void*)&scanhash_sonoa_8way;
-  gate->hash      = (void*)&sonoa_8way_hash;
+//  gate->hash      = (void*)&sonoa_8way_hash;
 #elif defined (SONOA_4WAY)
   gate->scanhash  = (void*)&scanhash_sonoa_4way;
-  gate->hash      = (void*)&sonoa_4way_hash;
+//  gate->hash      = (void*)&sonoa_4way_hash;
 #else
   init_sonoa_ctx();
   gate->scanhash  = (void*)&scanhash_sonoa;
-  gate->hash      = (void*)&sonoa_hash;
+//  gate->hash      = (void*)&sonoa_hash;
 #endif
   gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT;
   return true;
diff --git a/algo/x17/sonoa-gate.h b/algo/x17/sonoa-gate.h
index aaad2a4..895bcd2 100644
--- a/algo/x17/sonoa-gate.h
+++ b/algo/x17/sonoa-gate.h
@@ -14,19 +14,19 @@ bool register_sonoa_algo( algo_gate_t* gate );
 
 #if defined(SONOA_8WAY)
 
-void sonoa_8way_hash( void *state, const void *input );
+int sonoa_8way_hash( void *state, const void *input, int thrid );
 int scanhash_sonoa_8way( struct work *work, uint32_t max_nonce,
                          uint64_t *hashes_done, struct thr_info *mythr );
 
 #elif defined(SONOA_4WAY)
 
-void sonoa_4way_hash( void *state, const void *input );
+int sonoa_4way_hash( void *state, const void *input, int thrid );
 int scanhash_sonoa_4way( struct work *work, uint32_t max_nonce,
                          uint64_t *hashes_done, struct thr_info *mythr );
 
 #else
 
-void sonoa_hash( void *state, const void *input );
+int sonoa_hash( void *state, const void *input, int thrid );
 int scanhash_sonoa( struct work *work, uint32_t max_nonce,
                     uint64_t *hashes_done, struct thr_info *mythr );
 void init_sonoa_ctx();
diff --git a/algo/x17/sonoa.c b/algo/x17/sonoa.c
index 2bf8ade..bad5946 100644
--- a/algo/x17/sonoa.c
+++ b/algo/x17/sonoa.c
@@ -83,27 +83,27 @@ void init_sonoa_ctx()
         sph_haval256_5_init(&sonoa_ctx.haval);
 };
 
-void sonoa_hash( void *state, const void *input )
+int sonoa_hash( void *state, const void *input, int thrid )
 {
    uint8_t hash[128] __attribute__ ((aligned (64)));
-        sonoa_ctx_holder ctx __attribute__ ((aligned (64)));
-        memcpy( &ctx, &sonoa_ctx, sizeof(sonoa_ctx) );
+   sonoa_ctx_holder ctx __attribute__ ((aligned (64)));
+   memcpy( &ctx, &sonoa_ctx, sizeof(sonoa_ctx) );
 
-        sph_blake512(&ctx.blake, input, 80);
+   sph_blake512(&ctx.blake, input, 80);
    sph_blake512_close(&ctx.blake, hash);
 
    sph_bmw512(&ctx.bmw, hash, 64);
    sph_bmw512_close(&ctx.bmw, hash);
 
 #if defined(__AES__)
-        update_and_final_groestl( &ctx.groestl, (char*)hash,
-                                  (const char*)hash, 512 );
+   update_and_final_groestl( &ctx.groestl, (char*)hash,
+                             (const char*)hash, 512 );
 #else
-        sph_groestl512(&ctx.groestl, hash, 64);
-        sph_groestl512_close(&ctx.groestl, hash);
+   sph_groestl512(&ctx.groestl, hash, 64);
+   sph_groestl512_close(&ctx.groestl, hash);
 #endif
 
-        sph_skein512(&ctx.skein, hash, 64);
+   sph_skein512(&ctx.skein, hash, 64);
    sph_skein512_close(&ctx.skein, hash);
 
    sph_jh512(&ctx.jh, hash, 64);
@@ -112,454 +112,461 @@ void sonoa_hash( void *state, const void *input )
    sph_keccak512(&ctx.keccak, hash, 64);
    sph_keccak512_close(&ctx.keccak, hash);
 
-        update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
-                                (const BitSequence*)hash, 64 );
+   update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
+                           (const BitSequence*)hash, 64 );
 
-        cubehashUpdateDigest( &ctx.cubehash, (byte*) hash,
-                              (const byte*)hash, 64 );
+   cubehashUpdateDigest( &ctx.cubehash, (byte*) hash,
+                         (const byte*)hash, 64 );
 
    sph_shavite512(&ctx.shavite, hash, 64);
    sph_shavite512_close(&ctx.shavite, hash);
 
-        update_final_sd( &ctx.simd, (BitSequence *)hash,
+   update_final_sd( &ctx.simd, (BitSequence *)hash,
                     (const BitSequence *)hash, 512 );
 
 #if defined(__AES__)
-        update_final_echo ( &ctx.echo, (BitSequence *)hash,
+   update_final_echo ( &ctx.echo, (BitSequence *)hash,
                    (const BitSequence *)hash, 512 );
 #else
-        sph_echo512(&ctx.echo, hash, 64);
-        sph_echo512_close(&ctx.echo, hash);
+   sph_echo512(&ctx.echo, hash, 64);
+   sph_echo512_close(&ctx.echo, hash);
 #endif
 
+   if ( work_restart[thrid].restart ) return 0;
 //
 
-        sph_bmw512_init( &ctx.bmw);
-        sph_bmw512(&ctx.bmw, hash, 64);
-        sph_bmw512_close(&ctx.bmw, hash);
+   sph_bmw512_init( &ctx.bmw);
+   sph_bmw512(&ctx.bmw, hash, 64);
+   sph_bmw512_close(&ctx.bmw, hash);
 
 #if defined(__AES__)
-        init_groestl( &ctx.groestl, 64 );
-        update_and_final_groestl( &ctx.groestl, (char*)hash,
-                                  (const char*)hash, 512 );
+   init_groestl( &ctx.groestl, 64 );
+   update_and_final_groestl( &ctx.groestl, (char*)hash,
+                             (const char*)hash, 512 );
 #else
-        sph_groestl512_init(&ctx.groestl );
-        sph_groestl512(&ctx.groestl, hash, 64);
-        sph_groestl512_close(&ctx.groestl, hash);
+   sph_groestl512_init(&ctx.groestl );
+   sph_groestl512(&ctx.groestl, hash, 64);
+   sph_groestl512_close(&ctx.groestl, hash);
 #endif
 
-        sph_skein512_init( &ctx.skein);
-        sph_skein512(&ctx.skein, hash, 64);
-        sph_skein512_close(&ctx.skein, hash);
+   sph_skein512_init( &ctx.skein);
+   sph_skein512(&ctx.skein, hash, 64);
+   sph_skein512_close(&ctx.skein, hash);
 
-        sph_jh512_init( &ctx.jh);
-        sph_jh512(&ctx.jh, hash, 64);
-        sph_jh512_close(&ctx.jh, hash);
+   sph_jh512_init( &ctx.jh);
+   sph_jh512(&ctx.jh, hash, 64);
+   sph_jh512_close(&ctx.jh, hash);
 
-        sph_keccak512_init( &ctx.keccak );
-        sph_keccak512(&ctx.keccak, hash, 64);
-        sph_keccak512_close(&ctx.keccak, hash);
+   sph_keccak512_init( &ctx.keccak );
+   sph_keccak512(&ctx.keccak, hash, 64);
+   sph_keccak512_close(&ctx.keccak, hash);
 
-        init_luffa( &ctx.luffa, 512 );
-        update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
-                                (const BitSequence*)hash, 64 );
+   init_luffa( &ctx.luffa, 512 );
+   update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
+                           (const BitSequence*)hash, 64 );
 
-        cubehashInit( &ctx.cubehash, 512, 16, 32 );
-        cubehashUpdateDigest( &ctx.cubehash, (byte*) hash,
-                              (const byte*)hash, 64 );
+   cubehashInit( &ctx.cubehash, 512, 16, 32 );
+   cubehashUpdateDigest( &ctx.cubehash, (byte*) hash,
+                         (const byte*)hash, 64 );
 
-        sph_shavite512_init( &ctx.shavite );
-        sph_shavite512(&ctx.shavite, hash, 64);
-        sph_shavite512_close(&ctx.shavite, hash);
+   sph_shavite512_init( &ctx.shavite );
+   sph_shavite512(&ctx.shavite, hash, 64);
+   sph_shavite512_close(&ctx.shavite, hash);
 
-        init_sd( &ctx.simd, 512 );
-        update_final_sd( &ctx.simd, (BitSequence *)hash,
+   init_sd( &ctx.simd, 512 );
+   update_final_sd( &ctx.simd, (BitSequence *)hash,
                    (const BitSequence *)hash, 512 );
 
 #if defined(__AES__)
-        init_echo( &ctx.echo, 512 );
-        update_final_echo ( &ctx.echo, (BitSequence *)hash,
+   init_echo( &ctx.echo, 512 );
+   update_final_echo ( &ctx.echo, (BitSequence *)hash,
                    (const BitSequence *)hash, 512 );
 #else
-        sph_echo512_init( &ctx.echo );
-        sph_echo512(&ctx.echo, hash, 64);
-        sph_echo512_close(&ctx.echo, hash);
+   sph_echo512_init( &ctx.echo );
+   sph_echo512(&ctx.echo, hash, 64);
+   sph_echo512_close(&ctx.echo, hash);
 #endif
 
-        sph_hamsi512(&ctx.hamsi, hash, 64);
-        sph_hamsi512_close(&ctx.hamsi, hash);
+   sph_hamsi512(&ctx.hamsi, hash, 64);
+   sph_hamsi512_close(&ctx.hamsi, hash);
 
+   if ( work_restart[thrid].restart ) return 0;
 //
 
-        sph_bmw512_init( &ctx.bmw);
-        sph_bmw512(&ctx.bmw, hash, 64);
-        sph_bmw512_close(&ctx.bmw, hash);
+   sph_bmw512_init( &ctx.bmw);
+   sph_bmw512(&ctx.bmw, hash, 64);
+   sph_bmw512_close(&ctx.bmw, hash);
 
 #if defined(__AES__)
-        init_groestl( &ctx.groestl, 64 );
-        update_and_final_groestl( &ctx.groestl, (char*)hash,
-                                  (const char*)hash, 512 );
+   init_groestl( &ctx.groestl, 64 );
+   update_and_final_groestl( &ctx.groestl, (char*)hash,
+                             (const char*)hash, 512 );
 #else
-        sph_groestl512_init(&ctx.groestl );
-        sph_groestl512(&ctx.groestl, hash, 64);
-        sph_groestl512_close(&ctx.groestl, hash);
+   sph_groestl512_init(&ctx.groestl );
+   sph_groestl512(&ctx.groestl, hash, 64);
+   sph_groestl512_close(&ctx.groestl, hash);
 #endif
 
-        sph_skein512_init( &ctx.skein);
-        sph_skein512(&ctx.skein, hash, 64);
-        sph_skein512_close(&ctx.skein, hash);
+   sph_skein512_init( &ctx.skein);
+   sph_skein512(&ctx.skein, hash, 64);
+   sph_skein512_close(&ctx.skein, hash);
 
-        sph_jh512_init( &ctx.jh);
-        sph_jh512(&ctx.jh, hash, 64);
-        sph_jh512_close(&ctx.jh, hash);
+   sph_jh512_init( &ctx.jh);
+   sph_jh512(&ctx.jh, hash, 64);
+   sph_jh512_close(&ctx.jh, hash);
 
-        sph_keccak512_init( &ctx.keccak );
-        sph_keccak512(&ctx.keccak, hash, 64);
-        sph_keccak512_close(&ctx.keccak, hash);
+   sph_keccak512_init( &ctx.keccak );
+   sph_keccak512(&ctx.keccak, hash, 64);
+   sph_keccak512_close(&ctx.keccak, hash);
 
-        init_luffa( &ctx.luffa, 512 );
-        update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
-                                (const BitSequence*)hash, 64 );
+   init_luffa( &ctx.luffa, 512 );
+   update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
+                           (const BitSequence*)hash, 64 );
 
-        cubehashInit( &ctx.cubehash, 512, 16, 32 );
-        cubehashUpdateDigest( &ctx.cubehash, (byte*) hash,
-                              (const byte*)hash, 64 );
+   cubehashInit( &ctx.cubehash, 512, 16, 32 );
+   cubehashUpdateDigest( &ctx.cubehash, (byte*)hash,
+                         (const byte*)hash, 64 );
 
-        sph_shavite512_init( &ctx.shavite );
-        sph_shavite512(&ctx.shavite, hash, 64);
-        sph_shavite512_close(&ctx.shavite, hash);
+   sph_shavite512_init( &ctx.shavite );
+   sph_shavite512(&ctx.shavite, hash, 64);
+   sph_shavite512_close(&ctx.shavite, hash);
 
-        init_sd( &ctx.simd, 512 );
-        update_final_sd( &ctx.simd, (BitSequence *)hash,
+   init_sd( &ctx.simd, 512 );
+   update_final_sd( &ctx.simd, (BitSequence *)hash,
                    (const BitSequence *)hash, 512 );
 
 #if defined(__AES__)
-        init_echo( &ctx.echo, 512 );
-        update_final_echo ( &ctx.echo, (BitSequence *)hash,
+   init_echo( &ctx.echo, 512 );
+   update_final_echo ( &ctx.echo, (BitSequence *)hash,
                    (const BitSequence *)hash, 512 );
 #else
-        sph_echo512_init( &ctx.echo );
-        sph_echo512(&ctx.echo, hash, 64);
-        sph_echo512_close(&ctx.echo, hash);
+   sph_echo512_init( &ctx.echo );
+   sph_echo512(&ctx.echo, hash, 64);
+   sph_echo512_close(&ctx.echo, hash);
 #endif
 
-        sph_hamsi512_init( &ctx.hamsi );
-        sph_hamsi512(&ctx.hamsi, hash, 64);
-        sph_hamsi512_close(&ctx.hamsi, hash);
+   sph_hamsi512_init( &ctx.hamsi );
+   sph_hamsi512(&ctx.hamsi, hash, 64);
+   sph_hamsi512_close(&ctx.hamsi, hash);
 
-        sph_fugue512(&ctx.fugue, hash, 64);
-        sph_fugue512_close(&ctx.fugue, hash);
+   sph_fugue512(&ctx.fugue, hash, 64);
+   sph_fugue512_close(&ctx.fugue, hash);
 
+   if ( work_restart[thrid].restart ) return 0;
 //
 
-        sph_bmw512_init( &ctx.bmw);
-        sph_bmw512(&ctx.bmw, hash, 64);
-        sph_bmw512_close(&ctx.bmw, hash);
+   sph_bmw512_init( &ctx.bmw);
+   sph_bmw512(&ctx.bmw, hash, 64);
+   sph_bmw512_close(&ctx.bmw, hash);
 
 #if defined(__AES__)
-        init_groestl( &ctx.groestl, 64 );
-        update_and_final_groestl( &ctx.groestl, (char*)hash,
+   init_groestl( &ctx.groestl, 64 );
+   update_and_final_groestl( &ctx.groestl, (char*)hash,
                     (const char*)hash, 512 );
 #else
-        sph_groestl512_init(&ctx.groestl );
-        sph_groestl512(&ctx.groestl, hash, 64);
-        sph_groestl512_close(&ctx.groestl, hash);
+   sph_groestl512_init(&ctx.groestl );
+   sph_groestl512(&ctx.groestl, hash, 64);
+   sph_groestl512_close(&ctx.groestl, hash);
 #endif
 
-        sph_skein512_init( &ctx.skein);
-        sph_skein512(&ctx.skein, hash, 64);
-        sph_skein512_close(&ctx.skein, hash);
+   sph_skein512_init( &ctx.skein);
+   sph_skein512(&ctx.skein, hash, 64);
+   sph_skein512_close(&ctx.skein, hash);
 
-        sph_jh512_init( &ctx.jh);
-        sph_jh512(&ctx.jh, hash, 64);
-        sph_jh512_close(&ctx.jh, hash);
+   sph_jh512_init( &ctx.jh);
+   sph_jh512(&ctx.jh, hash, 64);
+   sph_jh512_close(&ctx.jh, hash);
 
-        sph_keccak512_init( &ctx.keccak );
-        sph_keccak512(&ctx.keccak, hash, 64);
-        sph_keccak512_close(&ctx.keccak, hash);
+   sph_keccak512_init( &ctx.keccak );
+   sph_keccak512(&ctx.keccak, hash, 64);
+   sph_keccak512_close(&ctx.keccak, hash);
 
-        init_luffa( &ctx.luffa, 512 );
-        update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
+   init_luffa( &ctx.luffa, 512 );
+   update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
                     (const BitSequence*)hash, 64 );
 
-        cubehashInit( &ctx.cubehash, 512, 16, 32 );
-        cubehashUpdateDigest( &ctx.cubehash, (byte*) hash,
+   cubehashInit( &ctx.cubehash, 512, 16, 32 );
+   cubehashUpdateDigest( &ctx.cubehash, (byte*) hash,
                     (const byte*)hash, 64 );
 
-        sph_shavite512_init( &ctx.shavite );
-        sph_shavite512(&ctx.shavite, hash, 64);
-        sph_shavite512_close(&ctx.shavite, hash);
+   sph_shavite512_init( &ctx.shavite );
+   sph_shavite512(&ctx.shavite, hash, 64);
+   sph_shavite512_close(&ctx.shavite, hash);
 
-        init_sd( &ctx.simd, 512 );
-        update_final_sd( &ctx.simd, (BitSequence *)hash,
+   init_sd( &ctx.simd, 512 );
+   update_final_sd( &ctx.simd, (BitSequence *)hash,
                    (const BitSequence *)hash, 512 );
 
 #if defined(__AES__)
-        init_echo( &ctx.echo, 512 );
-        update_final_echo ( &ctx.echo, (BitSequence *)hash,
+   init_echo( &ctx.echo, 512 );
+   update_final_echo ( &ctx.echo, (BitSequence *)hash,
                    (const BitSequence *)hash, 512 );
 #else
-        sph_echo512_init( &ctx.echo );
-        sph_echo512(&ctx.echo, hash, 64);
-        sph_echo512_close(&ctx.echo, hash);
+   sph_echo512_init( &ctx.echo );
+   sph_echo512(&ctx.echo, hash, 64);
+   sph_echo512_close(&ctx.echo, hash);
 #endif
 
-        sph_hamsi512_init( &ctx.hamsi );
-        sph_hamsi512(&ctx.hamsi, hash, 64);
-        sph_hamsi512_close(&ctx.hamsi, hash);
+   sph_hamsi512_init( &ctx.hamsi );
+   sph_hamsi512(&ctx.hamsi, hash, 64);
+   sph_hamsi512_close(&ctx.hamsi, hash);
 
-        sph_fugue512_init( &ctx.fugue );
-        sph_fugue512(&ctx.fugue, hash, 64);
-        sph_fugue512_close(&ctx.fugue, hash);
+   sph_fugue512_init( &ctx.fugue );
+   sph_fugue512(&ctx.fugue, hash, 64);
+   sph_fugue512_close(&ctx.fugue, hash);
 
-        sph_shabal512(&ctx.shabal, hash, 64);
-        sph_shabal512_close(&ctx.shabal, hash);
+   sph_shabal512(&ctx.shabal, hash, 64);
+   sph_shabal512_close(&ctx.shabal, hash);
 
-        sph_hamsi512_init( &ctx.hamsi );
-        sph_hamsi512(&ctx.hamsi, hash, 64);
-        sph_hamsi512_close(&ctx.hamsi, hash);
+   sph_hamsi512_init( &ctx.hamsi );
+   sph_hamsi512(&ctx.hamsi, hash, 64);
+   sph_hamsi512_close(&ctx.hamsi, hash);
 
 #if defined(__AES__)
-        init_echo( &ctx.echo, 512 );
-        update_final_echo ( &ctx.echo, (BitSequence *)hash,
+   init_echo( &ctx.echo, 512 );
+   update_final_echo ( &ctx.echo, (BitSequence *)hash,
                    (const BitSequence *)hash, 512 );
 #else
-        sph_echo512_init( &ctx.echo );
-        sph_echo512(&ctx.echo, hash, 64);
-        sph_echo512_close(&ctx.echo, hash);
+   sph_echo512_init( &ctx.echo );
+   sph_echo512(&ctx.echo, hash, 64);
+   sph_echo512_close(&ctx.echo, hash);
 #endif
 
-        sph_shavite512_init( &ctx.shavite );
-        sph_shavite512(&ctx.shavite, hash, 64);
-        sph_shavite512_close(&ctx.shavite, hash);
+   sph_shavite512_init( &ctx.shavite );
+   sph_shavite512(&ctx.shavite, hash, 64);
+   sph_shavite512_close(&ctx.shavite, hash);
 
+   if ( work_restart[thrid].restart ) return 0;
 //
 
-        sph_bmw512_init( &ctx.bmw);
-        sph_bmw512(&ctx.bmw, hash, 64);
-        sph_bmw512_close(&ctx.bmw, hash);
+   sph_bmw512_init( &ctx.bmw);
+   sph_bmw512(&ctx.bmw, hash, 64);
+   sph_bmw512_close(&ctx.bmw, hash);
 
-        sph_shabal512_init( &ctx.shabal );
+   sph_shabal512_init( &ctx.shabal );
    sph_shabal512(&ctx.shabal, hash, 64);
-        sph_shabal512_close(&ctx.shabal, hash);
+   sph_shabal512_close(&ctx.shabal, hash);
 
 #if defined(__AES__)
-        init_groestl( &ctx.groestl, 64 );
-        update_and_final_groestl( &ctx.groestl, (char*)hash,
+   init_groestl( &ctx.groestl, 64 );
+   update_and_final_groestl( &ctx.groestl, (char*)hash,
                     (const char*)hash, 512 );
 #else
-        sph_groestl512_init(&ctx.groestl );
-        sph_groestl512(&ctx.groestl, hash, 64);
-        sph_groestl512_close(&ctx.groestl, hash);
+   sph_groestl512_init(&ctx.groestl );
+   sph_groestl512(&ctx.groestl, hash, 64);
+   sph_groestl512_close(&ctx.groestl, hash);
 #endif
 
-        sph_skein512_init( &ctx.skein);
-        sph_skein512(&ctx.skein, hash, 64);
-        sph_skein512_close(&ctx.skein, hash);
+   sph_skein512_init( &ctx.skein);
+   sph_skein512(&ctx.skein, hash, 64);
+   sph_skein512_close(&ctx.skein, hash);
 
-        sph_jh512_init( &ctx.jh);
-        sph_jh512(&ctx.jh, hash, 64);
-        sph_jh512_close(&ctx.jh, hash);
+   sph_jh512_init( &ctx.jh);
+   sph_jh512(&ctx.jh, hash, 64);
+   sph_jh512_close(&ctx.jh, hash);
 
-        sph_keccak512_init( &ctx.keccak );
-        sph_keccak512(&ctx.keccak, hash, 64);
-        sph_keccak512_close(&ctx.keccak, hash);
+   sph_keccak512_init( &ctx.keccak );
+   sph_keccak512(&ctx.keccak, hash, 64);
+   sph_keccak512_close(&ctx.keccak, hash);
 
-        init_luffa( &ctx.luffa, 512 );
-        update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
+   init_luffa( &ctx.luffa, 512 );
+   update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
                     (const BitSequence*)hash, 64 );
 
-        cubehashInit( &ctx.cubehash, 512, 16, 32 );
-        cubehashUpdateDigest( &ctx.cubehash, (byte*) hash,
+   cubehashInit( &ctx.cubehash, 512, 16, 32 );
+   cubehashUpdateDigest( &ctx.cubehash, (byte*) hash,
                     (const byte*)hash, 64 );
 
-        sph_shavite512_init( &ctx.shavite );
-        sph_shavite512(&ctx.shavite, hash, 64);
-        sph_shavite512_close(&ctx.shavite, hash);
+   sph_shavite512_init( &ctx.shavite );
+   sph_shavite512(&ctx.shavite, hash, 64);
+   sph_shavite512_close(&ctx.shavite, hash);
 
-        init_sd( &ctx.simd, 512 );
-        update_final_sd( &ctx.simd, (BitSequence *)hash,
+   init_sd( &ctx.simd, 512 );
+   update_final_sd( &ctx.simd, (BitSequence *)hash,
                    (const BitSequence *)hash, 512 );
 
 #if defined(__AES__)
-        init_echo( &ctx.echo, 512 );
-        update_final_echo ( &ctx.echo, (BitSequence *)hash,
+   init_echo( &ctx.echo, 512 );
+   update_final_echo ( &ctx.echo, (BitSequence *)hash,
                    (const BitSequence *)hash, 512 );
 #else
-        sph_echo512_init( &ctx.echo );
-        sph_echo512(&ctx.echo, hash, 64);
-        sph_echo512_close(&ctx.echo, hash);
+   sph_echo512_init( &ctx.echo );
+   sph_echo512(&ctx.echo, hash, 64);
+   sph_echo512_close(&ctx.echo, hash);
 #endif
 
-        sph_hamsi512_init( &ctx.hamsi );
-        sph_hamsi512(&ctx.hamsi, hash, 64);
-        sph_hamsi512_close(&ctx.hamsi, hash);
+   sph_hamsi512_init( &ctx.hamsi );
+   sph_hamsi512(&ctx.hamsi, hash, 64);
+   sph_hamsi512_close(&ctx.hamsi, hash);
 
-        sph_fugue512_init( &ctx.fugue );
-        sph_fugue512(&ctx.fugue, hash, 64);
-        sph_fugue512_close(&ctx.fugue, hash);
+   sph_fugue512_init( &ctx.fugue );
+   sph_fugue512(&ctx.fugue, hash, 64);
+   sph_fugue512_close(&ctx.fugue, hash);
 
-        sph_shabal512_init( &ctx.shabal );
-        sph_shabal512(&ctx.shabal, hash, 64);
-        sph_shabal512_close(&ctx.shabal, hash);
+   sph_shabal512_init( &ctx.shabal );
+   sph_shabal512(&ctx.shabal, hash, 64);
+   sph_shabal512_close(&ctx.shabal, hash);
 
-        sph_whirlpool(&ctx.whirlpool, hash, 64);
-        sph_whirlpool_close(&ctx.whirlpool, hash);
+   sph_whirlpool(&ctx.whirlpool, hash, 64);
+   sph_whirlpool_close(&ctx.whirlpool, hash);
 
+   if ( work_restart[thrid].restart ) return 0;
 //
 
-        sph_bmw512_init( &ctx.bmw);
-        sph_bmw512(&ctx.bmw, hash, 64);
-        sph_bmw512_close(&ctx.bmw, hash);
+   sph_bmw512_init( &ctx.bmw);
+   sph_bmw512(&ctx.bmw, hash, 64);
+   sph_bmw512_close(&ctx.bmw, hash);
 
 #if defined(__AES__)
-        init_groestl( &ctx.groestl, 64 );
-        update_and_final_groestl( &ctx.groestl, (char*)hash,
+   init_groestl( &ctx.groestl, 64 );
+   update_and_final_groestl( &ctx.groestl, (char*)hash,
                     (const char*)hash, 512 );
 #else
-        sph_groestl512_init(&ctx.groestl );
-        sph_groestl512(&ctx.groestl, hash, 64);
-        sph_groestl512_close(&ctx.groestl, hash);
+   sph_groestl512_init(&ctx.groestl );
+   sph_groestl512(&ctx.groestl, hash, 64);
+   sph_groestl512_close(&ctx.groestl, hash);
 #endif
 
-        sph_skein512_init( &ctx.skein);
-        sph_skein512(&ctx.skein, hash, 64);
-        sph_skein512_close(&ctx.skein, hash);
+   sph_skein512_init( &ctx.skein);
+   sph_skein512(&ctx.skein, hash, 64);
+   sph_skein512_close(&ctx.skein, hash);
 
-        sph_jh512_init( &ctx.jh);
-        sph_jh512(&ctx.jh, hash, 64);
-        sph_jh512_close(&ctx.jh, hash);
+   sph_jh512_init( &ctx.jh);
+   sph_jh512(&ctx.jh, hash, 64);
+   sph_jh512_close(&ctx.jh, hash);
 
-        sph_keccak512_init( &ctx.keccak );
-        sph_keccak512(&ctx.keccak, hash, 64);
-        sph_keccak512_close(&ctx.keccak, hash);
+   sph_keccak512_init( &ctx.keccak );
+   sph_keccak512(&ctx.keccak, hash, 64);
+   sph_keccak512_close(&ctx.keccak, hash);
 
-        init_luffa( &ctx.luffa, 512 );
-        update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
+   init_luffa( &ctx.luffa, 512 );
+   update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
                     (const BitSequence*)hash, 64 );
 
-        cubehashInit( &ctx.cubehash, 512, 16, 32 );
-        cubehashUpdateDigest( &ctx.cubehash, (byte*) hash,
+   cubehashInit( &ctx.cubehash, 512, 16, 32 );
+   cubehashUpdateDigest( &ctx.cubehash, (byte*) hash,
                     (const byte*)hash, 64 );
 
-        sph_shavite512_init( &ctx.shavite );
-        sph_shavite512(&ctx.shavite, hash, 64);
-        sph_shavite512_close(&ctx.shavite, hash);
+   sph_shavite512_init( &ctx.shavite );
+   sph_shavite512(&ctx.shavite, hash, 64);
+   sph_shavite512_close(&ctx.shavite, hash);
 
-        init_sd( &ctx.simd, 512 );
-        update_final_sd( &ctx.simd, (BitSequence *)hash,
+   init_sd( &ctx.simd, 512 );
+   update_final_sd( &ctx.simd, (BitSequence *)hash,
                    (const BitSequence *)hash, 512 );
 
 #if defined(__AES__)
-        init_echo( &ctx.echo, 512 );
-        update_final_echo ( &ctx.echo, (BitSequence *)hash,
+   init_echo( &ctx.echo, 512 );
+   update_final_echo ( &ctx.echo, (BitSequence *)hash,
                    (const BitSequence *)hash, 512 );
 #else
-        sph_echo512_init( &ctx.echo );
-        sph_echo512(&ctx.echo, hash, 64);
-        sph_echo512_close(&ctx.echo, hash);
+   sph_echo512_init( &ctx.echo );
+   sph_echo512(&ctx.echo, hash, 64);
+   sph_echo512_close(&ctx.echo, hash);
 #endif
 
-        sph_hamsi512_init( &ctx.hamsi );
-        sph_hamsi512(&ctx.hamsi, hash, 64);
-        sph_hamsi512_close(&ctx.hamsi, hash);
+   sph_hamsi512_init( &ctx.hamsi );
+   sph_hamsi512(&ctx.hamsi, hash, 64);
+   sph_hamsi512_close(&ctx.hamsi, hash);
 
-        sph_fugue512_init( &ctx.fugue );
-        sph_fugue512(&ctx.fugue, hash, 64);
-        sph_fugue512_close(&ctx.fugue, hash);
+   sph_fugue512_init( &ctx.fugue );
+   sph_fugue512(&ctx.fugue, hash, 64);
+   sph_fugue512_close(&ctx.fugue, hash);
 
-        sph_shabal512_init( &ctx.shabal );
-        sph_shabal512(&ctx.shabal, hash, 64);
-        sph_shabal512_close(&ctx.shabal, hash);
+   sph_shabal512_init( &ctx.shabal );
+   sph_shabal512(&ctx.shabal, hash, 64);
+   sph_shabal512_close(&ctx.shabal, hash);
 
-        sph_whirlpool_init( &ctx.whirlpool );
-        sph_whirlpool(&ctx.whirlpool, hash, 64);
-        sph_whirlpool_close(&ctx.whirlpool, hash);
+   sph_whirlpool_init( &ctx.whirlpool );
+   sph_whirlpool(&ctx.whirlpool, hash, 64);
+   sph_whirlpool_close(&ctx.whirlpool, hash);
 
-        SHA512_Update( &ctx.sha512, hash, 64 );
-        SHA512_Final( (unsigned char*) hash, &ctx.sha512 );
+   SHA512_Update( &ctx.sha512, hash, 64 );
+   SHA512_Final( (unsigned char*) hash, &ctx.sha512 );
 
-        sph_whirlpool_init( &ctx.whirlpool );
-        sph_whirlpool(&ctx.whirlpool, hash, 64);
-        sph_whirlpool_close(&ctx.whirlpool, hash);
+   sph_whirlpool_init( &ctx.whirlpool );
+   sph_whirlpool(&ctx.whirlpool, hash, 64);
+   sph_whirlpool_close(&ctx.whirlpool, hash);
 
+   if ( work_restart[thrid].restart ) return 0;
 //
 
-        sph_bmw512_init( &ctx.bmw);
-        sph_bmw512(&ctx.bmw, hash, 64);
-        sph_bmw512_close(&ctx.bmw, hash);
+   sph_bmw512_init( &ctx.bmw);
+   sph_bmw512(&ctx.bmw, hash, 64);
+   sph_bmw512_close(&ctx.bmw, hash);
 
 #if defined(__AES__)
-        init_groestl( &ctx.groestl, 64 );
-        update_and_final_groestl( &ctx.groestl, (char*)hash,
+   init_groestl( &ctx.groestl, 64 );
+   update_and_final_groestl( &ctx.groestl, (char*)hash,
                     (const char*)hash, 512 );
 #else
-        sph_groestl512_init(&ctx.groestl );
-        sph_groestl512(&ctx.groestl, hash, 64);
-        sph_groestl512_close(&ctx.groestl, hash);
+   sph_groestl512_init(&ctx.groestl );
+   sph_groestl512(&ctx.groestl, hash, 64);
+   sph_groestl512_close(&ctx.groestl, hash);
 #endif
 
-        sph_skein512_init( &ctx.skein);
-        sph_skein512(&ctx.skein, hash, 64);
-        sph_skein512_close(&ctx.skein, hash);
+   sph_skein512_init( &ctx.skein);
+   sph_skein512(&ctx.skein, hash, 64);
+   sph_skein512_close(&ctx.skein, hash);
 
-        sph_jh512_init( &ctx.jh);
-        sph_jh512(&ctx.jh, hash, 64);
-        sph_jh512_close(&ctx.jh, hash);
+   sph_jh512_init( &ctx.jh);
+   sph_jh512(&ctx.jh, hash, 64);
+   sph_jh512_close(&ctx.jh, hash);
 
-        sph_keccak512_init( &ctx.keccak );
-        sph_keccak512(&ctx.keccak, hash, 64);
-        sph_keccak512_close(&ctx.keccak, hash);
+   sph_keccak512_init( &ctx.keccak );
+   sph_keccak512(&ctx.keccak, hash, 64);
+   sph_keccak512_close(&ctx.keccak, hash);
 
-        init_luffa( &ctx.luffa, 512 );
-        update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
+   init_luffa( &ctx.luffa, 512 );
+   update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
                     (const BitSequence*)hash, 64 );
 
-        cubehashInit( &ctx.cubehash, 512, 16, 32 );
-        cubehashUpdateDigest( &ctx.cubehash, (byte*) hash,
+   cubehashInit( &ctx.cubehash, 512, 16, 32 );
+   cubehashUpdateDigest( &ctx.cubehash, (byte*) hash,
                     (const byte*)hash, 64 );
 
-        sph_shavite512_init( &ctx.shavite );
-        sph_shavite512(&ctx.shavite, hash, 64);
-        sph_shavite512_close(&ctx.shavite, hash);
+   sph_shavite512_init( &ctx.shavite );
+   sph_shavite512(&ctx.shavite, hash, 64);
+   sph_shavite512_close(&ctx.shavite, hash);
 
-        init_sd( &ctx.simd, 512 );
-        update_final_sd( &ctx.simd, (BitSequence *)hash,
+   init_sd( &ctx.simd, 512 );
+   update_final_sd( &ctx.simd, (BitSequence *)hash,
                    (const BitSequence *)hash, 512 );
 
 #if defined(__AES__)
-        init_echo( &ctx.echo, 512 );
-        update_final_echo ( &ctx.echo, (BitSequence *)hash,
+   init_echo( &ctx.echo, 512 );
+   update_final_echo ( &ctx.echo, (BitSequence *)hash,
                    (const BitSequence *)hash, 512 );
 #else
-        sph_echo512_init( &ctx.echo );
-        sph_echo512(&ctx.echo, hash, 64);
-        sph_echo512_close(&ctx.echo, hash);
+   sph_echo512_init( &ctx.echo );
+   sph_echo512(&ctx.echo, hash, 64);
+   sph_echo512_close(&ctx.echo, hash);
 #endif
 
-        sph_hamsi512_init( &ctx.hamsi );
-        sph_hamsi512(&ctx.hamsi, hash, 64);
-        sph_hamsi512_close(&ctx.hamsi, hash);
+   sph_hamsi512_init( &ctx.hamsi );
+   sph_hamsi512(&ctx.hamsi, hash, 64);
+   sph_hamsi512_close(&ctx.hamsi, hash);
 
-        sph_fugue512_init( &ctx.fugue );
-        sph_fugue512(&ctx.fugue, hash, 64);
-        sph_fugue512_close(&ctx.fugue, hash);
+   sph_fugue512_init( &ctx.fugue );
+   sph_fugue512(&ctx.fugue, hash, 64);
+   sph_fugue512_close(&ctx.fugue, hash);
 
-        sph_shabal512_init( &ctx.shabal );
-        sph_shabal512(&ctx.shabal, hash, 64);
-        sph_shabal512_close(&ctx.shabal, hash);
+   sph_shabal512_init( &ctx.shabal );
+   sph_shabal512(&ctx.shabal, hash, 64);
+   sph_shabal512_close(&ctx.shabal, hash);
 
-        sph_whirlpool_init( &ctx.whirlpool );
-        sph_whirlpool(&ctx.whirlpool, hash, 64);
-        sph_whirlpool_close(&ctx.whirlpool, hash);
+   sph_whirlpool_init( &ctx.whirlpool );
+   sph_whirlpool(&ctx.whirlpool, hash, 64);
+   sph_whirlpool_close(&ctx.whirlpool, hash);
 
-        SHA512_Init( &ctx.sha512 );
-        SHA512_Update( &ctx.sha512, hash, 64 );
-        SHA512_Final( (unsigned char*) hash, &ctx.sha512 );
+   SHA512_Init( &ctx.sha512 );
+   SHA512_Update( &ctx.sha512, hash, 64 );
+   SHA512_Final( (unsigned char*) hash, &ctx.sha512 );
 
-        sph_haval256_5(&ctx.haval,(const void*) hash, 64);
-        sph_haval256_5_close(&ctx.haval, hash);
+   sph_haval256_5(&ctx.haval,(const void*) hash, 64);
+   sph_haval256_5_close(&ctx.haval, hash);
 
    memcpy(state, hash, 32);
+
+   return 1;
 }
 
 int scanhash_sonoa( struct work *work, uint32_t max_nonce,
@@ -579,7 +586,7 @@ int scanhash_sonoa( struct work *work, uint32_t max_nonce,
    do
    {
       edata[19] = n;
-      sonoa_hash( hash64, edata );
+      if ( sonoa_hash( hash64, edata, thr_id ) )
       if ( unlikely( valid_hash( hash64, ptarget ) && !bench ) )
       {
          pdata[19] = bswap_32( n );
diff --git a/algo/yescrypt/yescrypt-platform.h b/algo/yescrypt/yescrypt-platform.h
index a80640c..bf6df91 100644
--- a/algo/yescrypt/yescrypt-platform.h
+++ b/algo/yescrypt/yescrypt-platform.h
@@ -31,6 +31,7 @@
 #undef HUGEPAGE_SIZE
 #endif
 
+/*
 static __inline uint32_t
 le32dec(const void *pp)
 {
@@ -50,6 +51,7 @@ le32enc(void *pp, uint32_t x)
    p[2] = (x >> 16) & 0xff;
    p[3] = (x >> 24) & 0xff;
 }
+*/
 
 static void *
 alloc_region(yescrypt_region_t * region, size_t size)
@@ -154,7 +156,7 @@ int yescrypt_init_shared(yescrypt_shared_t * shared, const uint8_t * param, size
    if (yescrypt_kdf(&dummy, shared1,
        param, paramlen, NULL, 0, N, r, p, 0,
        YESCRYPT_RW | YESCRYPT_PARALLEL_SMIX | __YESCRYPT_INIT_SHARED_1,
-       salt, sizeof(salt)))
+       salt, sizeof(salt), 0 ) )
       goto out;
 
    half1 = half2 = *shared;
@@ -166,19 +168,19 @@ int yescrypt_init_shared(yescrypt_shared_t * shared, const uint8_t * param, size
    if (p > 1 && yescrypt_kdf(&half1, &half2.shared1,
        param, paramlen, salt, sizeof(salt), N, r, p, 0,
        YESCRYPT_RW | YESCRYPT_PARALLEL_SMIX | __YESCRYPT_INIT_SHARED_2,
-       salt, sizeof(salt)))
+       salt, sizeof(salt), 0 ))
       goto out;
    if (yescrypt_kdf(&half2, &half1.shared1,
       param, paramlen, salt, sizeof(salt), N, r, p, 0,
       YESCRYPT_RW | YESCRYPT_PARALLEL_SMIX | __YESCRYPT_INIT_SHARED_1,
-      salt, sizeof(salt)))
+      salt, sizeof(salt), 0))
      goto out;
    if (yescrypt_kdf(&half1, &half2.shared1,
       param, paramlen, salt, sizeof(salt), N, r, p, 0,
       YESCRYPT_RW | YESCRYPT_PARALLEL_SMIX | __YESCRYPT_INIT_SHARED_1,
-      buf, buflen))
+      buf, buflen, 0))
      goto out;
 
    shared->mask1 = mask;
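With the thrid parameter threaded through, yescrypt_kdf in the file below gains a third outcome: it still returns -1 on allocation or parameter errors, but now returns 0 when a work restart interrupted the derivation and 1 when the hash completed. yescrypt_r treats only -1 as fatal; an interrupted run just yields a partial hash that the scan loop discards once it rechecks the restart flag. A sketch of a caller against that widened contract, with the surrounding variables taken from yescrypt_r's context:

// Assumed return values, per the change below:
// -1 = hard failure, 0 = interrupted by work restart, 1 = completed.
int rc = yescrypt_kdf( shared, local, passwd, passwdlen, salt, saltlen,
                       N, r, p, 0, flags, hash, sizeof(hash), thrid );
if ( rc == -1 )
   return NULL;   // real error: report and abandon this work item
// rc == 0: partial hash, harmless; the scanner sees the restart flag
// rc == 1: hash[] holds the derived key and can be encoded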
yescrypt_kdf(const yescrypt_shared_t * shared, yescrypt_local_t * local, const uint8_t * passwd, size_t passwdlen, const uint8_t * salt, size_t saltlen, uint64_t N, uint32_t r, uint32_t p, uint32_t t, yescrypt_flags_t flags, - uint8_t * buf, size_t buflen) + uint8_t * buf, size_t buflen, int thrid ) { uint8_t _ALIGN(128) sha256[32]; yescrypt_region_t tmp; @@ -1157,6 +1157,7 @@ yescrypt_kdf(const yescrypt_shared_t * shared, yescrypt_local_t * local, size_t B_size, V_size, XY_size, need; uint8_t * B, * S; salsa20_blk_t * V, * XY; + int retval = 1; /* * YESCRYPT_PARALLEL_SMIX is a no-op at p = 1 for its intended purpose, @@ -1312,6 +1313,12 @@ yescrypt_kdf(const yescrypt_shared_t * shared, yescrypt_local_t * local, /* 1: (B_0 ... B_{p-1}) <-- PBKDF2(P, S, 1, p * MFLen) */ PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, 1, B, B_size); + if ( work_restart[thrid].restart ) + { + retval = 0; + goto out; + } + if (t || flags) memcpy(sha256, B, sizeof(sha256)); @@ -1339,9 +1346,21 @@ yescrypt_kdf(const yescrypt_shared_t * shared, yescrypt_local_t * local, } } + if ( work_restart[thrid].restart ) + { + retval = 0; + goto out; + } + /* 5: DK <-- PBKDF2(P, B, 1, dkLen) */ PBKDF2_SHA256(passwd, passwdlen, B, B_size, 1, buf, buflen); + if ( work_restart[thrid].restart ) + { + retval = 0; + goto out; + } + /* * Except when computing classic scrypt, allow all computation so far * to be performed on the client. The final steps below match those of @@ -1370,9 +1389,10 @@ yescrypt_kdf(const yescrypt_shared_t * shared, yescrypt_local_t * local, } } +out: if (free_region(&tmp)) return -1; /* Success! */ - return 0; + return retval; } diff --git a/algo/yescrypt/yescrypt.c b/algo/yescrypt/yescrypt.c index f4adc47..fc78ff2 100644 --- a/algo/yescrypt/yescrypt.c +++ b/algo/yescrypt/yescrypt.c @@ -106,7 +106,8 @@ static const uint8_t* decode64_uint32(uint32_t* dst, uint32_t dstbits, const uin } uint8_t* yescrypt_r(const yescrypt_shared_t* shared, yescrypt_local_t* local, - const uint8_t* passwd, size_t passwdlen, const uint8_t* setting, uint8_t* buf, size_t buflen) + const uint8_t* passwd, size_t passwdlen, const uint8_t* setting, + uint8_t* buf, size_t buflen, int thrid ) { uint8_t hash[HASH_SIZE]; const uint8_t * src, * salt; @@ -210,7 +211,9 @@ uint8_t* yescrypt_r(const yescrypt_shared_t* shared, yescrypt_local_t* local, return NULL; } - if (yescrypt_kdf(shared, local, passwd, passwdlen, salt, saltlen, N, r, p, 0, flags, hash, sizeof(hash))) { + if ( yescrypt_kdf( shared, local, passwd, passwdlen, salt, saltlen, N, r, p, + 0, flags, hash, sizeof(hash), thrid ) == -1 ) + { printf("died10 ..."); fflush(stdout); return NULL; @@ -237,7 +240,7 @@ uint8_t* yescrypt_r(const yescrypt_shared_t* shared, yescrypt_local_t* local, return buf; } -uint8_t* yescrypt(const uint8_t* passwd, const uint8_t* setting) +uint8_t* yescrypt(const uint8_t* passwd, const uint8_t* setting, int thrid ) { static uint8_t buf[4 + 1 + 5 + 5 + BYTES2CHARS(32) + 1 + HASH_LEN + 1]; yescrypt_shared_t shared; @@ -252,7 +255,7 @@ uint8_t* yescrypt(const uint8_t* passwd, const uint8_t* setting) return NULL; } retval = yescrypt_r(&shared, &local, - passwd, 80, setting, buf, sizeof(buf)); + passwd, 80, setting, buf, sizeof(buf), thrid ); //printf("hashse='%s'\n", (char *)retval); if (yescrypt_free_local(&local)) { yescrypt_free_shared(&shared); @@ -329,7 +332,7 @@ uint8_t* yescrypt_gensalt(uint32_t N_log2, uint32_t r, uint32_t p, yescrypt_flag static int yescrypt_bsty(const uint8_t * passwd, size_t passwdlen, const uint8_t * salt, size_t saltlen, uint64_t 
N, uint32_t r, uint32_t p, - uint8_t * buf, size_t buflen) + uint8_t * buf, size_t buflen, int thrid ) { static __thread int initialized = 0; static __thread yescrypt_shared_t shared; @@ -349,7 +352,7 @@ static int yescrypt_bsty(const uint8_t * passwd, size_t passwdlen, } retval = yescrypt_kdf(&shared, &local, passwd, passwdlen, salt, saltlen, N, r, p, 0, YESCRYPT_FLAGS, - buf, buflen); + buf, buflen, thrid ); #if 0 if (yescrypt_free_local(&local)) { yescrypt_free_shared(&shared); @@ -370,16 +373,16 @@ char *yescrypt_client_key = NULL; int yescrypt_client_key_len = 0; /* main hash 80 bytes input */ -void yescrypt_hash( const char *input, char *output, uint32_t len ) +int yescrypt_hash( const char *input, char *output, uint32_t len, int thrid ) { - yescrypt_bsty( (uint8_t*)input, len, (uint8_t*)input, len, YESCRYPT_N, - YESCRYPT_R, YESCRYPT_P, (uint8_t*)output, 32 ); + return yescrypt_bsty( (uint8_t*)input, len, (uint8_t*)input, len, YESCRYPT_N, + YESCRYPT_R, YESCRYPT_P, (uint8_t*)output, 32, thrid ); } /* for util.c test */ -void yescrypthash(void *output, const void *input) +int yescrypthash(void *output, const void *input, int thrid) { - yescrypt_hash((char*) input, (char*) output, 80); + return yescrypt_hash((char*) input, (char*) output, 80, thrid); } int scanhash_yescrypt( struct work *work, uint32_t max_nonce, @@ -392,13 +395,13 @@ int scanhash_yescrypt( struct work *work, uint32_t max_nonce, const uint32_t first_nonce = pdata[19]; const uint32_t last_nonce = max_nonce; uint32_t n = first_nonce; - int thr_id = mythr->id; // thr_id arg is deprecated + int thr_id = mythr->id; for ( int k = 0; k < 19; k++ ) be32enc( &endiandata[k], pdata[k] ); endiandata[19] = n; do { - yescrypt_hash((char*) endiandata, (char*) vhash, 80); + if ( yescrypt_hash((char*) endiandata, (char*) vhash, 80, thr_id ) ) if unlikely( valid_hash( vhash, ptarget ) && !opt_benchmark ) { be32enc( pdata+19, n ); diff --git a/algo/yescrypt/yescrypt.h b/algo/yescrypt/yescrypt.h index c33ba40..51be262 100644 --- a/algo/yescrypt/yescrypt.h +++ b/algo/yescrypt/yescrypt.h @@ -38,12 +38,13 @@ extern "C" { #include #include /* for size_t */ #include +#include "miner.h" //#define __SSE4_1__ -void yescrypt_hash(const char* input, char* output, uint32_t len); +int yescrypt_hash(const char* input, char* output, uint32_t len, int thrid ); -void yescrypthash(void *output, const void *input); +int yescrypthash(void *output, const void *input, int thrid ); /** * crypto_scrypt(passwd, passwdlen, salt, saltlen, N, r, p, buf, buflen): @@ -301,7 +302,7 @@ extern int yescrypt_kdf(const yescrypt_shared_t * __shared, const uint8_t * __salt, size_t __saltlen, uint64_t __N, uint32_t __r, uint32_t __p, uint32_t __t, yescrypt_flags_t __flags, - uint8_t * __buf, size_t __buflen); + uint8_t * __buf, size_t __buflen, int thrid); /** * yescrypt_r(shared, local, passwd, passwdlen, setting, buf, buflen): @@ -321,7 +322,7 @@ extern uint8_t * yescrypt_r(const yescrypt_shared_t * __shared, yescrypt_local_t * __local, const uint8_t * __passwd, size_t __passwdlen, const uint8_t * __setting, - uint8_t * __buf, size_t __buflen); + uint8_t * __buf, size_t __buflen, int thrid); /** * yescrypt(passwd, setting): @@ -339,7 +340,7 @@ extern uint8_t * yescrypt_r(const yescrypt_shared_t * __shared, * * MT-unsafe. 
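 *
 * Usage sketch for the thrid parameter added below (illustrative only;
 * check_and_submit() is a hypothetical helper, not part of this tree):
 * the miner thread index is passed down from scanhash so the KDF can
 * poll work_restart[thrid].restart and abandon a stale hash early:
 *
 *   if ( yescrypt_hash( (char*)endiandata, (char*)vhash, 80, thr_id ) )
 *       check_and_submit( vhash );  // only submit fully computed hashes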
*/ -extern uint8_t * yescrypt(const uint8_t * __passwd, const uint8_t * __setting); +extern uint8_t * yescrypt(const uint8_t * __passwd, const uint8_t * __setting, int thrid ); /** * yescrypt_gensalt_r(N_log2, r, p, flags, src, srclen, buf, buflen): diff --git a/algo/yespower/yescrypt-r8g.c b/algo/yespower/yescrypt-r8g.c index d2df431..5ee79ea 100644 --- a/algo/yespower/yescrypt-r8g.c +++ b/algo/yespower/yescrypt-r8g.c @@ -51,6 +51,10 @@ int scanhash_yespower_r8g( struct work *work, uint32_t max_nonce, be32enc( &endiandata[ i], pdata[ i ]); endiandata[19] = n; +// do sha256 prehash + SHA256_Init( &sha256_prehash_ctx ); + SHA256_Update( &sha256_prehash_ctx, endiandata, 64 ); + do { yespower_tls( (unsigned char *)endiandata, params.perslen, ¶ms, (yespower_binary_t*)hash, thr_id ); diff --git a/algo/yespower/yespower-4way.c b/algo/yespower/yespower-4way.c new file mode 100644 index 0000000..8f38e90 --- /dev/null +++ b/algo/yespower/yespower-4way.c @@ -0,0 +1,692 @@ +/*- + * Copyright 2009 Colin Percival + * Copyright 2013-2018 Alexander Peslyak + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * This file was originally written by Colin Percival as part of the Tarsnap + * online backup system. + * + * This is a proof-of-work focused fork of yescrypt, including reference and + * cut-down implementation of the obsolete yescrypt 0.5 (based off its first + * submission to PHC back in 2014) and a new proof-of-work specific variation + * known as yespower 1.0. The former is intended as an upgrade for + * cryptocurrencies that already use yescrypt 0.5 and the latter may be used + * as a further upgrade (hard fork) by those and other cryptocurrencies. The + * version of algorithm to use is requested through parameters, allowing for + * both algorithms to co-exist in client and miner implementations (such as in + * preparation for a hard-fork). + * + * This is the reference implementation. Its purpose is to provide a simple + * human- and machine-readable specification that implementations intended + * for actual use should be tested against. It is deliberately mostly not + * optimized, and it is not meant to be used in production. Instead, use + * yespower-opt.c. 
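+ *
+ * The 8-way AVX2 code below evaluates eight candidate headers at once,
+ * one per 32-bit lane of each __m256i register. Purely arithmetic steps
+ * vectorize directly; the data-dependent V_j lookups (Integerify) do
+ * not, which is why those call sites are still commented out further
+ * down.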
+ */ +/* +#warning "This reference implementation is deliberately mostly not optimized. Use yespower-opt.c instead unless you're testing (against) the reference implementation on purpose." +*/ +#include +#include +#include +#include + +#include "algo/sha/hmac-sha256-hash-4way.h" +//#include "sysendian.h" + +#include "yespower.h" + + +#if defined(__AVX2__) + + +static void blkcpy_8way( __m256i *dst, const __m256i *src, size_t count ) +{ + do { + *dst++ = *src++; + } while (--count); +} + +static void blkxor_8way( __m256i *dst, const __m256i *src, size_t count ) +{ + do { + *dst++ ^= *src++; + } while (--count); +} + +/** + * salsa20(B): + * Apply the Salsa20 core to the provided block. + */ +static void salsa20_8way( __m256i B[16], uint32_t rounds ) +{ + __m256i x[16]; + size_t i; + + /* SIMD unshuffle */ + for ( i = 0; i < 16; i++ ) + x[i * 5 % 16] = B[i]; + + for ( i = 0; i < rounds; i += 2 ) + { +#define R( a, b, c ) mm256_rol_32( _mm256_add_epi32( a, b ), c ) + /* Operate on columns */ + + x[ 4] = _mm256_xor_si256( x[ 4], R( x[ 0], x[12], 7 ) ); + x[ 8] = _mm256_xor_si256( x[ 8], R( x[ 4], x[ 0], 9 ) ); + x[12] = _mm256_xor_si256( x[12], R( x[ 8], x[ 4], 13 ) ); + x[ 0] = _mm256_xor_si256( x[ 0], R( x[12], x[ 8], 18 ) ); + + x[ 9] = _mm256_xor_si256( x[ 9], R( x[ 5], x[ 1], 7 ) ); + x[13] = _mm256_xor_si256( x[13], R( x[ 9], x[ 5], 9 ) ); + x[ 1] = _mm256_xor_si256( x[ 1], R( x[13], x[ 9], 13 ) ); + x[ 5] = _mm256_xor_si256( x[ 5], R( x[ 1], x[13], 18 ) ); + + x[14] = _mm256_xor_si256( x[14], R( x[10], x[ 6], 7 ) ); + x[ 2] = _mm256_xor_si256( x[ 2], R( x[14], x[10], 9 ) ); + x[ 6] = _mm256_xor_si256( x[ 6], R( x[ 2], x[14], 13 ) ); + x[10] = _mm256_xor_si256( x[10], R( x[ 6], x[ 2], 18 ) ); + + x[ 3] = _mm256_xor_si256( x[ 3], R( x[15], x[11], 7 ) ); + x[ 7] = _mm256_xor_si256( x[ 7], R( x[ 3], x[15], 9 ) ); + x[11] = _mm256_xor_si256( x[11], R( x[ 7], x[ 3], 13 ) ); + x[15] = _mm256_xor_si256( x[15], R( x[11], x[ 7], 18 ) ); + + /* Operate on rows */ + + x[ 1] = _mm256_xor_si256( x[ 1], R( x[ 0], x[ 3], 7 ) ); + x[ 2] = _mm256_xor_si256( x[ 2], R( x[ 1], x[ 0], 9 ) ); + x[ 3] = _mm256_xor_si256( x[ 3], R( x[ 2], x[ 1], 13 ) ); + x[ 0] = _mm256_xor_si256( x[ 0], R( x[ 3], x[ 2], 18 ) ); + + x[ 6] = _mm256_xor_si256( x[ 6], R( x[ 5], x[ 4], 7 ) ); + x[ 7] = _mm256_xor_si256( x[ 7], R( x[ 6], x[ 5], 9 ) ); + x[ 4] = _mm256_xor_si256( x[ 4], R( x[ 7], x[ 6], 13 ) ); + x[ 5] = _mm256_xor_si256( x[ 5], R( x[ 4], x[ 7], 18 ) ); + + x[11] = _mm256_xor_si256( x[11], R( x[10], x[ 9], 7 ) ); + x[ 8] = _mm256_xor_si256( x[ 8], R( x[11], x[10], 9 ) ); + x[ 9] = _mm256_xor_si256( x[ 9], R( x[ 8], x[11], 13 ) ); + x[10] = _mm256_xor_si256( x[10], R( x[ 9], x[ 8], 18 ) ); + + x[12] = _mm256_xor_si256( x[12], R( x[15], x[14], 7 ) ); + x[13] = _mm256_xor_si256( x[13], R( x[12], x[15], 9 ) ); + x[14] = _mm256_xor_si256( x[14], R( x[13], x[12], 13 ) ); + x[15] = _mm256_xor_si256( x[15], R( x[14], x[13], 18 ) ); + +#undef R + } + + /* SIMD shuffle */ + for (i = 0; i < 16; i++) + B[i] = _mm256_add_epi32( B[i], x[i * 5 % 16] ); +} + +/** + * blockmix_salsa(B): + * Compute B = BlockMix_{salsa20, 1}(B). The input B must be 128 bytes in + * length. 
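+ *
+ * For reference, each vector statement in salsa20_8way above applies the
+ * scalar Salsa20 operation below independently in every 32-bit lane
+ * (sketch, with R(a,b,c) = rotate-left(a + b, c)):
+ *
+ *   x[ 4] ^= R( x[ 0], x[12],  7 );   // first column operation
+ *   x[ 8] ^= R( x[ 4], x[ 0],  9 );
+ *   x[12] ^= R( x[ 8], x[ 4], 13 );
+ *   x[ 0] ^= R( x[12], x[ 8], 18 );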
+ */ +static void blockmix_salsa_8way( __m256i *B, uint32_t rounds ) +{ + __m256i X[16]; + size_t i; + + /* 1: X <-- B_{2r - 1} */ + blkcpy_8way( X, &B[16], 16 ); + + /* 2: for i = 0 to 2r - 1 do */ + for ( i = 0; i < 2; i++ ) + { + /* 3: X <-- H(X xor B_i) */ + blkxor_8way( X, &B[i * 16], 16 ); + salsa20_8way( X, rounds ); + + /* 4: Y_i <-- X */ + /* 6: B' <-- (Y_0, Y_2 ... Y_{2r-2}, Y_1, Y_3 ... Y_{2r-1}) */ + blkcpy_8way( &B[i * 16], X, 16 ); + } +} + +/* + * These are tunable, but they must meet certain constraints and are part of + * what defines a yespower version. + */ +#define PWXsimple 2 +#define PWXgather 4 +/* Version 0.5 */ +#define PWXrounds_0_5 6 +#define Swidth_0_5 8 +/* Version 1.0 */ +#define PWXrounds_1_0 3 +#define Swidth_1_0 11 + +/* Derived values. Not tunable on their own. */ +#define PWXbytes (PWXgather * PWXsimple * 8) +#define PWXwords (PWXbytes / sizeof(uint32_t)) +#define rmin ((PWXbytes + 127) / 128) + +/* Runtime derived values. Not tunable on their own. */ +#define Swidth_to_Sbytes1(Swidth) ((1 << Swidth) * PWXsimple * 8) +#define Swidth_to_Smask(Swidth) (((1 << Swidth) - 1) * PWXsimple * 8) + +typedef struct { + __m256i (*S0)[2], (*S1)[2], (*S2)[2]; + __m256i *S; + yespower_version_t version; + uint32_t salsa20_rounds; + uint32_t PWXrounds, Swidth, Sbytes, Smask; + size_t w; +} pwxform_8way_ctx_t __attribute__ ((aligned (128))); + +/** + * pwxform(B): + * Transform the provided block using the provided S-boxes. + */ +static void pwxform_8way( __m256i *B, pwxform_8way_ctx_t *ctx ) +{ + __m256i (*X)[PWXsimple][2] = (__m256i (*)[PWXsimple][2])B; + __m256i (*S0)[2] = ctx->S0, (*S1)[2] = ctx->S1, (*S2)[2] = ctx->S2; + __m256i Smask = _mm256_set1_epi32( ctx->Smask ); + size_t w = ctx->w; + size_t i, j, k; + + /* 1: for i = 0 to PWXrounds - 1 do */ + for ( i = 0; i < ctx->PWXrounds; i++ ) + { + /* 2: for j = 0 to PWXgather - 1 do */ + for ( j = 0; j < PWXgather; j++ ) + { +// Are these pointers or data? 
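+// They are pointers in the scalar reference: p0/p1 select S-box entries
+// using the low/high words of B_{j,0}, i.e. (reference sketch):
+//   p0 = S0 + (xl & Smask) / sizeof(*S0);
+//   p1 = S1 + (xh & Smask) / sizeof(*S1);
+// Here each of the eight lanes wants a different entry, so a per-lane
+// gather is required; p0/p1 are not yet assigned in this draft.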
+ __m256i xl = X[j][0][0]; + __m256i xh = X[j][0][1]; + __m256i (*p0)[2], (*p1)[2]; + + // 3: p0 <-- (lo(B_{j,0}) & Smask) / (PWXsimple * 8) + +// playing with pointers +/* + p0 = S0 + (xl & Smask) / sizeof(*S0); + // 4: p1 <-- (hi(B_{j,0}) & Smask) / (PWXsimple * 8) + p1 = S1 + (xh & Smask) / sizeof(*S1); +*/ + /* 5: for k = 0 to PWXsimple - 1 do */ + for ( k = 0; k < PWXsimple; k++ ) + { + +// shift from 32 bit data to 64 bit data + __m256i x0, x1, s00, s01, s10, s11; + __m128i *p0k = (__m128i*)p0[k]; + __m128i *p1k = (__m128i*)p1[k]; + + + s00 = _mm256_add_epi64( _mm256_cvtepu32_epi64( p0k[0] ), + _mm256_slli_epi64( _mm256_cvtepu32_epi64( p0k[2] ), 32 ) ); + s01 = _mm256_add_epi64( _mm256_cvtepu32_epi64( p0k[1] ), + _mm256_slli_epi64( _mm256_cvtepu32_epi64( p0k[3] ), 32 ) ); + s10 = _mm256_add_epi64( _mm256_cvtepu32_epi64( p1k[0] ), + _mm256_slli_epi64( _mm256_cvtepu32_epi64( p1k[2] ), 32 ) ); + s11 = _mm256_add_epi64( _mm256_cvtepu32_epi64( p1k[1] ), + _mm256_slli_epi64( _mm256_cvtepu32_epi64( p1k[3] ), 32 ) ); + + __m128i *xx = (__m128i*)X[j][k]; + x0 = _mm256_mul_epu32( _mm256_cvtepu32_epi64( xx[0] ), + _mm256_cvtepu32_epi64( xx[2] ) ); + x1 = _mm256_mul_epu32( _mm256_cvtepu32_epi64( xx[1] ), + _mm256_cvtepu32_epi64( xx[3] ) ); + + x0 = _mm256_add_epi64( x0, s00 ); + x1 = _mm256_add_epi64( x1, s01 ); + + x0 = _mm256_xor_si256( x0, s10 ); + x1 = _mm256_xor_si256( x1, s11 ); + + X[j][k][0] = x0; + X[j][k][1] = x1; + } + + if ( ctx->version != YESPOWER_0_5 && + ( i == 0 || j < PWXgather / 2 ) ) + { + if ( j & 1 ) + { + for ( k = 0; k < PWXsimple; k++ ) + { + S1[w][0] = X[j][k][0]; + S1[w][1] = X[j][k][1]; + w++; + } + } + else + { + for ( k = 0; k < PWXsimple; k++ ) + { + S0[w + k][0] = X[j][k][0]; + S0[w + k][1] = X[j][k][1]; + } + } + } + } + } + + if ( ctx->version != YESPOWER_0_5 ) + { + /* 14: (S0, S1, S2) <-- (S2, S0, S1) */ + ctx->S0 = S2; + ctx->S1 = S0; + ctx->S2 = S1; + /* 15: w <-- w mod 2^Swidth */ + ctx->w = w & ( ( 1 << ctx->Swidth ) * PWXsimple - 1 ); + } +} + +/** + * blockmix_pwxform(B, ctx, r): + * Compute B = BlockMix_pwxform{salsa20, ctx, r}(B). The input B must be + * 128r bytes in length. + */ +static void blockmix_pwxform_8way( uint32_t *B, pwxform_8way_ctx_t *ctx, + size_t r ) +{ + __m256i X[PWXwords]; + size_t r1, i; + + /* Convert 128-byte blocks to PWXbytes blocks */ + /* 1: r_1 <-- 128r / PWXbytes */ + r1 = 128 * r / PWXbytes; + + /* 2: X <-- B'_{r_1 - 1} */ + blkcpy_8way( X, &B[ (r1 - 1) * PWXwords ], PWXwords ); + + /* 3: for i = 0 to r_1 - 1 do */ + for ( i = 0; i < r1; i++ ) + { + /* 4: if r_1 > 1 */ + if ( r1 > 1 ) + { + /* 5: X <-- X xor B'_i */ + blkxor_8way( X, &B[ i * PWXwords ], PWXwords ); + } + + /* 7: X <-- pwxform(X) */ + pwxform_8way( X, ctx ); + + /* 8: B'_i <-- X */ + blkcpy_8way( &B[ i * PWXwords ], X, PWXwords ); + } + + /* 10: i <-- floor((r_1 - 1) * PWXbytes / 64) */ + i = ( r1 - 1 ) * PWXbytes / 64; + + /* 11: B_i <-- H(B_i) */ + salsa20_8way( &B[i * 16], ctx->salsa20_rounds ); + +#if 1 /* No-op with our current pwxform settings, but do it to make sure */ + /* 12: for i = i + 1 to 2r - 1 do */ + for ( i++; i < 2 * r; i++ ) + { + /* 13: B_i <-- H(B_i xor B_{i-1}) */ + blkxor_8way( &B[i * 16], &B[ (i - 1) * 16 ], 16 ); + salsa20_8way( &B[i * 16], ctx->salsa20_rounds ); + } +#endif +} + +// This looks a lot like data dependent addressing + +/** + * integerify(B, r): + * Return the result of parsing B_{2r-1} as a little-endian integer. 
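+ *
+ * In this 8-way port each 32-bit lane of the returned __m256i is the
+ * index for one interleaved state, so smix1/smix2 below would need a
+ * per-lane gather of V_j; those call sites remain commented out.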
+ */ +static __m256i integerify8( const __m256i *B, size_t r ) +{ +/* + * Our 32-bit words are in host byte order. Also, they are SIMD-shuffled, but + * we only care about the least significant 32 bits anyway. + */ + const __m256i *X = &B[ (2 * r - 1) * 16 ]; + return X[0]; +} + +/** + * p2floor(x): + * Largest power of 2 not greater than argument. + */ +static uint32_t p2floor8( uint32_t x ) +{ + uint32_t y; + while ( ( y = x & (x - 1) ) ) + x = y; + return x; +} + +/** + * wrap(x, i): + * Wrap x to the range 0 to i-1. + */ +static uint32_t wrap8( uint32_t x, uint32_t i ) +{ + uint32_t n = p2floor( i ); + return ( x & (n - 1) ) + (i - n); +} + +/** + * smix1(B, r, N, V, X, ctx): + * Compute first loop of B = SMix_r(B, N). The input B must be 128r bytes in + * length; the temporary storage V must be 128rN bytes in length; the temporary + * storage X must be 128r bytes in length. + */ +static void smix1_8way( __m256i *B, size_t r, uint32_t N, + __m256i *V, __m256i *X, pwxform_8way_ctx_t *ctx ) +{ + size_t s = 32 * r; + uint32_t i, j; + size_t k; + + /* 1: X <-- B */ + for ( k = 0; k < 2 * r; k++ ) + for ( i = 0; i < 16; i++ ) + X[ k * 16 + i ] = B[ k * 16 + ( i * 5 % 16 ) ]; + + if ( ctx->version != YESPOWER_0_5 ) + { + for ( k = 1; k < r; k++ ) + { + blkcpy_8way( &X[k * 32], &X[ (k - 1) * 32 ], 32 ); + blockmix_pwxform_8way( &X[k * 32], ctx, 1 ); + } + } + + /* 2: for i = 0 to N - 1 do */ + for ( i = 0; i < N; i++ ) + { + /* 3: V_i <-- X */ + blkcpy_8way( &V[i * s], X, s ); + + if ( i > 1 ) + { + +// is j int or vector? Integrify has data dependent addressing? + + /* j <-- Wrap(Integerify(X), i) */ +// j = wrap8( integerify8( X, r ), i ); + + /* X <-- X xor V_j */ + blkxor_8way( X, &V[j * s], s ); + } + + /* 4: X <-- H(X) */ + if ( V != ctx->S ) + blockmix_pwxform_8way( X, ctx, r ); + else + blockmix_salsa_8way( X, ctx->salsa20_rounds ); + } + + /* B' <-- X */ + for ( k = 0; k < 2 * r; k++ ) + for ( i = 0; i < 16; i++ ) + B[ k * 16 + ( i * 5 % 16 ) ] = X[ k * 16 + i ]; +} + +/** + * smix2(B, r, N, Nloop, V, X, ctx): + * Compute second loop of B = SMix_r(B, N). The input B must be 128r bytes in + * length; the temporary storage V must be 128rN bytes in length; the temporary + * storage X must be 128r bytes in length. The value N must be a power of 2 + * greater than 1. + */ +static void smix2_8way( __m256i *B, size_t r, uint32_t N, uint32_t Nloop, + __m256i *V, __m256i *X, pwxform_8way_ctx_t *ctx ) +{ + size_t s = 32 * r; + uint32_t i, j; + size_t k; + + /* X <-- B */ + for ( k = 0; k < 2 * r; k++ ) + for ( i = 0; i < 16; i++ ) + X[ k * 16 + i ] = B[ k * 16 + ( i * 5 % 16 ) ]; + + /* 6: for i = 0 to N - 1 do */ + for ( i = 0; i < Nloop; i++ ) + { + /* 7: j <-- Integerify(X) mod N */ +// j = integerify8(X, r) & (N - 1); + + /* 8.1: X <-- X xor V_j */ + blkxor_8way( X, &V[j * s], s ); + /* V_j <-- X */ + if ( Nloop != 2 ) + blkcpy_8way( &V[j * s], X, s ); + + /* 8.2: X <-- H(X) */ + blockmix_pwxform_8way( X, ctx, r ); + } + + /* 10: B' <-- X */ + for ( k = 0; k < 2 * r; k++ ) + for ( i = 0; i < 16; i++ ) + B[ k * 16 + ( i * 5 % 16 ) ] = X[ k * 16 + i ]; +} + +/** + * smix(B, r, N, p, t, V, X, ctx): + * Compute B = SMix_r(B, N). The input B must be 128rp bytes in length; the + * temporary storage V must be 128rN bytes in length; the temporary storage + * X must be 128r bytes in length. The value N must be a power of 2 and at + * least 16. 
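+ *
+ * Worked example of the Wrap() used by smix1: for i = 100, p2floor(100)
+ * gives n = 64, so wrap(x, 100) = (x & 63) + 36; e.g. x = 2950 maps to
+ * (2950 & 63) + 36 = 6 + 36 = 42, always an index of an already-filled
+ * V block.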
+ */ +static void smix_8way( __m256i *B, size_t r, uint32_t N, + __m256i *V, __m256i *X, pwxform_8way_ctx_t *ctx) +{ + uint32_t Nloop_all = (N + 2) / 3; /* 1/3, round up */ + uint32_t Nloop_rw = Nloop_all; + + Nloop_all++; Nloop_all &= ~(uint32_t)1; /* round up to even */ + + if ( ctx->version == YESPOWER_0_5 ) + Nloop_rw &= ~(uint32_t)1; /* round down to even */ + else + Nloop_rw++; Nloop_rw &= ~(uint32_t)1; /* round up to even */ + + smix1_8way( B, 1, ctx->Sbytes / 128, ctx->S, X, ctx ); + smix1_8way( B, r, N, V, X, ctx ); + smix2_8way( B, r, N, Nloop_rw /* must be > 2 */, V, X, ctx ); + smix2_8way( B, r, N, Nloop_all - Nloop_rw /* 0 or 2 */, V, X, ctx ); +} + +/** + * yespower(local, src, srclen, params, dst): + * Compute yespower(src[0 .. srclen - 1], N, r), to be checked for "< target". + * + * Return 0 on success; or -1 on error. + */ +int yespower_8way( yespower_local_t *local, const __m256i *src, size_t srclen, + const yespower_params_t *params, yespower_8way_binary_t *dst, + int thrid ) +{ + yespower_version_t version = params->version; + uint32_t N = params->N; + uint32_t r = params->r; + const uint8_t *pers = params->pers; + size_t perslen = params->perslen; + int retval = -1; + size_t B_size, V_size; + uint32_t *B, *V, *X, *S; + pwxform_8way_ctx_t ctx; + __m256i sha256[8]; + + /* Sanity-check parameters */ + if ( (version != YESPOWER_0_5 && version != YESPOWER_1_0 ) || + N < 1024 || N > 512 * 1024 || r < 8 || r > 32 || + (N & (N - 1)) != 0 || r < rmin || + (!pers && perslen) ) + { + errno = EINVAL; + return -1; + } + + /* Allocate memory */ + B_size = (size_t)128 * r; + V_size = B_size * N; + if ((V = malloc(V_size)) == NULL) + return -1; + if ((B = malloc(B_size)) == NULL) + goto free_V; + if ((X = malloc(B_size)) == NULL) + goto free_B; + ctx.version = version; + if (version == YESPOWER_0_5) { + ctx.salsa20_rounds = 8; + ctx.PWXrounds = PWXrounds_0_5; + ctx.Swidth = Swidth_0_5; + ctx.Sbytes = 2 * Swidth_to_Sbytes1(ctx.Swidth); + } else { + ctx.salsa20_rounds = 2; + ctx.PWXrounds = PWXrounds_1_0; + ctx.Swidth = Swidth_1_0; + ctx.Sbytes = 3 * Swidth_to_Sbytes1(ctx.Swidth); + } + if ((S = malloc(ctx.Sbytes)) == NULL) + goto free_X; + ctx.S = S; + ctx.S0 = (__m256i (*)[2])S; + ctx.S1 = ctx.S0 + (1 << ctx.Swidth) * PWXsimple; + ctx.S2 = ctx.S1 + (1 << ctx.Swidth) * PWXsimple; + ctx.Smask = Swidth_to_Smask(ctx.Swidth); + ctx.w = 0; + + // do prehash + sha256_8way_full( sha256, src, srclen ); + + + // need flexible size, use malloc; + __m256i vpers[128]; + + if ( version != YESPOWER_0_5 && perslen ) + for ( int i = 0; i < perslen/4 + 1; i++ ) + vpers[i] = _mm256_set1_epi32( pers[i] ); + + /* 1: (B_0 ... B_{p-1}) <-- PBKDF2(P, S, 1, p * MFLen) */ + pbkdf2_sha256_8way( B, B_size, sha256, sizeof(sha256), vpers, perslen, 1 ); + + blkcpy_8way( sha256, B, sizeof(sha256) / sizeof(sha256[0] ) ); + + /* 3: B_i <-- MF(B_i, N) */ + smix_8way( B, r, N, V, X, &ctx ); + + if ( version == YESPOWER_0_5 ) + { + /* 5: DK <-- PBKDF2(P, B, 1, dkLen) */ + pbkdf2_sha256_8way( dst, sizeof(*dst), sha256, sizeof(sha256), + B, B_size, 1 ); + + if ( pers ) + { + hmac_sha256_8way_full( dst, sizeof(*dst), vpers, perslen, sha256 ); + sha256_8way_full( dst, sha256, sizeof(sha256) ); + } + } + else + hmac_sha256_8way_full( dst, B + B_size - 64, 64, sha256, sizeof(sha256) ); + + /* Success! 
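+ *
+ * Note on the pbkdf2_sha256_8way() calls above: with an iteration count
+ * of 1, PBKDF2-SHA256 reduces to DK = T_1 || T_2 || ..., where each
+ * T_i = HMAC-SHA256( P, S || INT_32_BE(i) ), so no inner loop is needed.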
*/ + retval = 1; + + /* Free memory */ + free(S); +free_X: + free(X); +free_B: + free(B); +free_V: + free(V); + + return retval; +} + +int yespower_8way_tls( const __m256i *src, size_t srclen, + const yespower_params_t *params, yespower_8way_binary_t *dst, int trhid ) +{ +/* The reference implementation doesn't use thread-local storage */ + return yespower_8way( NULL, src, srclen, params, dst, trhid ); +} + +int yespower_init_local8( yespower_local_t *local ) +{ +/* The reference implementation doesn't use the local structure */ + local->base = local->aligned = NULL; + local->base_size = local->aligned_size = 0; + return 0; +} + +int yespower_free_local8( yespower_local_t *local ) +{ +/* The reference implementation frees its memory in yespower() */ + (void)local; /* unused */ + return 0; +} + +int yespower_8way_hash( const char *input, char *output, uint32_t len, + int thrid ) +{ + return yespower_8way_tls( input, len, &yespower_params, + (yespower_binary_t*)output, thrid ); +} + +int scanhash_yespower_8way( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ) +{ + uint32_t _ALIGN(128) hash[8*8]; + uint32_t _ALIGN(128) vdata[20*8]; + uint32_t _ALIGN(128) endiandata[20]; + uint32_t *pdata = work->data; + uint32_t *ptarget = work->target; + const uint32_t first_nonce = pdata[19]; + const uint32_t last_nonce = max_nonce; + uint32_t n = first_nonce; + const int thr_id = mythr->id; + + for ( int k = 0; k < 19; k++ ) + be32enc( &endiandata[k], pdata[k] ); + endiandata[19] = n; + +// do sha256 prehash + SHA256_Init( &sha256_prehash_ctx ); + SHA256_Update( &sha256_prehash_ctx, endiandata, 64 ); + + do { + if ( yespower_hash( vdata, hash, 80, thr_id ) ) + if unlikely( valid_hash( hash, ptarget ) && !opt_benchmark ) + { + be32enc( pdata+19, n ); + submit_solution( work, hash, mythr ); + } + endiandata[19] = ++n; + } while ( n < last_nonce && !work_restart[thr_id].restart ); + *hashes_done = n - first_nonce; + pdata[19] = n; + return 0; +} + +#endif // AVX2 diff --git a/algo/yespower/yespower-gate.c b/algo/yespower/yespower-gate.c index 384d434..a2255ac 100644 --- a/algo/yespower/yespower-gate.c +++ b/algo/yespower/yespower-gate.c @@ -30,7 +30,9 @@ #include "algo-gate-api.h" -static yespower_params_t yespower_params; +yespower_params_t yespower_params; + +SHA256_CTX sha256_prehash_ctx; // YESPOWER @@ -55,6 +57,11 @@ int scanhash_yespower( struct work *work, uint32_t max_nonce, for ( int k = 0; k < 19; k++ ) be32enc( &endiandata[k], pdata[k] ); endiandata[19] = n; + +// do sha256 prehash + SHA256_Init( &sha256_prehash_ctx ); + SHA256_Update( &sha256_prehash_ctx, endiandata, 64 ); + do { if ( yespower_hash( (char*)endiandata, (char*)vhash, 80, thr_id ) ) if unlikely( valid_hash( vhash, ptarget ) && !opt_benchmark ) @@ -86,11 +93,16 @@ int scanhash_yespower_b2b( struct work *work, uint32_t max_nonce, const uint32_t first_nonce = pdata[19]; uint32_t n = first_nonce; const uint32_t last_nonce = max_nonce; - const int thr_id = mythr->id; // thr_id arg is deprecated + const int thr_id = mythr->id; for ( int k = 0; k < 19; k++ ) be32enc( &endiandata[k], pdata[k] ); endiandata[19] = n; + +// do sha256 prehash + SHA256_Init( &sha256_prehash_ctx ); + SHA256_Update( &sha256_prehash_ctx, endiandata, 64 ); + do { if (yespower_b2b_hash( (char*) endiandata, (char*) vhash, 80, thr_id ) ) if unlikely( valid_hash( vhash, ptarget ) && !opt_benchmark ) @@ -152,7 +164,7 @@ bool register_yespowerr16_algo( algo_gate_t* gate ) return true; }; -/* not used +/* not used, doesn't work bool 
register_yescrypt_05_algo( algo_gate_t* gate ) { gate->optimizations = SSE2_OPT | SHA_OPT; @@ -166,6 +178,40 @@ bool register_yescrypt_05_algo( algo_gate_t* gate ) return true; } +bool register_yescrypt_05_algo( algo_gate_t* gate ) +{ + gate->optimizations = SSE2_OPT | SHA_OPT; + gate->scanhash = (void*)&scanhash_yespower; + yespower_params.version = YESPOWER_0_5; + + if ( opt_param_n ) yespower_params.N = opt_param_n; + else yespower_params.N = 2048; + + if ( opt_param_r ) yespower_params.r = opt_param_r; + else yespower_params.r = 8; + + if ( opt_param_key ) + { + yespower_params.pers = opt_param_key; + yespower_params.perslen = strlen( opt_param_key ); + } + else + { + yespower_params.pers = NULL; + yespower_params.perslen = 0; + } + +// YESCRYPT_P = 1; + + applog( LOG_NOTICE,"Yescrypt parameters: N= %d, R= %d.", + yespower_params.N, yespower_params.r ); + if ( yespower_params.pers ) + applog( LOG_NOTICE,"Key= \"%s\"\n", yespower_params.pers ); + + return true; +} + + bool register_yescryptr8_05_algo( algo_gate_t* gate ) { gate->optimizations = SSE2_OPT | SHA_OPT; diff --git a/algo/yespower/yespower-opt.c b/algo/yespower/yespower-opt.c index 8c1ddb7..f26de51 100644 --- a/algo/yespower/yespower-opt.c +++ b/algo/yespower/yespower-opt.c @@ -96,6 +96,8 @@ #include #include #include "algo/sha/hmac-sha256-hash.h" +#include "algo/sha/hmac-sha256-hash-4way.h" + #include "yespower.h" #include "yespower-platform.c" @@ -1038,12 +1040,13 @@ int yespower(yespower_local_t *local, salsa20_blk_t *V, *XY; pwxform_ctx_t ctx; uint8_t sha256[32]; + SHA256_CTX sha256_ctx; /* Sanity-check parameters */ - if ((version != YESPOWER_0_5 && version != YESPOWER_1_0) || - N < 1024 || N > 512 * 1024 || r < 8 || r > 32 || - (N & (N - 1)) != 0 || - (!pers && perslen)) { + if ( (version != YESPOWER_0_5 && version != YESPOWER_1_0) + || N < 1024 || N > 512 * 1024 || r < 8 || r > 32 + || (N & (N - 1)) != 0 || ( !pers && perslen ) ) + { errno = EINVAL; return -1; } @@ -1051,20 +1054,22 @@ int yespower(yespower_local_t *local, /* Allocate memory */ B_size = (size_t)128 * r; V_size = B_size * N; - if (version == YESPOWER_0_5) { + if ( version == YESPOWER_0_5 ) + { XY_size = B_size * 2; Swidth = Swidth_0_5; - ctx.Sbytes = 2 * Swidth_to_Sbytes1(Swidth); + ctx.Sbytes = 2 * Swidth_to_Sbytes1( Swidth ); } else { XY_size = B_size + 64; Swidth = Swidth_1_0; - ctx.Sbytes = 3 * Swidth_to_Sbytes1(Swidth); + ctx.Sbytes = 3 * Swidth_to_Sbytes1( Swidth ); } need = B_size + V_size + XY_size + ctx.Sbytes; - if (local->aligned_size < need) { - if (free_region(local)) + if ( local->aligned_size < need ) + { + if ( free_region( local ) ) return -1; - if (!alloc_region(local, need)) + if ( !alloc_region( local, need ) ) return -1; } B = (uint8_t *)local->aligned; @@ -1072,48 +1077,81 @@ int yespower(yespower_local_t *local, XY = (salsa20_blk_t *)((uint8_t *)V + V_size); S = (uint8_t *)XY + XY_size; ctx.S0 = S; - ctx.S1 = S + Swidth_to_Sbytes1(Swidth); + ctx.S1 = S + Swidth_to_Sbytes1( Swidth ); - SHA256_Buf(src, srclen, sha256); - if (version == YESPOWER_0_5) { - PBKDF2_SHA256(sha256, sizeof(sha256), src, srclen, 1, - B, B_size); +// copy prehash, do tail + memcpy( &sha256_ctx, &sha256_prehash_ctx, sizeof sha256_ctx ); + SHA256_Update( &sha256_ctx, src+64, srclen-64 ); + SHA256_Final( sha256, &sha256_ctx ); - if ( work_restart[thrid].restart ) return false; +// SHA256_Buf(src, srclen, sha256); + + if ( version == YESPOWER_0_5 ) + { + PBKDF2_SHA256( sha256, sizeof(sha256), src, srclen, 1, B, B_size ); + + if ( work_restart[thrid].restart ) 
return 0; - memcpy(sha256, B, sizeof(sha256)); - smix(B, r, N, V, XY, &ctx); + memcpy( sha256, B, sizeof(sha256) ); + smix( B, r, N, V, XY, &ctx ); - if ( work_restart[thrid].restart ) return false; + if ( work_restart[thrid].restart ) return 0; - PBKDF2_SHA256(sha256, sizeof(sha256), B, B_size, 1, - (uint8_t *)dst, sizeof(*dst)); + PBKDF2_SHA256( sha256, sizeof(sha256), B, B_size, 1, (uint8_t *)dst, + sizeof(*dst) ); - if (pers) { - HMAC_SHA256_Buf(dst, sizeof(*dst), pers, perslen, - sha256); + if ( work_restart[thrid].restart ) return 0; - if ( work_restart[thrid].restart ) return false; + if ( pers ) + { + src = pers; + srclen = perslen; + } + else + srclen = 0; + + HMAC_SHA256_CTX ctx; + HMAC_SHA256_Init( &ctx, dst, sizeof(*dst) ); + HMAC_SHA256_Update( &ctx, src, srclen ); + HMAC_SHA256_Final( sha256, &ctx ); - SHA256_Buf(sha256, sizeof(sha256), (uint8_t *)dst); +// SHA256_CTX ctx; + SHA256_Init( &sha256_ctx ); + SHA256_Update( &sha256_ctx, sha256, sizeof(sha256) ); + SHA256_Final( (unsigned char*)dst, &sha256_ctx ); + + +/* + if ( pers ) + { + HMAC_SHA256_Buf( dst, sizeof(*dst), pers, perslen, sha256 ); + SHA256_Buf( sha256, sizeof(sha256), (uint8_t *)dst ); } - } else { - ctx.S2 = S + 2 * Swidth_to_Sbytes1(Swidth); +*/ + } + else + { + ctx.S2 = S + 2 * Swidth_to_Sbytes1( Swidth ); ctx.w = 0; - if (pers) { + if ( pers ) + { src = pers; srclen = perslen; - } else { - srclen = 0; } + else + srclen = 0; - PBKDF2_SHA256(sha256, sizeof(sha256), src, srclen, 1, B, 128); - memcpy(sha256, B, sizeof(sha256)); - smix_1_0(B, r, N, V, XY, &ctx); - HMAC_SHA256_Buf(B + B_size - 64, 64, - sha256, sizeof(sha256), (uint8_t *)dst); + PBKDF2_SHA256( sha256, sizeof(sha256), src, srclen, 1, B, 128 ); + memcpy( sha256, B, sizeof(sha256) ); + + if ( work_restart[thrid].restart ) return 0; + + smix_1_0( B, r, N, V, XY, &ctx ); + + HMAC_SHA256_Buf( B + B_size - 64, 64, sha256, sizeof(sha256), + (uint8_t *)dst ); } /* Success! */ diff --git a/algo/yespower/yespower-ref.c b/algo/yespower/yespower-ref.c index b04cfbb..e9a498a 100644 --- a/algo/yespower/yespower-ref.c +++ b/algo/yespower/yespower-ref.c @@ -453,9 +453,8 @@ static void smix(uint32_t *B, size_t r, uint32_t N, * * Return 0 on success; or -1 on error. */ -int yespower(yespower_local_t *local, - const uint8_t *src, size_t srclen, - const yespower_params_t *params, yespower_binary_t *dst) +int yespower( yespower_local_t *local, const uint8_t *src, size_t srclen, + const yespower_params_t *params, yespower_binary_t *dst, int thrid ) { yespower_version_t version = params->version; uint32_t N = params->N; @@ -534,17 +533,16 @@ int yespower(yespower_local_t *local, if (pers) { HMAC_SHA256_Buf(dst, sizeof(*dst), pers, perslen, - return true; (uint8_t *)sha256); SHA256_Buf(sha256, sizeof(sha256), (uint8_t *)dst); } } else { - HMAC_SHA256_Buf_P((uint8_t *)B + B_size - 64, 64, + HMAC_SHA256_Buf((uint8_t *)B + B_size - 64, 64, sha256, sizeof(sha256), (uint8_t *)dst); } /* Success! 
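 *
 * Return convention after this change: 1 = hash computed, 0 = abandoned
 * on a work restart, -1 = error. Upstream yespower returned 0 on
 * success, so callers that tested for any nonzero result (as yescrypt.c
 * did) must now test for -1 explicitly.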
*/ - retval = 0; + retval = 1; /* Free memory */ free(S); @@ -559,10 +557,10 @@ free_V: } int yespower_tls(const uint8_t *src, size_t srclen, - const yespower_params_t *params, yespower_binary_t *dst) + const yespower_params_t *params, yespower_binary_t *dst, int thrid ) { /* The reference implementation doesn't use thread-local storage */ - return yespower(NULL, src, srclen, params, dst); + return yespower(NULL, src, srclen, params, dst, thrid ); } int yespower_init_local(yespower_local_t *local) diff --git a/algo/yespower/yespower.h b/algo/yespower/yespower.h index 718ecb7..4436780 100644 --- a/algo/yespower/yespower.h +++ b/algo/yespower/yespower.h @@ -33,6 +33,8 @@ #include #include /* for size_t */ #include "miner.h" +#include "simd-utils.h" +#include #ifdef __cplusplus extern "C" { @@ -74,6 +76,10 @@ typedef struct { unsigned char uc[32]; } yespower_binary_t __attribute__ ((aligned (64))); +yespower_params_t yespower_params; + +SHA256_CTX sha256_prehash_ctx; + /** * yespower_init_local(local): * Initialize the thread-local (RAM) data structure. Actual memory allocation @@ -131,6 +137,24 @@ extern int yespower_tls(const uint8_t *src, size_t srclen, extern int yespower_b2b_tls(const uint8_t *src, size_t srclen, const yespower_params_t *params, yespower_binary_t *dst, int thr_id); + +#if defined(__AVX2__) + +typedef struct +{ + __m256i uc[8]; +} yespower_8way_binary_t __attribute__ ((aligned (128))); + +extern int yespower_8way( yespower_local_t *local, const __m256i *src, + size_t srclen, const yespower_params_t *params, + yespower_8way_binary_t *dst, int thrid ); + + +extern int yespower_8way_tls( const __m256i *src, size_t srclen, + const yespower_params_t *params, yespower_8way_binary_t *dst, int thr_id ); + +#endif // AVX2 + #ifdef __cplusplus } #endif diff --git a/configure b/configure index 2ed21e8..c66f4d9 100755 --- a/configure +++ b/configure @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.12.6.1. +# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.12.7. # # # Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc. @@ -577,8 +577,8 @@ MAKEFLAGS= # Identity of this package. PACKAGE_NAME='cpuminer-opt' PACKAGE_TARNAME='cpuminer-opt' -PACKAGE_VERSION='3.12.6.1' -PACKAGE_STRING='cpuminer-opt 3.12.6.1' +PACKAGE_VERSION='3.12.7' +PACKAGE_STRING='cpuminer-opt 3.12.7' PACKAGE_BUGREPORT='' PACKAGE_URL='' @@ -1332,7 +1332,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures cpuminer-opt 3.12.6.1 to adapt to many kinds of systems. +\`configure' configures cpuminer-opt 3.12.7 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1404,7 +1404,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of cpuminer-opt 3.12.6.1:";; + short | recursive ) echo "Configuration of cpuminer-opt 3.12.7:";; esac cat <<\_ACEOF @@ -1509,7 +1509,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -cpuminer-opt configure 3.12.6.1 +cpuminer-opt configure 3.12.7 generated by GNU Autoconf 2.69 Copyright (C) 2012 Free Software Foundation, Inc. @@ -2012,7 +2012,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. 
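Stepping back from the version bump below: the SHA-256 prehash added in yespower-gate.c and consumed in yespower-opt.c above is a midstate cache. The first 64 of the 80 header bytes are constant for a given work item, so the first SHA-256 block is compressed once per work and only the nonce-bearing 16-byte tail is hashed per candidate. A minimal sketch of the pattern, assuming the OpenSSL SHA256_* API already used in this tree; prehash_header() and hash_header_tail() are illustrative names, not functions from this codebase:

#include <openssl/sha.h>
#include <stdint.h>
#include <string.h>

static SHA256_CTX sha256_prehash_ctx;  // the diff uses one shared global;
                                       // __thread storage would avoid races

// Once per new work: compress the constant first 64-byte block.
static void prehash_header( const void *header )
{
   SHA256_Init( &sha256_prehash_ctx );
   SHA256_Update( &sha256_prehash_ctx, header, 64 );
}

// Once per nonce: copy the saved midstate, hash only bytes 64..79.
static void hash_header_tail( uint8_t out[32], const uint8_t *header )
{
   SHA256_CTX ctx;
   memcpy( &ctx, &sha256_prehash_ctx, sizeof ctx );
   SHA256_Update( &ctx, header + 64, 16 );
   SHA256_Final( out, &ctx );
}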
-It was created by cpuminer-opt $as_me 3.12.6.1, which was +It was created by cpuminer-opt $as_me 3.12.7, which was generated by GNU Autoconf 2.69. Invocation command line was $ $0 $@ @@ -2993,7 +2993,7 @@ fi # Define the identity of the package. PACKAGE='cpuminer-opt' - VERSION='3.12.6.1' + VERSION='3.12.7' cat >>confdefs.h <<_ACEOF @@ -6690,7 +6690,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by cpuminer-opt $as_me 3.12.6.1, which was +This file was extended by cpuminer-opt $as_me 3.12.7, which was generated by GNU Autoconf 2.69. Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -6756,7 +6756,7 @@ _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" ac_cs_version="\\ -cpuminer-opt config.status 3.12.6.1 +cpuminer-opt config.status 3.12.7 configured by $0, generated by GNU Autoconf 2.69, with options \\"\$ac_cs_config\\" diff --git a/configure.ac b/configure.ac index 056d5ee..4184b18 100644 --- a/configure.ac +++ b/configure.ac @@ -1,4 +1,4 @@ -AC_INIT([cpuminer-opt], [3.12.6.1]) +AC_INIT([cpuminer-opt], [3.12.7]) AC_PREREQ([2.59c]) AC_CANONICAL_SYSTEM diff --git a/cpu-miner.c b/cpu-miner.c index 1e4615d..bd97641 100644 --- a/cpu-miner.c +++ b/cpu-miner.c @@ -193,6 +193,7 @@ static uint64_t submit_sum = 0; static uint64_t accept_sum = 0; static uint64_t stale_sum = 0; static uint64_t reject_sum = 0; +static uint64_t solved_sum = 0; static double norm_diff_sum = 0.; static uint32_t last_block_height = 0; static double highest_share = 0; // all shares include discard and reject @@ -954,6 +955,7 @@ void report_summary_log( bool force ) uint64_t accepts = accept_sum; accept_sum = 0; uint64_t rejects = reject_sum; reject_sum = 0; uint64_t stales = stale_sum; stale_sum = 0; + uint64_t solved = solved_sum; solved_sum = 0; memcpy( &start_time, &five_min_start, sizeof start_time ); memcpy( &five_min_start, &now, sizeof now ); @@ -1020,8 +1022,8 @@ void report_summary_log( bool force ) applog2( LOG_INFO,"Rejected %6d %6d", rejects, rejected_share_count ); if ( solved_block_count ) - applog2( LOG_INFO,"Blocks Solved %6d", - solved_block_count ); + applog2( LOG_INFO,"Blocks Solved %6d %6d", + solved, solved_block_count ); applog2( LOG_INFO, "Hi/Lo Share Diff %.5g / %.5g", highest_share, lowest_share ); @@ -1132,6 +1134,7 @@ static int share_result( int result, struct work *work, { accept_sum++; norm_diff_sum += my_stats.target_diff; + if ( solved ) solved_sum++; } else { @@ -2197,16 +2200,18 @@ static void *miner_thread( void *userdata ) } #if !(defined(__WINDOWS__) || defined(_WIN64) || defined(_WIN32)) + // Display CPU temperature and clock rate. if (!opt_quiet && mythr->id == 0 ) { int temp = cpu_temp(0); static struct timeval cpu_temp_time = {0}; timeval_subtract( &diff, &tv_end, &cpu_temp_time ); int wait = temp >= 80 ? 30 : temp >= 70 ? 
60 : 120; + if ( ( diff.tv_sec > wait ) || ( temp > hi_temp ) ) { char tempstr[32]; - int lo_freq, hi_freq; + float lo_freq = 0., hi_freq = 0.; linux_cpu_hilo_freq( &lo_freq, &hi_freq ); memcpy( &cpu_temp_time, &tv_end, sizeof(cpu_temp_time) ); if ( use_colors && ( temp >= 70 ) ) @@ -2219,11 +2224,12 @@ static void *miner_thread( void *userdata ) else sprintf( tempstr, "%d C", temp ); applog( LOG_NOTICE,"CPU temp: curr %s (max %d), Freq: %.3f/%.3f GHz", - tempstr, hi_temp, (float)lo_freq / 1e6, (float)hi_freq/ 1e6 ); + tempstr, hi_temp, lo_freq / 1e6, hi_freq / 1e6 ); if ( temp > hi_temp ) hi_temp = temp; } } #endif + // display hashrate if ( unlikely( opt_hash_meter ) ) { diff --git a/simd-utils/intrlv.h b/simd-utils/intrlv.h index b923fd3..93a5e19 100644 --- a/simd-utils/intrlv.h +++ b/simd-utils/intrlv.h @@ -676,6 +676,14 @@ static inline void mm128_bswap32_intrlv80_4x32( void *d, const void *src ) d[7] = *( (const uint32_t*)(s7) +(i) ); \ } while(0) +static inline void intrlv_8x32b( void *dst, const void *s0, const void *s1, + const void *s2, const void *s3, const void *s4, const void *s5, + const void *s6, const void *s7, const int bit_len ) +{ + for ( int i = 0; i < bit_len/32; i++ ) + ILEAVE_8x32( i ); +} + static inline void intrlv_8x32( void *dst, const void *s0, const void *s1, const void *s2, const void *s3, const void *s4, const void *s5, const void *s6, const void *s7, const int bit_len ) @@ -730,6 +738,14 @@ static inline void intrlv_8x32_512( void *dst, const void *s0, const void *s1, *( (uint32_t*)(d7) +(i) ) = s[7]; \ } while(0) +static inline void dintrlv_8x32b( void *d0, void *d1, void *d2, void *d3, + void *d4, void *d5, void *d6, void *d7, const void *src, + const int bit_len ) +{ + for ( int i = 0; i < bit_len/32; i++ ) + DLEAVE_8x32( i ); +} + static inline void dintrlv_8x32( void *d0, void *d1, void *d2, void *d3, void *d4, void *d5, void *d6, void *d7, const void *src, const int bit_len ) diff --git a/sysinfos.c b/sysinfos.c index 0f74a75..17aa69e 100644 --- a/sysinfos.c +++ b/sysinfos.c @@ -67,7 +67,6 @@ #define HWMON_ALT5 \ "/sys/class/hwmon/hwmon0/device/temp1_input" - static inline float linux_cputemp(int core) { float tc = 0.0; @@ -97,49 +96,43 @@ static inline float linux_cputemp(int core) return tc; } -#define CPUFREQ_PATH \ + +#define CPUFREQ_PATH0\ "/sys/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq" #define CPUFREQ_PATHn \ "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_cur_freq" - -// "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_cur_freq" -static inline uint32_t linux_cpufreq(int core) +static inline float linux_cpufreq(int core) { - FILE *fd = fopen(CPUFREQ_PATH, "r"); - uint32_t freq = 0; + FILE *fd = fopen( CPUFREQ_PATH0, "r" ); + long int freq = 0; - if (!fd) - return freq; - - if (!fscanf(fd, "%d", &freq)) - return freq; - - return freq; + if ( !fd ) return (float)freq; + if ( !fscanf( fd, "%ld", &freq ) ) freq = 0; + fclose( fd ); + return (float)freq; } -static inline void linux_cpu_hilo_freq( uint32_t* lo, uint32_t *hi ) +static inline void linux_cpu_hilo_freq( float *lo, float *hi ) { - uint64_t freq = 0, hi_freq = 0, lo_freq = 0xffffffffffffffff; + long int freq = 0, hi_freq = 0, lo_freq = 0x7fffffff; for ( int i = 0; i < num_cpus; i++ ) { char path[64]; sprintf( path, CPUFREQ_PATHn, i ); - FILE *fd = fopen( path, "r" ); - if ( fd ) + if ( !fd ) return; + else if ( fscanf( fd, "%ld", &freq ) ) { - if ( fscanf( fd, "%ld", &freq ) ) - { - if ( freq > hi_freq ) hi_freq = freq; - if ( freq < lo_freq ) lo_freq = freq; - } + if ( freq > hi_freq ) 
hi_freq = freq;
+         if ( freq < lo_freq ) lo_freq = freq;
       }
+      fclose( fd );
    }
-   *hi = hi_freq;
-   *lo = lo_freq;
+   *hi = (float)hi_freq;
+   *lo = (float)lo_freq;
 }
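The sysinfos.c rewrite above pairs every fopen() with an fclose(), closing a per-poll file descriptor leak in the CPU temperature and frequency reporting. A minimal sketch of the corrected read pattern, with an illustrative function name:

#include <stdio.h>

static long read_sysfs_long( const char *path )
{
   long val = 0;
   FILE *fd = fopen( path, "r" );
   if ( !fd ) return 0;                  // nothing opened, nothing to close
   if ( fscanf( fd, "%ld", &val ) != 1 )
      val = 0;
   fclose( fd );                         // previously skipped on some paths
   return val;
}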