v3.12.2

v3.12.1
v3.12.0.1
2025-09-17 23:44:27 +00:00 · 2020-02-09 13:30:40 -05:00 · 2020-02-07 20:18:20 -05:00 · 2020-02-06 22:50:20 -05:00 · 2020-02-05 22:50:58 -05:00
81 changed files with 2209 additions and 9544 deletions
--- a/Makefile.am
+++ b/Makefile.am
@@ -21,15 +21,6 @@ cpuminer_SOURCES = \
  api.c \
  sysinfos.c \
  algo-gate-api.c\
-  crypto/oaes_lib.c \
-  crypto/c_keccak.c \
-  crypto/c_groestl.c \
-  crypto/c_blake256.c \
-  crypto/c_jh.c \
-  crypto/c_skein.c \
-  crypto/hash.c \
-  crypto/aesb.c \
-  crypto/magimath.cpp \
  algo/argon2/argon2a/argon2a.c \
  algo/argon2/argon2a/ar2/argon2.c \
  algo/argon2/argon2a/ar2/opt.c \
@@ -76,10 +67,6 @@ cpuminer_SOURCES = \
  algo/bmw/bmw512-gate.c \
  algo/bmw/bmw512.c \
  algo/bmw/bmw512-4way.c \
-  algo/cryptonight/cryptolight.c \
-  algo/cryptonight/cryptonight-common.c\
-  algo/cryptonight/cryptonight-aesni.c\
-  algo/cryptonight/cryptonight.c\
  algo/cubehash/cubehash_sse2.c\
  algo/cubehash/cube-hash-2way.c \
  algo/echo/sph_echo.c \
@@ -141,7 +128,8 @@ cpuminer_SOURCES = \
  algo/lyra2/allium.c \
  algo/lyra2/phi2-4way.c \
  algo/lyra2/phi2.c \
-  algo/m7m.c \
+  algo//m7m/m7m.c \
+  algo/m7m/magimath.cpp \
  algo/nist5/nist5-gate.c \
  algo/nist5/nist5-4way.c \
  algo/nist5/nist5.c \
--- a/50
+++ b/50
@@ -65,10 +65,60 @@ If not what makes it happen or not happen?
 Change Log
 ----------

+v3.12.2
+
+Fixed xevan, skein, skein2 AVX2, #238.
+
+Reversed polarity of AVX2 vector bit test utilities, and all users, to be
+logically and semantically correct. Follow up to issue #236. 
+
+v3.12.1
+
+Fixed anime AVX2 low difficulty shares, git issue #236.
+
+Periodic summary now reports lost hash rate due to rejected and stale shares,
+displayed only when non-zero.
+
+v3.12.0.1
+
+Fixed hodl rejects, git issue #237.
+
+Fixed debug code added in v3.12.0 to work with AVX2 to be enabled only
+after low difficulty shares have been seen to avoid unnecessarily excessive
+log output.
+
+Added more digits of precision to diff in log output to help diagnose
+low difficulty shares.
+
+v3.12.0
+
+Faster phi2 AVX2 +62%, AVX512 +150% on Intel CPUs. AMD Ryzen AVX2 is
+YMMV due to its inferior AVX2 implementation.
+
+Fixed Hodl stats, rejects are still an issue since v3.9.5, git issue #237.
+
+API can now be enabled with "-b port" or "--api-bind port".
+It will use the default address 127.0.0.1.
+
+Editorial: Short form options should only be used on the command line to save
+typing. Configuration files and scripts should always use the long form
+"--api-bind addr:port" without relying on any defaults. This is a general
+recommendation that applies to all options for any application.
+
+Removed obsolete cryptonight, all variants, and supporting code for more
+size reduction and faster compiling.
+
+Tweaked the timing of the CPU temperature and frequency log (Linux only).
+
+Added some debug code to collect more info about low difficulty rejects,
+git issue #236.
+
 v3.11.9

 Fixed x16r invalid shares when Luffa was first in hash order.

+API is disabled by default.
+
 New startup message for status of stratum connection, API & extranonce.

 New log report for CPU temperature, frequency of fastest and slowest cores.
--- a/algo-gate-api.c
+++ b/algo-gate-api.c
@@ -113,7 +113,6 @@ void init_algo_gate( algo_gate_t* gate )
   gate->hash                    = (void*)&null_hash;
   gate->hash_suw                = (void*)&null_hash_suw;
   gate->get_new_work            = (void*)&std_get_new_work;
-   gate->get_nonceptr            = (void*)&std_get_nonceptr;
   gate->work_decode             = (void*)&std_le_work_decode;
   gate->decode_extra_data       = (void*)&do_nothing;
   gate->gen_merkle_root         = (void*)&sha256d_gen_merkle_root;
@@ -129,7 +128,6 @@ void init_algo_gate( algo_gate_t* gate )
   gate->resync_threads          = (void*)&do_nothing;
   gate->do_this_thread          = (void*)&return_true;
   gate->longpoll_rpc_call       = (void*)&std_longpoll_rpc_call;
-   gate->stratum_handle_response = (void*)&std_stratum_handle_response;
   gate->get_work_data_size      = (void*)&std_get_work_data_size;
   gate->optimizations           = EMPTY_SET;
   gate->ntime_index             = STD_NTIME_INDEX;
@@ -168,9 +166,6 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
    case ALGO_BLAKECOIN:     register_blakecoin_algo     ( gate ); break;
    case ALGO_BMW512:        register_bmw512_algo        ( gate ); break;
    case ALGO_C11:           register_c11_algo           ( gate ); break;
-    case ALGO_CRYPTOLIGHT:   register_cryptolight_algo   ( gate ); break;
-    case ALGO_CRYPTONIGHT:   register_cryptonight_algo   ( gate ); break;
-    case ALGO_CRYPTONIGHTV7: register_cryptonightv7_algo ( gate ); break;
    case ALGO_DECRED:        register_decred_algo        ( gate ); break;
    case ALGO_DEEP:          register_deep_algo          ( gate ); break;
    case ALGO_DMD_GR:        register_dmd_gr_algo        ( gate ); break;
@@ -266,25 +261,6 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
 // restore warnings
 #pragma GCC diagnostic pop

-// override std defaults with jr2 defaults
-bool register_json_rpc2( algo_gate_t *gate )
-{
-//  gate->wait_for_diff           = (void*)&do_nothing;
-  gate->get_new_work            = (void*)&jr2_get_new_work;
-  gate->get_nonceptr            = (void*)&jr2_get_nonceptr;
-  gate->stratum_gen_work        = (void*)&jr2_stratum_gen_work;
-  gate->build_stratum_request   = (void*)&jr2_build_stratum_request;
-  gate->submit_getwork_result   = (void*)&jr2_submit_getwork_result;
-  gate->longpoll_rpc_call       = (void*)&jr2_longpoll_rpc_call;
-  gate->work_decode             = (void*)&jr2_work_decode;
-  gate->stratum_handle_response = (void*)&jr2_stratum_handle_response;
-  gate->nonce_index             = JR2_NONCE_INDEX;
-  jsonrpc_2 = true;   // still needed
-  opt_extranonce = false;
-//  have_gbt = false;
-  return true;
- }
-
 // run the alternate hash function for a specific algo
 void exec_hash_function( int algo, void *output, const void *pdata )
 {
@@ -350,7 +326,7 @@ void get_algo_alias( char** algo_or_alias )
    if ( !strcasecmp( *algo_or_alias, algo_alias_map[i][ ALIAS ] ) )
    {
      // found valid alias, return proper name
-      *algo_or_alias = (const char*)( algo_alias_map[i][ PROPER ] );
+      *algo_or_alias = (char*)( algo_alias_map[i][ PROPER ] );
      return;
    }
 }
--- a/algo-gate-api.h
+++ b/algo-gate-api.h
@@ -129,9 +129,6 @@ void ( *stratum_gen_work )      ( struct stratum_ctx*, struct work* );
 // Get thread local copy of blockheader with unique nonce.
 void ( *get_new_work )          ( struct work*, struct work*, int, uint32_t* );

-// Return pointer to nonce in blockheader.
-uint32_t *( *get_nonceptr )     ( uint32_t* );
-
 // Decode getwork blockheader
 bool ( *work_decode )           ( const json_t*, struct work* );

@@ -169,7 +166,6 @@ bool ( *do_this_thread )        ( int );
 void ( *resync_threads )        ( struct work* );

 json_t* (*longpoll_rpc_call)      ( CURL*, int*, char* );
-bool ( *stratum_handle_response ) ( json_t* );
 set_t optimizations;
 int  ( *get_work_data_size )     ();
 int  ntime_index;
@@ -222,31 +218,22 @@ void null_hash_suw();

 // optional safe targets, default listed first unless noted.

-uint32_t *std_get_nonceptr( uint32_t *work_data );
-uint32_t *jr2_get_nonceptr( uint32_t *work_data );
-
 void std_get_new_work( struct work *work, struct work *g_work, int thr_id,
                       uint32_t* end_nonce_ptr );
-void jr2_get_new_work( struct work *work, struct work *g_work, int thr_id,
-                       uint32_t* end_nonce_ptr );

 void std_stratum_gen_work( struct stratum_ctx *sctx, struct work *work );
-void jr2_stratum_gen_work( struct stratum_ctx *sctx, struct work *work );

 void sha256d_gen_merkle_root( char *merkle_root, struct stratum_ctx *sctx );
 void SHA256_gen_merkle_root ( char *merkle_root, struct stratum_ctx *sctx );

 bool std_le_work_decode( const json_t *val, struct work *work );
 bool std_be_work_decode( const json_t *val, struct work *work );
-bool jr2_work_decode(    const json_t *val, struct work *work );

 bool std_le_submit_getwork_result( CURL *curl, struct work *work );
 bool std_be_submit_getwork_result( CURL *curl, struct work *work );
-bool jr2_submit_getwork_result(    CURL *curl, struct work *work );

 void std_le_build_stratum_request( char *req, struct work *work );
 void std_be_build_stratum_request( char *req, struct work *work );
-void jr2_build_stratum_request   ( char *req, struct work *work );

 char* std_malloc_txs_request( struct work *work );

@@ -263,10 +250,10 @@ void std_build_block_header( struct work* g_work, uint32_t version,
 void std_build_extraheader( struct work *work, struct stratum_ctx *sctx );

 json_t* std_longpoll_rpc_call( CURL *curl, int *err, char *lp_url );
-json_t* jr2_longpoll_rpc_call( CURL *curl, int *err );
+//json_t* jr2_longpoll_rpc_call( CURL *curl, int *err );

-bool std_stratum_handle_response( json_t *val );
-bool jr2_stratum_handle_response( json_t *val );
+//bool std_stratum_handle_response( json_t *val );
+//bool jr2_stratum_handle_response( json_t *val );

 bool std_ready_to_mine( struct work* work, struct stratum_ctx* stratum,
                        int thr_id );
@@ -288,7 +275,7 @@ bool register_algo( algo_gate_t *gate );
 // Overrides a common set of functions used by RPC2 and other RPC2-specific
 // init. Called by algo's register function before initializing algo-specific
 // functions and data.
-bool register_json_rpc2( algo_gate_t *gate );
+//bool register_json_rpc2( algo_gate_t *gate );

 // use this to call the hash function of an algo directly, ie util.c test.
 void exec_hash_function( int algo, void *output, const void *pdata );
--- a/algo/blake/decred-gate.c
+++ b/algo/blake/decred-gate.c
@@ -153,7 +153,7 @@ bool register_decred_algo( algo_gate_t* gate )
  gate->hash      = (void*)&decred_hash;
 #endif
  gate->optimizations = AVX2_OPT;
-  gate->get_nonceptr          = (void*)&decred_get_nonceptr;
+//  gate->get_nonceptr          = (void*)&decred_get_nonceptr;
  gate->decode_extra_data     = (void*)&decred_decode_extradata;
  gate->build_stratum_request = (void*)&decred_be_build_stratum_request;
  gate->work_decode           = (void*)&std_be_work_decode;
--- a/algo/bmw/bmw-hash-4way.h
+++ b/algo/bmw/bmw-hash-4way.h
@@ -138,7 +138,7 @@ void bmw512_2way_close( bmw512_2way_context *ctx, void *dst );

 #if defined(__AVX2__)

-// BMW-512 4 way 64
+// BMW-512 64 bit 4 way

 typedef struct {
   __m256i buf[16];
@@ -149,7 +149,6 @@ typedef struct {

 typedef bmw_4way_big_context bmw512_4way_context;

-
 void bmw512_4way_init(void *cc);

 void bmw512_4way_update(void *cc, const void *data, size_t len);
@@ -164,6 +163,7 @@ void bmw512_4way_addbits_and_close(

 #if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)

+// BMW-512 64 bit 8 way
 typedef struct {
   __m512i buf[16];
   __m512i H[16];
@@ -171,6 +171,8 @@ typedef struct {
   uint64_t bit_count;
 } bmw512_8way_context __attribute__((aligned(128)));

+void bmw512_8way_full( bmw512_8way_context *ctx, void *out, const void *data,
+                         size_t len );
 void bmw512_8way_init( bmw512_8way_context *ctx );
 void bmw512_8way_update( bmw512_8way_context *ctx, const void *data,
                         size_t len );
--- a/algo/bmw/bmw512-hash-4way.c
+++ b/algo/bmw/bmw512-hash-4way.c
@@ -1507,6 +1507,93 @@ void bmw512_8way_close( bmw512_8way_context *ctx, void *dst )
      casti_m512i( dst, u ) = h1[ v ];
 }

+void bmw512_8way_full( bmw512_8way_context *ctx, void *out, const void *data,
+                                size_t len )
+{  // One-shot BMW-512 8 way: runs the Init, Update and Close steps in one call.
+   __m512i *vdata = (__m512i*)data;
+   __m512i *buf = ctx->buf;
+   __m512i htmp[16];        // scratch chaining state, alternates with ctx->H
+   __m512i *H = ctx->H;     // chaining state fed to the next compression
+   __m512i *h2 = htmp;      // where the next compression writes its result
+   uint64_t bit_count = len * 8;   // message length in bits, stored by Close
+   size_t ptr = 0;          // byte offset within one lane of buf
+   const int buf_size = 128;  // bytes of one lane, compatible with len
+// Init: fill the 16 words of H with fixed starting constants.
+// NOTE(review): m512_const1_64 presumably broadcasts one 64 bit value to all
+// lanes -- confirm against its definition.
+   H[ 0] = m512_const1_64( 0x8081828384858687 );
+   H[ 1] = m512_const1_64( 0x88898A8B8C8D8E8F );
+   H[ 2] = m512_const1_64( 0x9091929394959697 );
+   H[ 3] = m512_const1_64( 0x98999A9B9C9D9E9F );
+   H[ 4] = m512_const1_64( 0xA0A1A2A3A4A5A6A7 );
+   H[ 5] = m512_const1_64( 0xA8A9AAABACADAEAF );
+   H[ 6] = m512_const1_64( 0xB0B1B2B3B4B5B6B7 );
+   H[ 7] = m512_const1_64( 0xB8B9BABBBCBDBEBF );
+   H[ 8] = m512_const1_64( 0xC0C1C2C3C4C5C6C7 );
+   H[ 9] = m512_const1_64( 0xC8C9CACBCCCDCECF );
+   H[10] = m512_const1_64( 0xD0D1D2D3D4D5D6D7 );
+   H[11] = m512_const1_64( 0xD8D9DADBDCDDDEDF );
+   H[12] = m512_const1_64( 0xE0E1E2E3E4E5E6E7 );
+   H[13] = m512_const1_64( 0xE8E9EAEBECEDEEEF );
+   H[14] = m512_const1_64( 0xF0F1F2F3F4F5F6F7 );
+   H[15] = m512_const1_64( 0xF8F9FAFBFCFDFEFF );
+
+// Update: copy data into buf block by block, compressing each full block.
+// NOTE(review): offsets use >>3, so len is presumably a multiple of 8 -- confirm.
+   while ( len > 0 )
+   {
+      size_t clen;
+      clen = buf_size - ptr;      // room left in the current block
+      if ( clen > len )
+         clen = len;
+      memcpy_512( buf + (ptr>>3), vdata, clen >> 3 );  // count in __m512i units
+      vdata = vdata + (clen>>3);
+      len -= clen;
+      ptr += clen;
+      if ( ptr == buf_size )      // block is full: compress it
+      {
+         __m512i *ht;
+         compress_big_8way( buf, H, h2 );
+         ht = H;                  // swap H and h2 so the fresh output
+         H = h2;                  // becomes the input state of the
+         h2 = ht;                 // next compression
+         ptr = 0;
+      }
+   }
+   if ( H != ctx->H )             // latest state is in htmp: copy it to ctx
+      memcpy_512( ctx->H, H, 16 );
+
+// Close: append padding and the bit count, then run the final compressions.
+{
+   __m512i h1[16], h2[16];        // this h2 shadows the outer h2 pointer
+   size_t u, v;
+
+   buf[ ptr>>3 ] = m512_const1_64( 0x80 );   // padding marker word
+   ptr += 8;
+
+   if (  ptr > (buf_size - 8) )   // no room left for the 8 byte bit count
+   {
+      memset_zero_512( buf + (ptr>>3), (buf_size - ptr) >> 3 );
+      compress_big_8way( buf, H, h1 );
+      ptr = 0;                    // continue with an empty block
+      H = h1;                     // chained from the extra compression
+   }
+   memset_zero_512( buf + (ptr>>3), (buf_size - 8 - ptr) >> 3 );
+   buf[ (buf_size - 8) >> 3 ] = _mm512_set1_epi64( bit_count );  // last word
+   compress_big_8way( buf, H, h2 );
+   for ( u = 0; u < 16; u ++ )    // reuse buf to hold the result as input
+      buf[ u ] = h2[ u ];
+   compress_big_8way( buf, final_b8, h1 );   // final_b8 takes the place of H
+   for (u = 0, v = 8; u < 8; u ++, v ++)     // out gets words h1[8..15]
+      casti_m512i( out, u ) = h1[ v ];
+}
+
+
+
+}   
+
+
+
 #endif // AVX512

 #ifdef __cplusplus
--- a/algo/cryptonight/cryptolight.c
+++ b/algo/cryptonight/cryptolight.c
@@ -1,371 +0,0 @@
-// Copyright (c) 2012-2013 The Cryptonote developers
-// Distributed under the MIT/X11 software license, see the accompanying
-// file COPYING or http://www.opensource.org/licenses/mit-license.php.
-
-#include "algo-gate-api.h"
-
-#if defined(__arm__) || defined(_MSC_VER)
-#ifndef NOASM
-#define NOASM
-#endif
-#endif
-
-#include "crypto/oaes_lib.h"
-#include "crypto/c_keccak.h"
-#include "crypto/c_groestl.h"
-#include "crypto/c_blake256.h"
-#include "crypto/c_jh.h"
-#include "crypto/c_skein.h"
-#include "crypto/int-util.h"
-#include "crypto/hash-ops.h"
-
-#if USE_INT128
-
-#if __GNUC__ == 4 && __GNUC_MINOR__ >= 4 && __GNUC_MINOR__ < 6
-typedef unsigned int uint128_t __attribute__ ((__mode__ (TI)));
-#elif defined (_MSC_VER)
-/* only for mingw64 on windows */
-#undef  USE_INT128
-#define USE_INT128 (0)
-#else
-typedef __uint128_t uint128_t;
-#endif
-
-#endif
-
-#define LITE 1
-#if LITE /* cryptonight-light */
-#define MEMORY (1 << 20)
-#define ITER   (1 << 19)
-#else
-#define MEMORY (1 << 21) /* 2 MiB */
-#define ITER   (1 << 20)
-#endif
-
-#define AES_BLOCK_SIZE  16
-#define AES_KEY_SIZE    32 /*16*/
-#define INIT_SIZE_BLK   8
-#define INIT_SIZE_BYTE (INIT_SIZE_BLK * AES_BLOCK_SIZE)
-
-#pragma pack(push, 1)
-union cn_slow_hash_state {
-	union hash_state hs;
-	struct {
-		uint8_t k[64];
-		uint8_t init[INIT_SIZE_BYTE];
-	};
-};
-#pragma pack(pop)
-
-static void do_blake_hash(const void* input, size_t len, char* output) {
-	blake256_hash((uint8_t*)output, input, len);
-}
-
-static void do_groestl_hash(const void* input, size_t len, char* output) {
-	groestl(input, len * 8, (uint8_t*)output);
-}
-
-static void do_jh_hash(const void* input, size_t len, char* output) {
-	int r = jh_hash(HASH_SIZE * 8, input, 8 * len, (uint8_t*)output);
-	assert(likely(SUCCESS == r));
-}
-
-static void do_skein_hash(const void* input, size_t len, char* output) {
-	int r = skein_hash(8 * HASH_SIZE, input, 8 * len, (uint8_t*)output);
-	assert(likely(SKEIN_SUCCESS == r));
-}
-
-extern int aesb_single_round(const uint8_t *in, uint8_t*out, const uint8_t *expandedKey);
-extern int aesb_pseudo_round_mut(uint8_t *val, uint8_t *expandedKey);
-#if !defined(_MSC_VER) && !defined(NOASM)
-extern int fast_aesb_single_round(const uint8_t *in, uint8_t*out, const uint8_t *expandedKey);
-extern int fast_aesb_pseudo_round_mut(uint8_t *val, uint8_t *expandedKey);
-#else
-#define fast_aesb_single_round     aesb_single_round
-#define fast_aesb_pseudo_round_mut aesb_pseudo_round_mut
-#endif
-
-#if defined(NOASM) || !defined(__x86_64__)
-static uint64_t mul128(uint64_t multiplier, uint64_t multiplicand, uint64_t* product_hi) {
-	// multiplier   = ab = a * 2^32 + b
-	// multiplicand = cd = c * 2^32 + d
-	// ab * cd = a * c * 2^64 + (a * d + b * c) * 2^32 + b * d
-	uint64_t a = hi_dword(multiplier);
-	uint64_t b = lo_dword(multiplier);
-	uint64_t c = hi_dword(multiplicand);
-	uint64_t d = lo_dword(multiplicand);
-
-	uint64_t ac = a * c;
-	uint64_t ad = a * d;
-	uint64_t bc = b * c;
-	uint64_t bd = b * d;
-
-	uint64_t adbc = ad + bc;
-	uint64_t adbc_carry = adbc < ad ? 1 : 0;
-
-	// multiplier * multiplicand = product_hi * 2^64 + product_lo
-	uint64_t product_lo = bd + (adbc << 32);
-	uint64_t product_lo_carry = product_lo < bd ? 1 : 0;
-	*product_hi = ac + (adbc >> 32) + (adbc_carry << 32) + product_lo_carry;
-	assert(ac <= *product_hi);
-
-	return product_lo;
-}
-#else
-extern uint64_t mul128(uint64_t multiplier, uint64_t multiplicand, uint64_t* product_hi);
-#endif
-
-static void (* const extra_hashes[4])(const void *, size_t, char *) = {
-		do_blake_hash, do_groestl_hash, do_jh_hash, do_skein_hash
-};
-
-
-static inline size_t e2i(const uint8_t* a) {
-#if !LITE
-	return ((uint32_t *)a)[0] & 0x1FFFF0;
-#else
-	return ((uint32_t *)a)[0] & 0xFFFF0;
-#endif
-}
-
-static inline void mul_sum_xor_dst(const uint8_t* a, uint8_t* c, uint8_t* dst) {
-	uint64_t hi, lo = mul128(((uint64_t*) a)[0], ((uint64_t*) dst)[0], &hi) + ((uint64_t*) c)[1];
-	hi += ((uint64_t*) c)[0];
-
-	((uint64_t*) c)[0] = ((uint64_t*) dst)[0] ^ hi;
-	((uint64_t*) c)[1] = ((uint64_t*) dst)[1] ^ lo;
-	((uint64_t*) dst)[0] = hi;
-	((uint64_t*) dst)[1] = lo;
-}
-
-static inline void xor_blocks(uint8_t* a, const uint8_t* b) {
-#if USE_INT128
-	*((uint128_t*) a) ^= *((uint128_t*) b);
-#else
-	((uint64_t*) a)[0] ^= ((uint64_t*) b)[0];
-	((uint64_t*) a)[1] ^= ((uint64_t*) b)[1];
-#endif
-}
-
-static inline void xor_blocks_dst(const uint8_t* a, const uint8_t* b, uint8_t* dst) {
-#if USE_INT128
-	*((uint128_t*) dst) = *((uint128_t*) a) ^ *((uint128_t*) b);
-#else
-	((uint64_t*) dst)[0] = ((uint64_t*) a)[0] ^ ((uint64_t*) b)[0];
-	((uint64_t*) dst)[1] = ((uint64_t*) a)[1] ^ ((uint64_t*) b)[1];
-#endif
-}
-
-struct cryptonight_ctx {
-	uint8_t _ALIGN(16) long_state[MEMORY];
-	union cn_slow_hash_state state;
-	uint8_t _ALIGN(16) text[INIT_SIZE_BYTE];
-	uint8_t _ALIGN(16) a[AES_BLOCK_SIZE];
-	uint8_t _ALIGN(16) b[AES_BLOCK_SIZE];
-	uint8_t _ALIGN(16) c[AES_BLOCK_SIZE];
-	oaes_ctx* aes_ctx;
-};
-
-static void cryptolight_hash_ctx(void* output, const void* input, int len, struct cryptonight_ctx* ctx)
-{
-        len = 76;
-	hash_process(&ctx->state.hs, (const uint8_t*) input, len);
-	ctx->aes_ctx = (oaes_ctx*) oaes_alloc();
-	size_t i, j;
-	memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
-
-	oaes_key_import_data(ctx->aes_ctx, ctx->state.hs.b, AES_KEY_SIZE);
-	for (i = 0; likely(i < MEMORY); i += INIT_SIZE_BYTE) {
-		aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 0], ctx->aes_ctx->key->exp_data);
-		aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 1], ctx->aes_ctx->key->exp_data);
-		aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 2], ctx->aes_ctx->key->exp_data);
-		aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 3], ctx->aes_ctx->key->exp_data);
-		aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 4], ctx->aes_ctx->key->exp_data);
-		aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 5], ctx->aes_ctx->key->exp_data);
-		aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 6], ctx->aes_ctx->key->exp_data);
-		aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 7], ctx->aes_ctx->key->exp_data);
-		memcpy(&ctx->long_state[i], ctx->text, INIT_SIZE_BYTE);
-	}
-
-	xor_blocks_dst(&ctx->state.k[0], &ctx->state.k[32], ctx->a);
-	xor_blocks_dst(&ctx->state.k[16], &ctx->state.k[48], ctx->b);
-
-	for (i = 0; likely(i < ITER / 4); ++i) {
-		/* Dependency chain: address -> read value ------+
-		 * written value <-+ hard function (AES or MUL) <+
-		 * next address  <-+
-		 */
-		/* Iteration 1 */
-		j = e2i(ctx->a);
-		aesb_single_round(&ctx->long_state[j], ctx->c, ctx->a);
-		xor_blocks_dst(ctx->c, ctx->b, &ctx->long_state[j]);
-		/* Iteration 2 */
-		mul_sum_xor_dst(ctx->c, ctx->a, &ctx->long_state[e2i(ctx->c)]);
-		/* Iteration 3 */
-		j = e2i(ctx->a);
-		aesb_single_round(&ctx->long_state[j], ctx->b, ctx->a);
-		xor_blocks_dst(ctx->b, ctx->c, &ctx->long_state[j]);
-		/* Iteration 4 */
-		mul_sum_xor_dst(ctx->b, ctx->a, &ctx->long_state[e2i(ctx->b)]);
-	}
-
-	memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
-	oaes_key_import_data(ctx->aes_ctx, &ctx->state.hs.b[32], AES_KEY_SIZE);
-	for (i = 0; likely(i < MEMORY); i += INIT_SIZE_BYTE) {
-		xor_blocks(&ctx->text[0 * AES_BLOCK_SIZE], &ctx->long_state[i + 0 * AES_BLOCK_SIZE]);
-		aesb_pseudo_round_mut(&ctx->text[0 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
-		xor_blocks(&ctx->text[1 * AES_BLOCK_SIZE], &ctx->long_state[i + 1 * AES_BLOCK_SIZE]);
-		aesb_pseudo_round_mut(&ctx->text[1 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
-		xor_blocks(&ctx->text[2 * AES_BLOCK_SIZE], &ctx->long_state[i + 2 * AES_BLOCK_SIZE]);
-		aesb_pseudo_round_mut(&ctx->text[2 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
-		xor_blocks(&ctx->text[3 * AES_BLOCK_SIZE], &ctx->long_state[i + 3 * AES_BLOCK_SIZE]);
-		aesb_pseudo_round_mut(&ctx->text[3 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
-		xor_blocks(&ctx->text[4 * AES_BLOCK_SIZE], &ctx->long_state[i + 4 * AES_BLOCK_SIZE]);
-		aesb_pseudo_round_mut(&ctx->text[4 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
-		xor_blocks(&ctx->text[5 * AES_BLOCK_SIZE], &ctx->long_state[i + 5 * AES_BLOCK_SIZE]);
-		aesb_pseudo_round_mut(&ctx->text[5 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
-		xor_blocks(&ctx->text[6 * AES_BLOCK_SIZE], &ctx->long_state[i + 6 * AES_BLOCK_SIZE]);
-		aesb_pseudo_round_mut(&ctx->text[6 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
-		xor_blocks(&ctx->text[7 * AES_BLOCK_SIZE], &ctx->long_state[i + 7 * AES_BLOCK_SIZE]);
-		aesb_pseudo_round_mut(&ctx->text[7 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
-	}
-	memcpy(ctx->state.init, ctx->text, INIT_SIZE_BYTE);
-	hash_permutation(&ctx->state.hs);
-	/*memcpy(hash, &state, 32);*/
-	extra_hashes[ctx->state.hs.b[0] & 3](&ctx->state, 200, output);
-	oaes_free((OAES_CTX **) &ctx->aes_ctx);
-}
-
-void cryptolight_hash(void* output, const void* input, int len) {
-	struct cryptonight_ctx *ctx = (struct cryptonight_ctx*)malloc(sizeof(struct cryptonight_ctx));
-	cryptolight_hash_ctx(output, input, len, ctx);
-	free(ctx);
-}
-
-#if defined(__AES__)
-
-static void cryptolight_hash_ctx_aes_ni(void* output, const void* input,
-                       int len, struct cryptonight_ctx* ctx)
-{
-	hash_process(&ctx->state.hs, (const uint8_t*)input, len);
-	ctx->aes_ctx = (oaes_ctx*) oaes_alloc();
-	size_t i, j;
-	memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
-
-	oaes_key_import_data(ctx->aes_ctx, ctx->state.hs.b, AES_KEY_SIZE);
-	for (i = 0; likely(i < MEMORY); i += INIT_SIZE_BYTE) {
-		fast_aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 0], ctx->aes_ctx->key->exp_data);
-		fast_aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 1], ctx->aes_ctx->key->exp_data);
-		fast_aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 2], ctx->aes_ctx->key->exp_data);
-		fast_aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 3], ctx->aes_ctx->key->exp_data);
-		fast_aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 4], ctx->aes_ctx->key->exp_data);
-		fast_aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 5], ctx->aes_ctx->key->exp_data);
-		fast_aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 6], ctx->aes_ctx->key->exp_data);
-		fast_aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 7], ctx->aes_ctx->key->exp_data);
-		memcpy(&ctx->long_state[i], ctx->text, INIT_SIZE_BYTE);
-	}
-
-	xor_blocks_dst(&ctx->state.k[0], &ctx->state.k[32], ctx->a);
-	xor_blocks_dst(&ctx->state.k[16], &ctx->state.k[48], ctx->b);
-
-	for (i = 0; likely(i < ITER / 4); ++i) {
-		/* Dependency chain: address -> read value ------+
-		 * written value <-+ hard function (AES or MUL) <+
-		 * next address  <-+
-		 */
-		/* Iteration 1 */
-		j = e2i(ctx->a);
-		fast_aesb_single_round(&ctx->long_state[j], ctx->c, ctx->a);
-		xor_blocks_dst(ctx->c, ctx->b, &ctx->long_state[j]);
-		/* Iteration 2 */
-		mul_sum_xor_dst(ctx->c, ctx->a, &ctx->long_state[e2i(ctx->c)]);
-		/* Iteration 3 */
-		j = e2i(ctx->a);
-		fast_aesb_single_round(&ctx->long_state[j], ctx->b, ctx->a);
-		xor_blocks_dst(ctx->b, ctx->c, &ctx->long_state[j]);
-		/* Iteration 4 */
-		mul_sum_xor_dst(ctx->b, ctx->a, &ctx->long_state[e2i(ctx->b)]);
-	}
-
-	memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
-	oaes_key_import_data(ctx->aes_ctx, &ctx->state.hs.b[32], AES_KEY_SIZE);
-	for (i = 0; likely(i < MEMORY); i += INIT_SIZE_BYTE) {
-		xor_blocks(&ctx->text[0 * AES_BLOCK_SIZE], &ctx->long_state[i + 0 * AES_BLOCK_SIZE]);
-		fast_aesb_pseudo_round_mut(&ctx->text[0 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
-		xor_blocks(&ctx->text[1 * AES_BLOCK_SIZE], &ctx->long_state[i + 1 * AES_BLOCK_SIZE]);
-		fast_aesb_pseudo_round_mut(&ctx->text[1 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
-		xor_blocks(&ctx->text[2 * AES_BLOCK_SIZE], &ctx->long_state[i + 2 * AES_BLOCK_SIZE]);
-		fast_aesb_pseudo_round_mut(&ctx->text[2 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
-		xor_blocks(&ctx->text[3 * AES_BLOCK_SIZE], &ctx->long_state[i + 3 * AES_BLOCK_SIZE]);
-		fast_aesb_pseudo_round_mut(&ctx->text[3 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
-		xor_blocks(&ctx->text[4 * AES_BLOCK_SIZE], &ctx->long_state[i + 4 * AES_BLOCK_SIZE]);
-		fast_aesb_pseudo_round_mut(&ctx->text[4 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
-		xor_blocks(&ctx->text[5 * AES_BLOCK_SIZE], &ctx->long_state[i + 5 * AES_BLOCK_SIZE]);
-		fast_aesb_pseudo_round_mut(&ctx->text[5 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
-		xor_blocks(&ctx->text[6 * AES_BLOCK_SIZE], &ctx->long_state[i + 6 * AES_BLOCK_SIZE]);
-		fast_aesb_pseudo_round_mut(&ctx->text[6 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
-		xor_blocks(&ctx->text[7 * AES_BLOCK_SIZE], &ctx->long_state[i + 7 * AES_BLOCK_SIZE]);
-		fast_aesb_pseudo_round_mut(&ctx->text[7 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
-	}
-	memcpy(ctx->state.init, ctx->text, INIT_SIZE_BYTE);
-	hash_permutation(&ctx->state.hs);
-	/*memcpy(hash, &state, 32);*/
-	extra_hashes[ctx->state.hs.b[0] & 3](&ctx->state, 200, output);
-	oaes_free((OAES_CTX **) &ctx->aes_ctx);
-}
-
-#endif
-
-int scanhash_cryptolight( struct work *work,
-		uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr)
-{
-        uint32_t *pdata = work->data;
-        uint32_t *ptarget = work->target;
-	uint32_t *nonceptr = (uint32_t*) (((char*)pdata) + 39);
-	uint32_t n = *nonceptr - 1;
-	const uint32_t first_nonce = n + 1;
-	//const uint32_t Htarg = ptarget[7];
-	uint32_t _ALIGN(32) hash[HASH_SIZE / 4];
-   int thr_id = mythr->id;
-
-	struct cryptonight_ctx *ctx = (struct cryptonight_ctx*)malloc(sizeof(struct cryptonight_ctx));
-
-#if defined(__AES__)
-		do {
-			*nonceptr = ++n;
-			cryptolight_hash_ctx_aes_ni(hash, pdata, 76, ctx);
-			if (unlikely(hash[7] < ptarget[7])) {
-				*hashes_done = n - first_nonce + 1;
-				free(ctx);
-				return true;
-			}
-		} while (likely((n <= max_nonce && !work_restart[thr_id].restart)));
-#else
-		do {
-			*nonceptr = ++n;
-			cryptolight_hash_ctx(hash, pdata, 76, ctx);
-			if (unlikely(hash[7] < ptarget[7])) {
-				*hashes_done = n - first_nonce + 1;
-				free(ctx);
-				return true;
-			}
-		} while (likely((n <= max_nonce && !work_restart[thr_id].restart)));
-#endif
-	free(ctx);
-	*hashes_done = n - first_nonce + 1;
-	return 0;
-}
-
-bool register_cryptolight_algo( algo_gate_t* gate )
-{
-  applog(LOG_WARNING,"Cryptonight algorithm and variants are no longer");
-  applog(LOG_WARNING,"supported by cpuminer-opt. Shares submitted will");
-  applog(LOG_WARNING,"likely be rejected. Proceed at your own risk.\n");
-  register_json_rpc2( gate );
-  gate->optimizations = SSE2_OPT | AES_OPT;
-  gate->scanhash  = (void*)&scanhash_cryptolight;
-  gate->hash      = (void*)&cryptolight_hash;
-  gate->hash_suw  = (void*)&cryptolight_hash; 
-  return true;
-};
-
--- a/algo/cryptonight/cryptonight-aesni.c
+++ b/algo/cryptonight/cryptonight-aesni.c
@@ -1,357 +0,0 @@
-#if defined(__AES__)
-
-#include <x86intrin.h>
-#include <memory.h>
-#include "cryptonight.h"
-#include "miner.h"
-#include "crypto/c_keccak.h"
-#include <immintrin.h>
-
-static inline void ExpandAESKey256_sub1(__m128i *tmp1, __m128i *tmp2) // Key-schedule step: updates *tmp1 using the keygen-assist value in *tmp2 (which is overwritten).
-{
-	__m128i tmp4;
-	*tmp2 = _mm_shuffle_epi32(*tmp2, 0xFF); // broadcast 32-bit lane 3 to all four lanes
-	tmp4 = _mm_slli_si128(*tmp1, 0x04); // byte shifts: fold tmp1 shifted by 4, 8 and 12 bytes back into tmp1
-	*tmp1 = _mm_xor_si128(*tmp1, tmp4);
-	tmp4 = _mm_slli_si128(tmp4, 0x04);
-	*tmp1 = _mm_xor_si128(*tmp1, tmp4);
-	tmp4 = _mm_slli_si128(tmp4, 0x04);
-	*tmp1 = _mm_xor_si128(*tmp1, tmp4);
-	*tmp1 = _mm_xor_si128(*tmp1, *tmp2); // finally mix in the broadcast assist word
-}
-
-static inline void ExpandAESKey256_sub2(__m128i *tmp1, __m128i *tmp3) // Key-schedule step: updates *tmp3 from the current *tmp1 (read only).
-{
-	__m128i tmp2, tmp4;
-	
-	tmp4 = _mm_aeskeygenassist_si128(*tmp1, 0x00); // round constant 0 for this half of the schedule
-	tmp2 = _mm_shuffle_epi32(tmp4, 0xAA); // broadcast 32-bit lane 2 to all four lanes
-	tmp4 = _mm_slli_si128(*tmp3, 0x04); // byte shifts: fold tmp3 shifted by 4, 8 and 12 bytes back into tmp3
-	*tmp3 = _mm_xor_si128(*tmp3, tmp4);
-	tmp4 = _mm_slli_si128(tmp4, 0x04);
-	*tmp3 = _mm_xor_si128(*tmp3, tmp4);
-	tmp4 = _mm_slli_si128(tmp4, 0x04);
-	*tmp3 = _mm_xor_si128(*tmp3, tmp4);
-	*tmp3 = _mm_xor_si128(*tmp3, tmp2); // finally mix in the broadcast assist word
-}
-
-// Special thanks to Intel for helping me
-// with ExpandAESKey256() and its subroutines
-static inline void ExpandAESKey256(char *keybuf) // Expands in place: keybuf[0..31] holds the key on entry, 16-byte slots 2..14 are written (240 bytes used in total).
-{
-	__m128i tmp1, tmp2, tmp3, *keys;
-	
-	keys = (__m128i *)keybuf;
-	
-	tmp1 = _mm_load_si128((__m128i *)keybuf); // aligned loads: keybuf must be 16-byte aligned
-	tmp3 = _mm_load_si128((__m128i *)(keybuf+0x10));
-	
-	tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x01); // round constants used below: 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40
-	ExpandAESKey256_sub1(&tmp1, &tmp2);
-	keys[2] = tmp1;
-	ExpandAESKey256_sub2(&tmp1, &tmp3);
-	keys[3] = tmp3;
-	
-	tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x02);
-	ExpandAESKey256_sub1(&tmp1, &tmp2);
-	keys[4] = tmp1;
-	ExpandAESKey256_sub2(&tmp1, &tmp3);
-	keys[5] = tmp3;
-	
-	tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x04);
-	ExpandAESKey256_sub1(&tmp1, &tmp2);
-	keys[6] = tmp1;
-	ExpandAESKey256_sub2(&tmp1, &tmp3);
-	keys[7] = tmp3;
-	
-	tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x08);
-	ExpandAESKey256_sub1(&tmp1, &tmp2);
-	keys[8] = tmp1;
-	ExpandAESKey256_sub2(&tmp1, &tmp3);
-	keys[9] = tmp3;
-	
-	tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x10);
-	ExpandAESKey256_sub1(&tmp1, &tmp2);
-	keys[10] = tmp1;
-	ExpandAESKey256_sub2(&tmp1, &tmp3);
-	keys[11] = tmp3;
-	
-	tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x20);
-	ExpandAESKey256_sub1(&tmp1, &tmp2);
-	keys[12] = tmp1;
-	ExpandAESKey256_sub2(&tmp1, &tmp3);
-	keys[13] = tmp3;
-	
-	tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x40); // last step produces only the even slot, keys[14]
-	ExpandAESKey256_sub1(&tmp1, &tmp2);
-	keys[14] = tmp1;
-}
-
-// align to 64 byte cache line
-typedef struct // Working state for cryptonight_hash_aes; every array member is 64-byte aligned.
-{
-    uint8_t long_state[MEMORY] __attribute((aligned(64))); // scratchpad of MEMORY bytes
-    union cn_slow_hash_state state; // keccak state, also viewed as k[] / init[]
-    uint8_t text[INIT_SIZE_BYTE] __attribute((aligned(64))); // 8 AES blocks being encrypted during fill/fold
-    uint64_t a[AES_BLOCK_SIZE >> 3] __attribute__((aligned(64))); // two 64-bit words each for a and b
-    uint64_t b[AES_BLOCK_SIZE >> 3] __attribute__((aligned(64)));
-    uint8_t c[AES_BLOCK_SIZE] __attribute__((aligned(64))); // NOTE(review): not referenced by cryptonight_hash_aes, which uses a local c[] instead
-} cryptonight_ctx;
-// One context per thread (__thread), so no allocation is done per hash call.
-static __thread cryptonight_ctx ctx;
-
-void cryptonight_hash_aes( void *restrict output, const void *input, int len ) // CryptoNight hash via AES-NI intrinsics; works on the thread-local ctx and writes the result to output.
-{
-    uint8_t ExpandedKey[256] __attribute__((aligned(64)));
-    __m128i *longoutput, *expkey, *xmminput;
-    size_t i, j;
-    
-    keccak( (const uint8_t*)input, 76, (char*)&ctx.state.hs.b, 200 ); // NOTE(review): always absorbs 76 bytes; len is only used by the V7 guard below
-
-    if ( cryptonightV7 && len < 43 ) // V7 reads 8 bytes at input+35, hence the 43-byte minimum; output is left unwritten on this path
-      return;
-
-    const uint64_t tweak = cryptonightV7 
-                         ? *((const uint64_t*) (((const uint8_t*)input) + 35))
-                           ^ ctx.state.hs.w[24] : 0; 
-
-    memcpy( ExpandedKey, ctx.state.hs.b, AES_KEY_SIZE ); // first AES key = state bytes 0..31
-    ExpandAESKey256( ExpandedKey );
-    memcpy( ctx.text, ctx.state.init, INIT_SIZE_BYTE );
-    
-    longoutput = (__m128i*)ctx.long_state;
-    xmminput   = (__m128i*)ctx.text;
-    expkey     = (__m128i*)ExpandedKey;
-    
-    // prefetch expkey, xmminput and enough longoutput for 4 iterations
-    _mm_prefetch( xmminput,     _MM_HINT_T0 );
-    _mm_prefetch( xmminput + 4, _MM_HINT_T0 );
-    _mm_prefetch( expkey,     _MM_HINT_T0 );
-    _mm_prefetch( expkey + 4, _MM_HINT_T0 );
-    _mm_prefetch( expkey + 8, _MM_HINT_T0 );
-    for ( i = 0; i < 64; i += 16 ) // NOTE(review): covers longoutput[0..63], i.e. 8 iterations of INIT_SIZE_M128I (8) blocks, not 4 as the comment above says - confirm intent
-    {
-        __builtin_prefetch( longoutput + i,      1, 0 );
-        __builtin_prefetch( longoutput + i +  4, 1, 0 );
-        __builtin_prefetch( longoutput + i +  8, 1, 0 );
-        __builtin_prefetch( longoutput + i + 12, 1, 0 );
-    }
-
-    // n-4 iterations
-    for ( i = 0; likely( i < MEMORY_M128I - 4*INIT_SIZE_M128I );
-                         i += INIT_SIZE_M128I )
-    {
-        // prefetch 4 iterations ahead.
-        __builtin_prefetch( longoutput + i + 64, 1, 0 );
-        __builtin_prefetch( longoutput + i + 68, 1, 0 );
-
-	for ( j = 0; j < 10; j++ ) // 10 aesenc rounds with round keys 0..9 over the 8 text blocks
-	{
-		xmminput[0] = _mm_aesenc_si128( xmminput[0], expkey[j] );
-		xmminput[1] = _mm_aesenc_si128( xmminput[1], expkey[j] );
-		xmminput[2] = _mm_aesenc_si128( xmminput[2], expkey[j] );
-		xmminput[3] = _mm_aesenc_si128( xmminput[3], expkey[j] );
-		xmminput[4] = _mm_aesenc_si128( xmminput[4], expkey[j] );
-		xmminput[5] = _mm_aesenc_si128( xmminput[5], expkey[j] );
-		xmminput[6] = _mm_aesenc_si128( xmminput[6], expkey[j] );
-		xmminput[7] = _mm_aesenc_si128( xmminput[7], expkey[j] );
-	}
-	_mm_store_si128( &( longoutput[i  ] ), xmminput[0] ); // the encrypted text becomes the next 128 bytes of the scratchpad
-	_mm_store_si128( &( longoutput[i+1] ), xmminput[1] );
-	_mm_store_si128( &( longoutput[i+2] ), xmminput[2] );
-	_mm_store_si128( &( longoutput[i+3] ), xmminput[3] );
-	_mm_store_si128( &( longoutput[i+4] ), xmminput[4] );
-	_mm_store_si128( &( longoutput[i+5] ), xmminput[5] );
-	_mm_store_si128( &( longoutput[i+6] ), xmminput[6] );
-	_mm_store_si128( &( longoutput[i+7] ), xmminput[7] );
-    }
-    // last 4 iterations
-    for ( ; likely( i < MEMORY_M128I ); i += INIT_SIZE_M128I ) // same body as above, without the look-ahead prefetch
-    {
-        for ( j = 0; j < 10; j++ )
-        {
-                xmminput[0] = _mm_aesenc_si128( xmminput[0], expkey[j] );
-                xmminput[1] = _mm_aesenc_si128( xmminput[1], expkey[j] );
-                xmminput[2] = _mm_aesenc_si128( xmminput[2], expkey[j] );
-                xmminput[3] = _mm_aesenc_si128( xmminput[3], expkey[j] );
-                xmminput[4] = _mm_aesenc_si128( xmminput[4], expkey[j] );
-                xmminput[5] = _mm_aesenc_si128( xmminput[5], expkey[j] );
-                xmminput[6] = _mm_aesenc_si128( xmminput[6], expkey[j] );
-                xmminput[7] = _mm_aesenc_si128( xmminput[7], expkey[j] );
-        }
-        _mm_store_si128( &( longoutput[i  ] ), xmminput[0] );
-        _mm_store_si128( &( longoutput[i+1] ), xmminput[1] );
-        _mm_store_si128( &( longoutput[i+2] ), xmminput[2] );
-        _mm_store_si128( &( longoutput[i+3] ), xmminput[3] );
-        _mm_store_si128( &( longoutput[i+4] ), xmminput[4] );
-        _mm_store_si128( &( longoutput[i+5] ), xmminput[5] );
-        _mm_store_si128( &( longoutput[i+6] ), xmminput[6] );
-        _mm_store_si128( &( longoutput[i+7] ), xmminput[7] );
-    }
-// a = k[0..15] ^ k[32..47] and b = k[16..31] ^ k[48..63], computed as 64-bit words.
-    ctx.a[0] = ((uint64_t *)ctx.state.k)[0] ^ ((uint64_t *)ctx.state.k)[4];
-    ctx.b[0] = ((uint64_t *)ctx.state.k)[2] ^ ((uint64_t *)ctx.state.k)[6];
-    ctx.a[1] = ((uint64_t *)ctx.state.k)[1] ^ ((uint64_t *)ctx.state.k)[5];
-    ctx.b[1] = ((uint64_t *)ctx.state.k)[3] ^ ((uint64_t *)ctx.state.k)[7];
-
-    uint64_t a[2] __attribute((aligned(16))),
-             b[2] __attribute((aligned(16))),
-             c[2] __attribute((aligned(16)));
-    a[0] = ctx.a[0];
-    a[1] = ctx.a[1];
-    __m128i b_x = _mm_load_si128( (__m128i*)ctx.b );
-    __m128i a_x = _mm_load_si128( (__m128i*)a );
-    __m128i* lsa = (__m128i*)&ctx.long_state[ a[0] & 0x1FFFF0 ]; // mask yields a 16-byte-aligned offset below MEMORY (1 << 21)
-    __m128i c_x = _mm_load_si128( lsa );
-    uint64_t *nextblock;
-    uint64_t hi, lo;
-
-    // n-1 iterations
-    for( i = 0; __builtin_expect( i < 0x7ffff, 1 ); i++ )
-    {	  
-	c_x = _mm_aesenc_si128( c_x, a_x ); // one AES round of the block at a's address, keyed with a
-	_mm_store_si128( (__m128i*)c, c_x );
-        b_x = _mm_xor_si128( b_x, c_x );
-        nextblock = (uint64_t *)&ctx.long_state[c[0] & 0x1FFFF0];
-        _mm_store_si128( lsa, b_x ); // write b ^ c back to the block that was just read
-
-        if ( cryptonightV7 ) // V7 only: patch byte 11 of the block just stored
-        {
-           const uint8_t tmp = ( (const uint8_t*)(lsa) )[11];
-           const uint8_t index = ( ( (tmp >> 3) & 6 ) | (tmp & 1) ) << 1;
-           ((uint8_t*)(lsa))[11] = tmp ^ ( ( 0x75310 >> index) & 0x30 );
-        } 
-
-	b[0] = nextblock[0];
-	b[1] = nextblock[1];
-
-        // hi,lo = 64bit x 64bit multiply of c[0] and b[0]
-	__asm__( "mulq %3\n\t"
-	         : "=d" ( hi ),
-	           "=a" ( lo )
-	         : "%a" ( c[0] ),
-	           "rm" ( b[0] )
-		 : "cc" );
-
-        b_x = c_x;
-
-        a[0] += hi;
-        a[1] += lo;
-        nextblock[0] = a[0];
-        nextblock[1] = cryptonightV7 ? a[1] ^ tweak : a[1]; // V7 stores the second word xored with the tweak
-        a[0] ^= b[0];
-        a[1] ^= b[1];
-
-        lsa = (__m128i*)&ctx.long_state[ a[0] & 0x1FFFF0 ]; // set up address and operands for the next iteration
-        a_x = _mm_load_si128( (__m128i*)a );
-        c_x = _mm_load_si128( lsa );
-    }
-    // abbreviated nth iteration
-    c_x = _mm_aesenc_si128( c_x, a_x );
-    _mm_store_si128( (__m128i*)c, c_x );
-    b_x = _mm_xor_si128( b_x, c_x );
-    nextblock = (uint64_t *)&ctx.long_state[c[0] & 0x1FFFF0];
-    _mm_store_si128( lsa, b_x );
-
-    if ( cryptonightV7 )
-    {
-       const uint8_t tmp = ( (const uint8_t*)(lsa) )[11];
-       const uint8_t index = ( ( (tmp >> 3) & 6 ) | (tmp & 1) ) << 1;
-       ((uint8_t*)(lsa))[11] = tmp ^ ( ( 0x75310 >> index) & 0x30 );
-    }
-
-    b[0] = nextblock[0];
-    b[1] = nextblock[1];
-
-    __asm__( "mulq %3\n\t"
-             : "=d" ( hi ),
-               "=a" ( lo )
-             : "%a" ( c[0] ),
-               "rm" ( b[0] )
-             : "cc" );
-
-    a[0] += hi;
-    a[1] += lo;
-    nextblock[0] = a[0];
-    nextblock[1] = cryptonightV7 ? a[1] ^ tweak : a[1];
-    a[0] ^= b[0];
-    a[1] ^= b[1];
-
-    memcpy( ExpandedKey, &ctx.state.hs.b[32], AES_KEY_SIZE ); // second AES key = state bytes 32..63
-    ExpandAESKey256( ExpandedKey );
-    memcpy( ctx.text, ctx.state.init, INIT_SIZE_BYTE ); // text restarts from the original init bytes
-    
-    // prefetch expkey, all of xmminput and enough longoutput for 4 loops
-    _mm_prefetch( xmminput,     _MM_HINT_T0 );
-    _mm_prefetch( xmminput + 4, _MM_HINT_T0 );
-    for ( i = 0; i < 64; i += 16 )
-    {
-       _mm_prefetch( longoutput + i,      _MM_HINT_T0 );
-       _mm_prefetch( longoutput + i +  4, _MM_HINT_T0 );
-       _mm_prefetch( longoutput + i +  8, _MM_HINT_T0 );
-       _mm_prefetch( longoutput + i + 12, _MM_HINT_T0 );
-    }
-    _mm_prefetch( expkey,     _MM_HINT_T0 );
-    _mm_prefetch( expkey + 4, _MM_HINT_T0 );
-    _mm_prefetch( expkey + 8, _MM_HINT_T0 );
-
-    // n-4 iterations
-    for ( i = 0; likely( i < MEMORY_M128I - 4*INIT_SIZE_M128I );
-                         i += INIT_SIZE_M128I )
-    {
-        // stay 4 iterations ahead.
-        _mm_prefetch( longoutput + i + 64, _MM_HINT_T0 );
-        _mm_prefetch( longoutput + i + 68, _MM_HINT_T0 );
-
-        xmminput[0] = _mm_xor_si128( longoutput[i  ], xmminput[0] ); // fold the scratchpad back in: xor 128 bytes into text, then encrypt
-        xmminput[1] = _mm_xor_si128( longoutput[i+1], xmminput[1] );
-        xmminput[2] = _mm_xor_si128( longoutput[i+2], xmminput[2] );
-        xmminput[3] = _mm_xor_si128( longoutput[i+3], xmminput[3] );
-        xmminput[4] = _mm_xor_si128( longoutput[i+4], xmminput[4] );
-        xmminput[5] = _mm_xor_si128( longoutput[i+5], xmminput[5] );
-        xmminput[6] = _mm_xor_si128( longoutput[i+6], xmminput[6] );
-        xmminput[7] = _mm_xor_si128( longoutput[i+7], xmminput[7] );
-		
-        for( j = 0; j < 10; j++ )
-        {
-            xmminput[0] = _mm_aesenc_si128( xmminput[0], expkey[j] );
-	    xmminput[1] = _mm_aesenc_si128( xmminput[1], expkey[j] );
-	    xmminput[2] = _mm_aesenc_si128( xmminput[2], expkey[j] );
-	    xmminput[3] = _mm_aesenc_si128( xmminput[3], expkey[j] );
-	    xmminput[4] = _mm_aesenc_si128( xmminput[4], expkey[j] );
-	    xmminput[5] = _mm_aesenc_si128( xmminput[5], expkey[j] );
-	    xmminput[6] = _mm_aesenc_si128( xmminput[6], expkey[j] );
-	    xmminput[7] = _mm_aesenc_si128( xmminput[7], expkey[j] );
-        }
-    }
-    // last 4 iterations 
-    for ( ; likely( i < MEMORY_M128I ); i += INIT_SIZE_M128I )
-    {
-        xmminput[0] = _mm_xor_si128( longoutput[i  ], xmminput[0] );
-        xmminput[1] = _mm_xor_si128( longoutput[i+1], xmminput[1] );
-        xmminput[2] = _mm_xor_si128( longoutput[i+2], xmminput[2] );
-        xmminput[3] = _mm_xor_si128( longoutput[i+3], xmminput[3] );
-        xmminput[4] = _mm_xor_si128( longoutput[i+4], xmminput[4] );
-        xmminput[5] = _mm_xor_si128( longoutput[i+5], xmminput[5] );
-        xmminput[6] = _mm_xor_si128( longoutput[i+6], xmminput[6] );
-        xmminput[7] = _mm_xor_si128( longoutput[i+7], xmminput[7] );
-
-        for( j = 0; j < 10; j++ )
-        {
-            xmminput[0] = _mm_aesenc_si128( xmminput[0], expkey[j] );
-            xmminput[1] = _mm_aesenc_si128( xmminput[1], expkey[j] );
-            xmminput[2] = _mm_aesenc_si128( xmminput[2], expkey[j] );
-            xmminput[3] = _mm_aesenc_si128( xmminput[3], expkey[j] );
-            xmminput[4] = _mm_aesenc_si128( xmminput[4], expkey[j] );
-            xmminput[5] = _mm_aesenc_si128( xmminput[5], expkey[j] );
-            xmminput[6] = _mm_aesenc_si128( xmminput[6], expkey[j] );
-            xmminput[7] = _mm_aesenc_si128( xmminput[7], expkey[j] );
-        }
-    }
-
-    memcpy( ctx.state.init, ctx.text, INIT_SIZE_BYTE); // folded text replaces the init part of the state
-    keccakf( (uint64_t*)&ctx.state.hs.w, 24 );
-    extra_hashes[ctx.state.hs.b[0] & 3](&ctx.state, 200, output); // low 2 bits of state byte 0 pick the final hash function
-
-}
-#endif
--- a/algo/cryptonight/cryptonight-common.c
+++ b/algo/cryptonight/cryptonight-common.c
@@ -1,133 +0,0 @@
-// Copyright (c) 2012-2013 The Cryptonote developers
-// Distributed under the MIT/X11 software license, see the accompanying
-// file COPYING or http://www.opensource.org/licenses/mit-license.php.
-
-// Modified for CPUminer by Lucas Jones
-
-#include "cpuminer-config.h"
-#include "algo-gate-api.h"
-
-#if defined(__AES__)
-  #include "algo/groestl/aes_ni/hash-groestl256.h"
-#else
-#include "crypto/c_groestl.h"
-#endif
-#include "crypto/c_blake256.h"
-#include "crypto/c_jh.h"
-#include "crypto/c_skein.h"
-#include "cryptonight.h"
-
-/*
-#if defined __unix__ && (!defined __APPLE__)
-#include <sys/mman.h>
-#elif defined _WIN32
-#include <windows.h>
-#endif
-*/
-
-void do_blake_hash(const void* input, size_t len, char* output) { // Adapter: blake256 over len bytes.
-    blake256_hash((uint8_t*)output, input, len);
-}
-
-void do_groestl_hash(const void* input, size_t len, char* output) { // Adapter: groestl, using the AES-NI implementation when __AES__ is defined.
-#if defined(__AES__)
-    hashState_groestl256 ctx;
-    init_groestl256( &ctx, 32 );
-    update_and_final_groestl256( &ctx, output, input, len * 8 ); // length is passed in bits
-#else
-    groestl(input, len * 8, (uint8_t*)output); // length is passed in bits
-#endif
-}
-
-void do_jh_hash(const void* input, size_t len, char* output) { // Adapter: jh_hash with a 32 * 8 bit digest; input length passed in bits.
-    jh_hash(32 * 8, input, 8 * len, (uint8_t*)output);
-}
-
-void do_skein_hash(const void* input, size_t len, char* output) { // Adapter: skein_hash with an 8 * 32 bit digest; input length passed in bits.
-    skein_hash(8 * 32, input, 8 * len, (uint8_t*)output);
-}
-// Dispatch table for the final hash; callers in this diff index it with (state byte 0 & 3).
-void (* const extra_hashes[4])( const void *, size_t, char *) =
-    { do_blake_hash, do_groestl_hash, do_jh_hash, do_skein_hash };
-
-void cryptonight_hash( void *restrict output, const void *input, int len ) // Compile-time dispatch to the AES-NI or portable implementation.
-{
-#if defined(__AES__)
-  cryptonight_hash_aes( output, input, len );
-#else
-  cryptonight_hash_ctx ( output, input, len );
-#endif
-}
-
-void cryptonight_hash_suw( void *restrict output, const void *input ) // Same dispatch as cryptonight_hash with the length fixed at 76.
-{
-#if defined(__AES__)
-  cryptonight_hash_aes( output, input, 76 );
-#else
-  cryptonight_hash_ctx ( output, input, 76 );
-#endif
-}
-
-bool cryptonightV7 = false;
-
-int scanhash_cryptonight( struct work *work, uint32_t max_nonce, // Nonce scan for cryptonight: returns true on the first hash[7] < Htarg, otherwise 0.
-                   uint64_t *hashes_done, struct thr_info *mythr )
- {
-    uint32_t *pdata = work->data;
-    uint32_t *ptarget = work->target;
-    int thr_id = mythr->id;
-
-    uint32_t *nonceptr = (uint32_t*) (((char*)pdata) + 39); // nonce sits at byte offset 39 of the work data (not a multiple of 4 from pdata)
-    uint32_t n = *nonceptr - 1; // start one below the stored nonce; the loop pre-increments before hashing
-    const uint32_t first_nonce = n + 1;
-    const uint32_t Htarg = ptarget[7];
-    uint32_t hash[32 / 4] __attribute__((aligned(32)));
-
-//    if (  (  cryptonightV7 && ( *(uint8_t*)pdata <  7 ) )
-//       || ( !cryptonightV7 && ( *(uint8_t*)pdata == 7 ) ) )
-//          applog(LOG_WARNING,"Cryptonight variant mismatch, shares may be rejected.");
-
-    do
-    {
-       *nonceptr = ++n;
-       cryptonight_hash( hash, pdata, 76 );
-       if (unlikely( hash[7] < Htarg )) // only word 7 of the hash is compared against the target
-       {
-           *hashes_done = n - first_nonce + 1;
-//           work_set_target_ratio( work, hash );
-	   return true;
-       }
-    } while (likely((n <= max_nonce && !work_restart[thr_id].restart))); // stop past max_nonce or when this thread's restart flag is set
-    
-    *hashes_done = n - first_nonce + 1; // number of nonces hashed in this call
-    return 0;
-}
-
-bool register_cryptonight_algo( algo_gate_t* gate ) // Fills the algo gate for plain cryptonight after logging a deprecation warning; always returns true.
-{
-  applog(LOG_WARNING,"Cryptonight algorithm and variants are no longer");
-  applog(LOG_WARNING,"supported by cpuminer-opt. Shares submitted will");
-  applog(LOG_WARNING,"likely be rejected. Proceed at your own risk.\n");
-  cryptonightV7 = false; // global switch read by the hash routines
-  register_json_rpc2( gate );
-  gate->optimizations = SSE2_OPT | AES_OPT;
-  gate->scanhash         = (void*)&scanhash_cryptonight;
-  gate->hash             = (void*)&cryptonight_hash;
-  gate->hash_suw         = (void*)&cryptonight_hash_suw;  
-  return true;
-};
-
-bool register_cryptonightv7_algo( algo_gate_t* gate ) // Same gate setup as register_cryptonight_algo, but with the V7 switch turned on.
-{
-  applog(LOG_WARNING,"Cryptonight algorithm and variants are no longer");
-  applog(LOG_WARNING,"supported by cpuminer-opt. Shares submitted will");
-  applog(LOG_WARNING,"likely be rejected. Proceed at your own risk.\n");
-  cryptonightV7 = true; // global switch read by the hash routines
-  register_json_rpc2( gate );
-  gate->optimizations = SSE2_OPT | AES_OPT;
-  gate->scanhash      = (void*)&scanhash_cryptonight;
-  gate->hash          = (void*)&cryptonight_hash;
-  gate->hash_suw      = (void*)&cryptonight_hash_suw;
-  return true;
-};
-
--- a/algo/cryptonight/cryptonight.c
+++ b/algo/cryptonight/cryptonight.c
@@ -1,310 +0,0 @@
-// Copyright (c) 2012-2013 The Cryptonote developers
-// Distributed under the MIT/X11 software license, see the accompanying
-// file COPYING or http://www.opensource.org/licenses/mit-license.php.
-
-// Modified for CPUminer by Lucas Jones
-
-#include "miner.h"
-#include <memory.h>
-
-#if defined(__arm__) || defined(_MSC_VER)
-#ifndef NOASM
-#define NOASM
-#endif
-#endif
-
-#include "crypto/oaes_lib.h"
-#include "crypto/c_keccak.h"
-#include "crypto/c_groestl.h"
-#include "crypto/c_blake256.h"
-#include "crypto/c_jh.h"
-#include "crypto/c_skein.h"
-#include "crypto/int-util.h"
-//#include "crypto/hash-ops.h"
-#include "cryptonight.h"
-
-#if USE_INT128
-// Pick a 128-bit unsigned type, or turn USE_INT128 off where none is used.
-#if __GNUC__ == 4 && __GNUC_MINOR__ >= 4 && __GNUC_MINOR__ < 6
-typedef unsigned int uint128_t __attribute__ ((__mode__ (TI))); // GCC 4.4 / 4.5: TI-mode integer
-#elif defined (_MSC_VER)
-/* only for mingw64 on windows */
-#undef  USE_INT128
-#define USE_INT128 (0)
-#else
-typedef __uint128_t uint128_t;
-#endif
-
-#endif
-
-#define LITE 0 // compile-time switch: 0 selects the full-size parameters below
-#if LITE /* cryptonight-light */
-#define MEMORY (1 << 20)
-#define ITER   (1 << 19)
-#else
-#define MEMORY (1 << 21) /* 2 MiB */
-#define ITER   (1 << 20)
-#endif
-// The values below repeat the definitions in cryptonight.h, which this file also includes.
-#define AES_BLOCK_SIZE  16
-#define AES_KEY_SIZE    32 /*16*/
-#define INIT_SIZE_BLK   8
-#define INIT_SIZE_BYTE (INIT_SIZE_BLK * AES_BLOCK_SIZE)
-
-/*
-#pragma pack(push, 1)
-union cn_slow_hash_state {
-	union hash_state hs;
-	struct {
-		uint8_t k[64];
-		uint8_t init[INIT_SIZE_BYTE];
-	};
-};
-#pragma pack(pop)
-
-static void do_blake_hash(const void* input, size_t len, char* output) {
-	blake256_hash((uint8_t*)output, input, len);
-}
-
-static void do_groestl_hash(const void* input, size_t len, char* output) {
-	groestl(input, len * 8, (uint8_t*)output);
-}
-
-static void do_jh_hash(const void* input, size_t len, char* output) {
-	int r = jh_hash(HASH_SIZE * 8, input, 8 * len, (uint8_t*)output);
-	assert(likely(SUCCESS == r));
-}
-
-static void do_skein_hash(const void* input, size_t len, char* output) {
-	int r = skein_hash(8 * HASH_SIZE, input, 8 * len, (uint8_t*)output);
-	assert(likely(SKEIN_SUCCESS == r));
-}
-*/
-
-extern int aesb_single_round(const uint8_t *in, uint8_t*out, const uint8_t *expandedKey); // defined outside this file
-extern int aesb_pseudo_round_mut(uint8_t *val, uint8_t *expandedKey);
-#if !defined(_MSC_VER) && !defined(NOASM)
-extern int fast_aesb_single_round(const uint8_t *in, uint8_t*out, const uint8_t *expandedKey);
-extern int fast_aesb_pseudo_round_mut(uint8_t *val, uint8_t *expandedKey);
-#else
-#define fast_aesb_single_round     aesb_single_round // without the asm versions the fast_ names alias the plain ones
-#define fast_aesb_pseudo_round_mut aesb_pseudo_round_mut
-#endif
-
-
-#if defined(NOASM) || !defined(__x86_64__)
-static uint64_t mul128(uint64_t multiplier, uint64_t multiplicand, uint64_t* product_hi) { // Portable 64x64 -> 128-bit multiply: returns the low 64 bits, stores the high 64 bits in *product_hi.
-	// multiplier   = ab = a * 2^32 + b
-	// multiplicand = cd = c * 2^32 + d
-	// ab * cd = a * c * 2^64 + (a * d + b * c) * 2^32 + b * d
-	uint64_t a = hi_dword(multiplier);
-	uint64_t b = lo_dword(multiplier);
-	uint64_t c = hi_dword(multiplicand);
-	uint64_t d = lo_dword(multiplicand);
-
-	uint64_t ac = a * c;
-	uint64_t ad = a * d;
-	uint64_t bc = b * c;
-	uint64_t bd = b * d;
-
-	uint64_t adbc = ad + bc;
-	uint64_t adbc_carry = adbc < ad ? 1 : 0; // the sum of the two cross terms can wrap past 64 bits
-
-	// multiplier * multiplicand = product_hi * 2^64 + product_lo
-	uint64_t product_lo = bd + (adbc << 32);
-	uint64_t product_lo_carry = product_lo < bd ? 1 : 0;
-	*product_hi = ac + (adbc >> 32) + (adbc_carry << 32) + product_lo_carry; // the cross-term carry weighs 2^96, i.e. 2^32 within the high word
-	assert(ac <= *product_hi);
-
-	return product_lo;
-}
-#else
-extern uint64_t mul128(uint64_t multiplier, uint64_t multiplicand, uint64_t* product_hi); // otherwise provided outside this file
-#endif
-
-/*
-static void (* const extra_hashes[4])(const void *, size_t, char *) = {
-		do_blake_hash, do_groestl_hash, do_jh_hash, do_skein_hash
-};
-*/
-
-static inline size_t e2i(const uint8_t* a) { // Scratchpad offset from the first 32 bits of a: masked to a multiple of 16.
-#if !LITE
-	return ((uint32_t *)a)[0] & 0x1FFFF0;
-#else
-	return ((uint32_t *)a)[0] & 0xFFFF0;
-#endif
-}
-
-static inline void mul_sum_xor_dst( const uint8_t* a, uint8_t* c, uint8_t* dst, // Multiply a[0] by dst[0], add c, then swap roles: c = old dst ^ sum, dst = sum (V7: low word xor tweak).
-         const uint64_t tweak )
-{
-	uint64_t hi, lo = mul128(((uint64_t*) a)[0], ((uint64_t*) dst)[0], &hi) + ((uint64_t*) c)[1];
-	hi += ((uint64_t*) c)[0]; // the two halves are added independently; no carry moves from lo into hi
-
-	((uint64_t*) c)[0] = ((uint64_t*) dst)[0] ^ hi;
-	((uint64_t*) c)[1] = ((uint64_t*) dst)[1] ^ lo;
-	((uint64_t*) dst)[0] = hi;
-	((uint64_t*) dst)[1] = cryptonightV7 ? lo ^ tweak : lo;
-}
-
-static inline void xor_blocks(uint8_t* a, const uint8_t* b) { // a ^= b over one 16-byte block.
-#if USE_INT128
-	*((uint128_t*) a) ^= *((uint128_t*) b);
-#else
-	((uint64_t*) a)[0] ^= ((uint64_t*) b)[0];
-	((uint64_t*) a)[1] ^= ((uint64_t*) b)[1];
-#endif
-}
-
-static inline void xor_blocks_dst(const uint8_t* a, const uint8_t* b, uint8_t* dst) { // dst = a ^ b over one 16-byte block.
-#if USE_INT128
-	*((uint128_t*) dst) = *((uint128_t*) a) ^ *((uint128_t*) b);
-#else
-	((uint64_t*) dst)[0] = ((uint64_t*) a)[0] ^ ((uint64_t*) b)[0];
-	((uint64_t*) dst)[1] = ((uint64_t*) a)[1] ^ ((uint64_t*) b)[1];
-#endif
-}
-
-typedef struct { // Working state for the portable cryptonight_hash_ctx.
-	uint8_t _ALIGN(16) long_state[MEMORY]; // scratchpad of MEMORY bytes
-	union cn_slow_hash_state state; // keccak state, also viewed as k[] / init[]
-	uint8_t _ALIGN(16) text[INIT_SIZE_BYTE]; // 8 AES blocks being encrypted during fill/fold
-	uint8_t _ALIGN(16) a[AES_BLOCK_SIZE];
-	uint8_t _ALIGN(16) b[AES_BLOCK_SIZE];
-	uint8_t _ALIGN(16) c[AES_BLOCK_SIZE];
-	oaes_ctx* aes_ctx; // allocated and freed inside each cryptonight_hash_ctx call
-} cryptonight_ctx;
-// One context per thread (__thread).
-static __thread cryptonight_ctx ctx;
-
-void cryptonight_hash_ctx(void* output, const void* input, int len) // Portable CryptoNight hash (software AES via oaes/aesb); works on the thread-local ctx and writes the result to output.
-{
-//    hash_process(&ctx.state.hs, (const uint8_t*) input, len);
-    keccak( (const uint8_t*)input, 76, (char*)&ctx.state.hs.b, 200 ); // NOTE(review): always absorbs 76 bytes; len is only used by the V7 guard below
-
-    if ( cryptonightV7 && len < 43 ) // V7 reads 8 bytes at input+35, hence the 43-byte minimum; output is left unwritten on this path
-      return;
-    const uint64_t tweak = cryptonightV7
-                         ? *((const uint64_t*) (((const uint8_t*)input) + 35))
-                           ^ ctx.state.hs.w[24] : 0;
-
-    ctx.aes_ctx = (oaes_ctx*) oaes_alloc(); // NOTE(review): result is used without a NULL check
-
-    __builtin_prefetch( ctx.text,             0, 3 );
-    __builtin_prefetch( ctx.text       +  64, 0, 3 );
-    __builtin_prefetch( ctx.long_state,       1, 0 );
-    __builtin_prefetch( ctx.long_state +  64, 1, 0 );
-    __builtin_prefetch( ctx.long_state + 128, 1, 0 );
-    __builtin_prefetch( ctx.long_state + 192, 1, 0 );
-    __builtin_prefetch( ctx.long_state + 256, 1, 0 );
-    __builtin_prefetch( ctx.long_state + 320, 1, 0 );
-    __builtin_prefetch( ctx.long_state + 384, 1, 0 );
-    __builtin_prefetch( ctx.long_state + 448, 1, 0 );
-
-	size_t i, j;
-	memcpy(ctx.text, ctx.state.init, INIT_SIZE_BYTE);
-
-	oaes_key_import_data(ctx.aes_ctx, ctx.state.hs.b, AES_KEY_SIZE); // first AES key = state bytes 0..31
-	for (i = 0; likely(i < MEMORY); i += INIT_SIZE_BYTE) { // fill the scratchpad INIT_SIZE_BYTE bytes at a time
-
-    __builtin_prefetch( ctx.long_state + i + 512, 1, 0 );
-    __builtin_prefetch( ctx.long_state + i + 576, 1, 0 );
-
-		aesb_pseudo_round_mut(&ctx.text[AES_BLOCK_SIZE * 0], ctx.aes_ctx->key->exp_data);
-		aesb_pseudo_round_mut(&ctx.text[AES_BLOCK_SIZE * 1], ctx.aes_ctx->key->exp_data);
-		aesb_pseudo_round_mut(&ctx.text[AES_BLOCK_SIZE * 2], ctx.aes_ctx->key->exp_data);
-		aesb_pseudo_round_mut(&ctx.text[AES_BLOCK_SIZE * 3], ctx.aes_ctx->key->exp_data);
-		aesb_pseudo_round_mut(&ctx.text[AES_BLOCK_SIZE * 4], ctx.aes_ctx->key->exp_data);
-		aesb_pseudo_round_mut(&ctx.text[AES_BLOCK_SIZE * 5], ctx.aes_ctx->key->exp_data);
-		aesb_pseudo_round_mut(&ctx.text[AES_BLOCK_SIZE * 6], ctx.aes_ctx->key->exp_data);
-		aesb_pseudo_round_mut(&ctx.text[AES_BLOCK_SIZE * 7], ctx.aes_ctx->key->exp_data);
-		memcpy(&ctx.long_state[i], ctx.text, INIT_SIZE_BYTE);
-	}
-
-	xor_blocks_dst(&ctx.state.k[0], &ctx.state.k[32], ctx.a); // a = k[0..15] ^ k[32..47]
-	xor_blocks_dst(&ctx.state.k[16], &ctx.state.k[48], ctx.b); // b = k[16..31] ^ k[48..63]
-
-	for (i = 0; likely(i < ITER / 4); ++i) // each pass performs the four steps labelled "Iteration 1..4" below
-        {
-           /* Dependency chain: address -> read value ------+
-            * written value <-+ hard function (AES or MUL) <+
-            * next address  <-+
-            */
-           /* Iteration 1 */
-           j = e2i(ctx.a);
-           aesb_single_round(&ctx.long_state[j], ctx.c, ctx.a);
-           xor_blocks_dst(ctx.c, ctx.b, &ctx.long_state[j]);
-
-           if ( cryptonightV7 ) // V7 only: patch byte 11 of the block addressed by a (presumably the block just written - same offset as j on a little-endian target)
-           {
-              uint8_t *lsa = (uint8_t*)&ctx.long_state[((uint64_t *)(ctx.a))[0] & 0x1FFFF0];
-              const uint8_t tmp = lsa[11];
-              const uint8_t index = ( ( (tmp >> 3) & 6 ) | (tmp & 1) ) << 1;
-              lsa[11] = tmp ^ ( ( 0x75310 >> index) & 0x30 );
-           }
-
-           /* Iteration 2 */
-           mul_sum_xor_dst(ctx.c, ctx.a, &ctx.long_state[e2i(ctx.c)], tweak );
-
-           /* Iteration 3 */
-           j = e2i(ctx.a); // same as iteration 1 with the roles of b and c exchanged
-           aesb_single_round(&ctx.long_state[j], ctx.b, ctx.a);
-           xor_blocks_dst(ctx.b, ctx.c, &ctx.long_state[j]);
-
-           if ( cryptonightV7 )
-           {
-              uint8_t *lsa = (uint8_t*)&ctx.long_state[((uint64_t *)(ctx.a))[0] & 0x1FFFF0];
-              const uint8_t tmp = lsa[11];
-              const uint8_t index = ( ( (tmp >> 3) & 6 ) | (tmp & 1) ) << 1;
-              lsa[11] = tmp ^ ( ( 0x75310 >> index) & 0x30 );
-           }
-
-           /* Iteration 4 */
-           mul_sum_xor_dst(ctx.b, ctx.a, &ctx.long_state[e2i(ctx.b)], tweak );
-
-	}
-
-    __builtin_prefetch( ctx.text,             0, 3 );
-    __builtin_prefetch( ctx.text       +  64, 0, 3 );
-    __builtin_prefetch( ctx.long_state,       1, 0 );
-    __builtin_prefetch( ctx.long_state +  64, 1, 0 );
-    __builtin_prefetch( ctx.long_state + 128, 1, 0 );
-    __builtin_prefetch( ctx.long_state + 192, 1, 0 );
-    __builtin_prefetch( ctx.long_state + 256, 1, 0 );
-    __builtin_prefetch( ctx.long_state + 320, 1, 0 );
-    __builtin_prefetch( ctx.long_state + 384, 1, 0 );
-    __builtin_prefetch( ctx.long_state + 448, 1, 0 );
-
-	memcpy(ctx.text, ctx.state.init, INIT_SIZE_BYTE); // text restarts from the original init bytes
-	oaes_key_import_data(ctx.aes_ctx, &ctx.state.hs.b[32], AES_KEY_SIZE); // second AES key = state bytes 32..63
-	for (i = 0; likely(i < MEMORY); i += INIT_SIZE_BYTE) { // fold the scratchpad back in: xor each block into text, then encrypt it
-
-    __builtin_prefetch( ctx.long_state + i + 512, 1, 0 );
-    __builtin_prefetch( ctx.long_state + i + 576, 1, 0 );
-
-		xor_blocks(&ctx.text[0 * AES_BLOCK_SIZE], &ctx.long_state[i + 0 * AES_BLOCK_SIZE]);
-		aesb_pseudo_round_mut(&ctx.text[0 * AES_BLOCK_SIZE], ctx.aes_ctx->key->exp_data);
-		xor_blocks(&ctx.text[1 * AES_BLOCK_SIZE], &ctx.long_state[i + 1 * AES_BLOCK_SIZE]);
-		aesb_pseudo_round_mut(&ctx.text[1 * AES_BLOCK_SIZE], ctx.aes_ctx->key->exp_data);
-		xor_blocks(&ctx.text[2 * AES_BLOCK_SIZE], &ctx.long_state[i + 2 * AES_BLOCK_SIZE]);
-		aesb_pseudo_round_mut(&ctx.text[2 * AES_BLOCK_SIZE], ctx.aes_ctx->key->exp_data);
-		xor_blocks(&ctx.text[3 * AES_BLOCK_SIZE], &ctx.long_state[i + 3 * AES_BLOCK_SIZE]);
-		aesb_pseudo_round_mut(&ctx.text[3 * AES_BLOCK_SIZE], ctx.aes_ctx->key->exp_data);
-		xor_blocks(&ctx.text[4 * AES_BLOCK_SIZE], &ctx.long_state[i + 4 * AES_BLOCK_SIZE]);
-		aesb_pseudo_round_mut(&ctx.text[4 * AES_BLOCK_SIZE], ctx.aes_ctx->key->exp_data);
-		xor_blocks(&ctx.text[5 * AES_BLOCK_SIZE], &ctx.long_state[i + 5 * AES_BLOCK_SIZE]);
-		aesb_pseudo_round_mut(&ctx.text[5 * AES_BLOCK_SIZE], ctx.aes_ctx->key->exp_data);
-		xor_blocks(&ctx.text[6 * AES_BLOCK_SIZE], &ctx.long_state[i + 6 * AES_BLOCK_SIZE]);
-		aesb_pseudo_round_mut(&ctx.text[6 * AES_BLOCK_SIZE], ctx.aes_ctx->key->exp_data);
-		xor_blocks(&ctx.text[7 * AES_BLOCK_SIZE], &ctx.long_state[i + 7 * AES_BLOCK_SIZE]);
-		aesb_pseudo_round_mut(&ctx.text[7 * AES_BLOCK_SIZE], ctx.aes_ctx->key->exp_data);
-	}
-	memcpy(ctx.state.init, ctx.text, INIT_SIZE_BYTE); // folded text replaces the init part of the state
-//	hash_permutation(&ctx.state.hs);
-        keccakf( (uint64_t*)&ctx.state.hs.w, 24 );
-	/*memcpy(hash, &state, 32);*/
-	extra_hashes[ctx.state.hs.b[0] & 3](&ctx.state, 200, output); // low 2 bits of state byte 0 pick the final hash function
-	oaes_free((OAES_CTX **) &ctx.aes_ctx);
-}
-
--- a/algo/cryptonight/cryptonight.h
+++ b/algo/cryptonight/cryptonight.h
@@ -1,51 +0,0 @@
-#ifndef __CRYPTONIGHT_H_INCLUDED
-#define __CRYPTONIGHT_H_INCLUDED
-
-#include <stddef.h>
-#include "crypto/oaes_lib.h"
-#include "miner.h"
-
-#define MEMORY         (1 << 21) /* 2 MiB */
-#define MEMORY_M128I   (MEMORY >> 4) // 2 MiB / 16 = 128 ki * __m128i
-#define ITER           (1 << 20) // iteration-count constant; the portable loop in cryptonight.c runs ITER / 4 passes
-#define AES_BLOCK_SIZE  16
-#define AES_KEY_SIZE    32 /*16*/
-#define INIT_SIZE_BLK   8 // number of AES blocks in the text buffer
-#define INIT_SIZE_BYTE (INIT_SIZE_BLK * AES_BLOCK_SIZE)	// 128
-#define INIT_SIZE_M128I (INIT_SIZE_BYTE >> 4) // 8
-
-
-#pragma pack(push, 1)
-union hash_state { // 200-byte hash state, addressable as bytes or as 25 64-bit words
-  uint8_t b[200];
-  uint64_t w[25];
-};
-#pragma pack(pop)
-
-#pragma pack(push, 1)
-union cn_slow_hash_state { // overlays the hash state with the k / init views used by the hash routines
-    union hash_state hs;
-    struct {
-        uint8_t k[64]; // state bytes 0..63
-        uint8_t init[INIT_SIZE_BYTE]; // state bytes 64..191
-    };
-};
-#pragma pack(pop)
-
-void do_blake_hash(const void* input, size_t len, char* output); // the four final-hash adapters; len is in bytes
-void do_groestl_hash(const void* input, size_t len, char* output);
-void do_jh_hash(const void* input, size_t len, char* output);
-void do_skein_hash(const void* input, size_t len, char* output);
-void cryptonight_hash_ctx(void* output, const void* input, int len); // portable implementation
-void keccakf(uint64_t st[25], int rounds);
-extern void (* const extra_hashes[4])(const void *, size_t, char *); // table of the four adapters above
-
-int scanhash_cryptonight( struct work *work, uint32_t max_nonce,
-                           uint64_t *hashes_done, struct thr_info *mythr );
-
-void cryptonight_hash_aes( void *restrict output, const void *input, int len ); // AES-NI implementation
-
-extern bool cryptonightV7; // set by the register_* functions, read by the hash routines
-
-#endif
-
--- a/algo/hodl/hodl-gate.c
+++ b/algo/hodl/hodl-gate.c
@@ -144,7 +144,7 @@ int hodl_scanhash( struct work* work, uint32_t max_nonce,
 #if defined(__AES__)
  GenRandomGarbage( (CacheEntry*)hodl_scratchbuf, work->data, mythr->id );
  pthread_barrier_wait( &hodl_barrier );
-  return scanhash_hodl_wolf( work, max_nonce, hashes_done, thr_info );
+  return scanhash_hodl_wolf( work, max_nonce, hashes_done, mythr );
 #endif
  return false;
 }
--- a/algo/hodl/hodl-wolf.c
+++ b/algo/hodl/hodl-wolf.c
@@ -129,9 +129,10 @@ int scanhash_hodl_wolf( struct work* work, uint32_t max_nonce,
 	      if( FinalPoW[7] <= ptarget[7] )
 	      {
 	          pdata[20] = swab32( BlockHdr[20] );
-		  pdata[21] = swab32( BlockHdr[21] );
-		  *hashes_done = CollisionCount;
-		  return(1);
+             pdata[21] = swab32( BlockHdr[21] );
+		       *hashes_done = CollisionCount;
+             submit_solution( work, FinalPoW, mythr );
+             return(0);
 	      }
 	   }
 	}
@@ -198,7 +199,8 @@ int scanhash_hodl_wolf( struct work* work, uint32_t max_nonce,
                  pdata[20] = swab32( BlockHdr[20] );
                  pdata[21] = swab32( BlockHdr[21] );
                  *hashes_done = CollisionCount;
-                  return(1);
+                  submit_solution( work, FinalPoW, mythr );
+                  return(0);
              }
           }
        }
--- a/algo/lyra2/allium.c
+++ b/algo/lyra2/allium.c
@@ -76,37 +76,34 @@ int scanhash_allium( struct work *work, uint32_t max_nonce,
                     uint64_t *hashes_done, struct thr_info *mythr )
 {
    uint32_t _ALIGN(128) hash[8];
-    uint32_t _ALIGN(128) endiandata[20];
+    uint32_t _ALIGN(128) edata[20];
    uint32_t *pdata = work->data;
    uint32_t *ptarget = work->target;
-
-    const uint32_t Htarg = ptarget[7];
    const uint32_t first_nonce = pdata[19];
    uint32_t nonce = first_nonce;
-    int thr_id = mythr->id;  // thr_id arg is deprecated
+    const int thr_id = mythr->id; 

    if ( opt_benchmark )
        ptarget[7] = 0x3ffff;

    for ( int i = 0; i < 19; i++ )
-        be32enc( &endiandata[i], pdata[i] );
+        edata[i] = bswap_32( pdata[i] );

    sph_blake256_init( &allium_ctx.blake );
-    sph_blake256( &allium_ctx.blake, endiandata, 64 );
+    sph_blake256( &allium_ctx.blake, edata, 64 );

    do {
-        be32enc( &endiandata[19], nonce );
-        allium_hash( hash, endiandata );
-        if ( hash[7] <= Htarg )
-        if ( fulltest( hash, ptarget ) && !opt_benchmark )
+        edata[19] = nonce;
+        allium_hash( hash, edata );
+        if ( valid_hash( hash, ptarget ) && !opt_benchmark )
        {
-            pdata[19] = nonce;
+            pdata[19] = bswap_32( nonce );
            submit_solution( work, hash, mythr );
        }
        nonce++;
    } while ( nonce < max_nonce && !work_restart[thr_id].restart );
    pdata[19] = nonce;
-    *hashes_done = pdata[19] - first_nonce + 1;
+    *hashes_done = pdata[19] - first_nonce;
    return 0;
 }

--- a/algo/lyra2/lyra2-gate.c
+++ b/algo/lyra2/lyra2-gate.c
@@ -119,7 +119,7 @@ bool register_lyra2rev2_algo( algo_gate_t* gate )
  gate->scanhash  = (void*)&scanhash_lyra2rev2;
  gate->hash      = (void*)&lyra2rev2_hash;
 #endif
-  gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT;
+  gate->optimizations = SSE2_OPT | AVX2_OPT | AVX512_OPT;
  gate->miner_thread_init = (void*)&lyra2rev2_thread_init;
  opt_target_factor = 256.0;
  return true;
@@ -228,13 +228,14 @@ void phi2_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )

 bool register_phi2_algo( algo_gate_t* gate )
 {
-//   init_phi2_ctx();
-   gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT;
+   gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT;
   gate->get_work_data_size = (void*)&phi2_get_work_data_size;
   gate->decode_extra_data  = (void*)&phi2_decode_extra_data;
   gate->build_extraheader  = (void*)&phi2_build_extraheader;
   opt_target_factor = 256.0;
-#if defined(PHI2_4WAY)
+#if defined(PHI2_8WAY)
+   gate->scanhash           = (void*)&scanhash_phi2_8way;
+#elif defined(PHI2_4WAY)
   gate->scanhash           = (void*)&scanhash_phi2_4way;
 #else
   init_phi2_ctx();
--- a/algo/lyra2/lyra2-gate.h
+++ b/algo/lyra2/lyra2-gate.h
@@ -184,19 +184,26 @@ bool init_allium_ctx();

 /////////////////////////////////////////

-#if defined(__AVX2__) && defined(__AES__)
-//  #define PHI2_4WAY
+#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
+  #define PHI2_8WAY 1
+#elif defined(__AVX2__) && defined(__AES__)
+  #define PHI2_4WAY 1
 #endif

 extern bool phi2_has_roots;

 bool register_phi2_algo( algo_gate_t* gate );
-#if defined(PHI2_4WAY)
+#if defined(PHI2_8WAY)
+
+void phi2_8way_hash( void *state, const void *input );
+int scanhash_phi2_8way( struct work *work, uint32_t max_nonce,
+                     uint64_t *hashes_done, struct thr_info *mythr );
+
+#elif defined(PHI2_4WAY)

 void phi2_hash_4way( void *state, const void *input );
 int scanhash_phi2_4way( struct work *work, uint32_t max_nonce,
                     uint64_t *hashes_done, struct thr_info *mythr );
-//void init_phi2_ctx();

 #else

--- a/algo/lyra2/lyra2rev2-4way.c
+++ b/algo/lyra2/lyra2rev2-4way.c
@@ -7,27 +7,8 @@
 #include "algo/cubehash/cubehash_sse2.h" 
 #include "algo/cubehash/cube-hash-2way.h"

-#if defined (LYRA2REV2_8WAY)
-
-typedef struct {
-   blake256_8way_context     blake;
-   keccak256_8way_context    keccak;
-   cube_4way_context          cube;
-   skein256_8way_context     skein;
-   bmw256_8way_context          bmw;
-} lyra2v2_8way_ctx_holder __attribute__ ((aligned (64)));
-
-static lyra2v2_8way_ctx_holder l2v2_8way_ctx;
-
-bool init_lyra2rev2_8way_ctx()
-{
-   keccak256_8way_init( &l2v2_8way_ctx.keccak );
-   cube_4way_init( &l2v2_8way_ctx.cube, 256, 16, 32 );
-   skein256_8way_init( &l2v2_8way_ctx.skein );
-   bmw256_8way_init( &l2v2_8way_ctx.bmw );
-   return true;
-}

+#if 0
 void lyra2rev2_8way_hash( void *state, const void *input )
 {
   uint32_t vhash[8*8] __attribute__ ((aligned (128)));
@@ -52,14 +33,24 @@ void lyra2rev2_8way_hash( void *state, const void *input )
   keccak256_8way_update( &ctx.keccak, vhashA, 32 );
   keccak256_8way_close( &ctx.keccak, vhash );

-   rintrlv_8x64_4x128( vhashA, vhashB, vhash, 256 );
+   dintrlv_8x64( hash0, hash1, hash2, hash3,
+                 hash4, hash5, hash6, hash7, vhash, 256 );

-   cube_4way_update_close( &ctx.cube, vhashA, vhashA, 32 );
-   cube_4way_init( &ctx.cube, 256, 16, 32 );
-   cube_4way_update_close( &ctx.cube, vhashB, vhashB, 32 );
+   cubehash_full( &ctx.cube, (byte*) hash0, 256, (const byte*) hash0, 32 );
+   cubehash_full( &ctx.cube, (byte*) hash1, 256, (const byte*) hash1, 32 );
+   cubehash_full( &ctx.cube, (byte*) hash2, 256, (const byte*) hash2, 32 );
+   cubehash_full( &ctx.cube, (byte*) hash3, 256, (const byte*) hash3, 32 );
+   cubehash_full( &ctx.cube, (byte*) hash4, 256, (const byte*) hash4, 32 );
+   cubehash_full( &ctx.cube, (byte*) hash5, 256, (const byte*) hash5, 32 );
+   cubehash_full( &ctx.cube, (byte*) hash6, 256, (const byte*) hash6, 32 );
+   cubehash_full( &ctx.cube, (byte*) hash7, 256, (const byte*) hash7, 32 );

-   dintrlv_4x128( hash0, hash1, hash2, hash3, vhashA, 256 );
-   dintrlv_4x128( hash4, hash5, hash6, hash7, vhashB, 256 );
+//   cube_4way_update_close( &ctx.cube, vhashA, vhashA, 32 );
+//   cube_4way_init( &ctx.cube, 256, 16, 32 );
+//   cube_4way_update_close( &ctx.cube, vhashB, vhashB, 32 );
+//
+//   dintrlv_4x128( hash0, hash1, hash2, hash3, vhashA, 256 );
+//   dintrlv_4x128( hash4, hash5, hash6, hash7, vhashB, 256 );

   intrlv_2x256( vhash, hash0, hash1, 256 );
   LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 );
@@ -80,15 +71,123 @@ void lyra2rev2_8way_hash( void *state, const void *input )
   skein256_8way_update( &ctx.skein, vhash, 32 );
   skein256_8way_close( &ctx.skein, vhash );

-   rintrlv_8x64_4x128( vhashA, vhashB, vhash, 256 );
+   dintrlv_8x64( hash0, hash1, hash2, hash3,
+                 hash4, hash5, hash6, hash7, vhash, 256 );

-   cube_4way_init( &ctx.cube, 256, 16, 32 );
-   cube_4way_update_close( &ctx.cube, vhashA, vhashA, 32 );
-   cube_4way_init( &ctx.cube, 256, 16, 32 );
-   cube_4way_update_close( &ctx.cube, vhashB, vhashB, 32 );
-   
-   dintrlv_4x128( hash0, hash1, hash2, hash3, vhashA, 256 );
-   dintrlv_4x128( hash4, hash5, hash6, hash7, vhashB, 256 );
+   cubehash_full( &ctx.cube, (byte*) hash0, 256, (const byte*) hash0, 32 );
+   cubehash_full( &ctx.cube, (byte*) hash1, 256, (const byte*) hash1, 32 );
+   cubehash_full( &ctx.cube, (byte*) hash2, 256, (const byte*) hash2, 32 );
+   cubehash_full( &ctx.cube, (byte*) hash3, 256, (const byte*) hash3, 32 );
+   cubehash_full( &ctx.cube, (byte*) hash4, 256, (const byte*) hash4, 32 );
+   cubehash_full( &ctx.cube, (byte*) hash5, 256, (const byte*) hash5, 32 );
+   cubehash_full( &ctx.cube, (byte*) hash6, 256, (const byte*) hash6, 32 );
+   cubehash_full( &ctx.cube, (byte*) hash7, 256, (const byte*) hash7, 32 );
+
+//   cube_4way_init( &ctx.cube, 256, 16, 32 );
+//   cube_4way_update_close( &ctx.cube, vhashA, vhashA, 32 );
+//   cube_4way_init( &ctx.cube, 256, 16, 32 );
+//   cube_4way_update_close( &ctx.cube, vhashB, vhashB, 32 );
+//   
+//   dintrlv_4x128( hash0, hash1, hash2, hash3, vhashA, 256 );
+//   dintrlv_4x128( hash4, hash5, hash6, hash7, vhashB, 256 );
+
+   intrlv_8x32( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
+                hash7, 256 );
+
+   bmw256_8way_update( &ctx.bmw, vhash, 32 );
+   bmw256_8way_close( &ctx.bmw, state );
+}
+#endif
+
+
+
+
+#if defined (LYRA2REV2_8WAY)
+
+typedef struct {
+   blake256_8way_context     blake;
+   keccak256_8way_context    keccak;
+   cubehashParam             cube;
+   skein256_8way_context     skein;
+   bmw256_8way_context          bmw;
+} lyra2v2_8way_ctx_holder __attribute__ ((aligned (64)));
+
+static lyra2v2_8way_ctx_holder l2v2_8way_ctx;
+
+bool init_lyra2rev2_8way_ctx()
+{
+   keccak256_8way_init( &l2v2_8way_ctx.keccak );
+   cubehashInit( &l2v2_8way_ctx.cube, 256, 16, 32 );
+   skein256_8way_init( &l2v2_8way_ctx.skein );
+   bmw256_8way_init( &l2v2_8way_ctx.bmw );
+   return true;
+}
+
+void lyra2rev2_8way_hash( void *state, const void *input )
+{
+   uint32_t vhash[8*8] __attribute__ ((aligned (128)));
+   uint32_t vhashA[8*8] __attribute__ ((aligned (64)));
+   uint32_t hash0[8] __attribute__ ((aligned (64)));
+   uint32_t hash1[8] __attribute__ ((aligned (64)));
+   uint32_t hash2[8] __attribute__ ((aligned (64)));
+   uint32_t hash3[8] __attribute__ ((aligned (64)));
+   uint32_t hash4[8] __attribute__ ((aligned (64)));
+   uint32_t hash5[8] __attribute__ ((aligned (64)));
+   uint32_t hash6[8] __attribute__ ((aligned (64)));
+   uint32_t hash7[8] __attribute__ ((aligned (64)));
+   lyra2v2_8way_ctx_holder ctx __attribute__ ((aligned (64)));
+   memcpy( &ctx, &l2v2_8way_ctx, sizeof(l2v2_8way_ctx) );
+
+   blake256_8way_update( &ctx.blake, input + (64<<3), 16 );
+   blake256_8way_close( &ctx.blake, vhash );
+
+   rintrlv_8x32_8x64( vhashA, vhash, 256 );
+
+   keccak256_8way_update( &ctx.keccak, vhashA, 32 );
+   keccak256_8way_close( &ctx.keccak, vhash );
+
+   dintrlv_8x64( hash0, hash1, hash2, hash3,
+                 hash4, hash5, hash6, hash7, vhash, 256 );
+
+   cubehash_full( &ctx.cube, (byte*) hash0, 256, (const byte*) hash0, 32 );
+   cubehash_full( &ctx.cube, (byte*) hash1, 256, (const byte*) hash1, 32 );
+   cubehash_full( &ctx.cube, (byte*) hash2, 256, (const byte*) hash2, 32 );
+   cubehash_full( &ctx.cube, (byte*) hash3, 256, (const byte*) hash3, 32 );
+   cubehash_full( &ctx.cube, (byte*) hash4, 256, (const byte*) hash4, 32 );
+   cubehash_full( &ctx.cube, (byte*) hash5, 256, (const byte*) hash5, 32 );
+   cubehash_full( &ctx.cube, (byte*) hash6, 256, (const byte*) hash6, 32 );
+   cubehash_full( &ctx.cube, (byte*) hash7, 256, (const byte*) hash7, 32 );
+
+   intrlv_2x256( vhash, hash0, hash1, 256 );
+   LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 );
+   dintrlv_2x256( hash0, hash1, vhash, 256 );
+   intrlv_2x256( vhash, hash2, hash3, 256 );
+   LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 );
+   dintrlv_2x256( hash2, hash3, vhash, 256 );
+   intrlv_2x256( vhash, hash4, hash5, 256 );
+   LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 );
+   dintrlv_2x256( hash4, hash5, vhash, 256 );
+   intrlv_2x256( vhash, hash6, hash7, 256 );
+   LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 );
+   dintrlv_2x256( hash6, hash7, vhash, 256 );
+
+   intrlv_8x64( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
+                hash7, 256 );
+
+   skein256_8way_update( &ctx.skein, vhash, 32 );
+   skein256_8way_close( &ctx.skein, vhash );
+
+   dintrlv_8x64( hash0, hash1, hash2, hash3,
+                 hash4, hash5, hash6, hash7, vhash, 256 );
+
+   cubehash_full( &ctx.cube, (byte*) hash0, 256, (const byte*) hash0, 32 );
+   cubehash_full( &ctx.cube, (byte*) hash1, 256, (const byte*) hash1, 32 );
+   cubehash_full( &ctx.cube, (byte*) hash2, 256, (const byte*) hash2, 32 );
+   cubehash_full( &ctx.cube, (byte*) hash3, 256, (const byte*) hash3, 32 );
+   cubehash_full( &ctx.cube, (byte*) hash4, 256, (const byte*) hash4, 32 );
+   cubehash_full( &ctx.cube, (byte*) hash5, 256, (const byte*) hash5, 32 );
+   cubehash_full( &ctx.cube, (byte*) hash6, 256, (const byte*) hash6, 32 );
+   cubehash_full( &ctx.cube, (byte*) hash7, 256, (const byte*) hash7, 32 );

   intrlv_8x32( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6, 
                hash7, 256 );
@@ -97,49 +196,49 @@ void lyra2rev2_8way_hash( void *state, const void *input )
   bmw256_8way_close( &ctx.bmw, state );
 }

-int scanhash_lyra2rev2_8way( struct work *work, uint32_t max_nonce,
+int scanhash_lyra2rev2_8way( struct work *work, const uint32_t max_nonce,
                             uint64_t *hashes_done, struct thr_info *mythr )
 {
   uint32_t hash[8*8] __attribute__ ((aligned (128)));
   uint32_t vdata[20*8] __attribute__ ((aligned (64)));
-   uint32_t *hash7 = &(hash[7<<3]);
-   uint32_t lane_hash[8] __attribute__ ((aligned (64)));
+   uint32_t *hashd7 = &hash[7*8];
+   uint32_t lane_hash[8] __attribute__ ((aligned (32)));
   uint32_t *pdata = work->data;
   uint32_t *ptarget = work->target;
   const uint32_t first_nonce = pdata[19];
   const uint32_t last_nonce = max_nonce - 8;
   uint32_t n = first_nonce;
-   const uint32_t Htarg = ptarget[7];
-   __m256i *noncev = (__m256i*)vdata + 19;   // aligned
-   int thr_id = mythr->id; 
+   const uint32_t targ32 = ptarget[7];
+   __m256i  *noncev = (__m256i*)vdata + 19;
+   const int thr_id = mythr->id;
+   const bool bench = opt_benchmark;

-   if ( opt_benchmark )
-      ( (uint32_t*)ptarget )[7] = 0x0000ff;
+   if ( bench )  ptarget[7] = 0x0000ff;

   mm256_bswap32_intrlv80_8x32( vdata, pdata );
-
+   *noncev = _mm256_set_epi32( n+7, n+6, n+5, n+4, n+3, n+2, n+1, n );
   blake256_8way_init( &l2v2_8way_ctx.blake );
   blake256_8way_update( &l2v2_8way_ctx.blake, vdata, 64 );

   do
   {
-      *noncev = mm256_bswap_32( _mm256_set_epi32( n+7, n+6, n+5, n+4,
-                                                  n+3, n+2, n+1, n ) );
-
      lyra2rev2_8way_hash( hash, vdata );
      pdata[19] = n;

-      for ( int lane = 0; lane < 8; lane++ ) if ( hash7[lane] <= Htarg )
+      for ( int lane = 0; lane < 8; lane++ )
+      if ( unlikely( hashd7[lane] <= targ32 ) )
      {
         extr_lane_8x32( lane_hash, hash, lane, 256 );
-         if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
+         if ( likely( valid_hash( lane_hash, ptarget ) && !bench ) )
         {
-            pdata[19] = n + lane;
-            submit_lane_solution( work, lane_hash, mythr, lane );
+             pdata[19] = bswap_32( n + lane );
+             submit_lane_solution( work, lane_hash, mythr, lane );
         }
      }
+      *noncev = _mm256_add_epi32( *noncev, m256_const1_32( 8 ) );
      n += 8;
-   } while ( (n < last_nonce) && !work_restart[thr_id].restart);
+   } while ( likely( (n < last_nonce) && !work_restart[thr_id].restart ) );
+   pdata[19] = n;
   *hashes_done = n - first_nonce;
   return 0;
 }
@@ -226,15 +325,16 @@ int scanhash_lyra2rev2_4way( struct work *work, uint32_t max_nonce,
 {
   uint32_t hash[8*4] __attribute__ ((aligned (64)));
   uint32_t vdata[20*4] __attribute__ ((aligned (64)));
-   uint32_t *hash7 = &(hash[7<<2]);
+   uint32_t *hashd7 = &(hash[7<<2]);
   uint32_t lane_hash[8] __attribute__ ((aligned (32)));
   uint32_t *pdata = work->data;
   uint32_t *ptarget = work->target;
   const uint32_t first_nonce = pdata[19];
+   const uint32_t last_nonce = max_nonce - 4;
   uint32_t n = first_nonce;
-   const uint32_t Htarg = ptarget[7];
-   __m128i *noncev = (__m128i*)vdata + 19;   // aligned
-   int thr_id = mythr->id;  // thr_id arg is deprecated
+   const uint32_t targ32 = ptarget[7];
+   __m128i *noncev = (__m128i*)vdata + 19;  
+   int thr_id = mythr->id; 

   if ( opt_benchmark )
      ( (uint32_t*)ptarget )[7] = 0x0000ff;
@@ -249,20 +349,20 @@ int scanhash_lyra2rev2_4way( struct work *work, uint32_t max_nonce,
      *noncev = mm128_bswap_32( _mm_set_epi32( n+3, n+2, n+1, n ) );

      lyra2rev2_4way_hash( hash, vdata );
-      pdata[19] = n;

-      for ( int lane = 0; lane < 4; lane++ ) if ( hash7[lane] <= Htarg )
+      for ( int lane = 0; lane < 4; lane++ ) if ( hashd7[lane] <= targ32 )
      {
         extr_lane_4x32( lane_hash, hash, lane, 256 );
-         if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
+         if ( valid_hash( lane_hash, ptarget ) && !opt_benchmark )
         {
            pdata[19] = n + lane;         
            submit_lane_solution( work, lane_hash, mythr, lane );
         }
      }
      n += 4;
-   } while ( (n < max_nonce-4) && !work_restart[thr_id].restart);
-   *hashes_done = n - first_nonce + 1;
+   } while ( (n < last_nonce) && !work_restart[thr_id].restart);
+   pdata[19] = n;
+   *hashes_done = n - first_nonce;
   return 0;
 }

--- a/algo/lyra2/lyra2rev2.c
+++ b/algo/lyra2/lyra2rev2.c
@@ -99,7 +99,7 @@ int scanhash_lyra2rev2( struct work *work,
 		lyra2rev2_hash(hash, endiandata);

 		if (hash[7] <= Htarg )
-      if( fulltest( hash, ptarget ) && !opt_benchmark )
+      if( valid_hash( hash, ptarget ) && !opt_benchmark )
      {
 			pdata[19] = nonce;
         submit_solution( work, hash, mythr );
--- a/algo/lyra2/lyra2rev3-4way.c
+++ b/algo/lyra2/lyra2rev3-4way.c
@@ -130,15 +130,15 @@ int scanhash_lyra2rev3_16way( struct work *work, const uint32_t max_nonce,
 {
   uint32_t hash[8*16] __attribute__ ((aligned (128)));
   uint32_t vdata[20*16] __attribute__ ((aligned (64)));
-   uint32_t *hash7 = &hash[7<<4];
+   uint32_t *hashd7 = &hash[7*16];
   uint32_t lane_hash[8] __attribute__ ((aligned (64)));
   uint32_t *pdata = work->data;
   const uint32_t *ptarget = work->target;
   const uint32_t first_nonce = pdata[19];
   uint32_t n = first_nonce;
   const uint32_t last_nonce = max_nonce - 16;
-   const uint32_t Htarg = ptarget[7];
-   __m512i  *noncev = (__m512i*)vdata + 19;   // aligned
+   const uint32_t targ32 = ptarget[7];
+   __m512i  *noncev = (__m512i*)vdata + 19;
   const int thr_id = mythr->id;

   if ( opt_benchmark )  ( (uint32_t*)ptarget )[7] = 0x0000ff;
@@ -159,10 +159,10 @@ int scanhash_lyra2rev3_16way( struct work *work, const uint32_t max_nonce,
      pdata[19] = n;

      for ( int lane = 0; lane < 16; lane++ )
-      if ( unlikely( hash7[lane] <= Htarg ) )
+      if ( unlikely( hashd7[lane] <= targ32 ) )
      {
         extr_lane_16x32( lane_hash, hash, lane, 256 );
-         if ( likely( fulltest( lane_hash, ptarget ) && !opt_benchmark ) )
+         if ( likely( valid_hash( lane_hash, ptarget ) && !opt_benchmark ) )
         {
             pdata[19] = n + lane;
             submit_lane_solution( work, lane_hash, mythr, lane );
@@ -170,6 +170,7 @@ int scanhash_lyra2rev3_16way( struct work *work, const uint32_t max_nonce,
      }
      n += 16;
   } while ( likely( (n < last_nonce) && !work_restart[thr_id].restart ) );
+   pdata[19] = n;
   *hashes_done = n - first_nonce;
   return 0;
 }
@@ -194,7 +195,7 @@ bool init_lyra2rev3_8way_ctx()

 void lyra2rev3_8way_hash( void *state, const void *input )
 {
-   uint32_t vhash[8*8] __attribute__ ((aligned (64)));
+   uint32_t vhash[8*8] __attribute__ ((aligned (128)));
   uint32_t hash0[8] __attribute__ ((aligned (64)));
   uint32_t hash1[8] __attribute__ ((aligned (32)));
   uint32_t hash2[8] __attribute__ ((aligned (32)));
@@ -250,17 +251,17 @@ void lyra2rev3_8way_hash( void *state, const void *input )
 int scanhash_lyra2rev3_8way( struct work *work, const uint32_t max_nonce,
                             uint64_t *hashes_done, struct thr_info *mythr )
 {
-   uint32_t hash[8*8] __attribute__ ((aligned (64)));
+   uint32_t hash[8*8] __attribute__ ((aligned (128)));
   uint32_t vdata[20*8] __attribute__ ((aligned (64)));
-   uint32_t *hash7 = &hash[7<<3];
+   uint32_t *hashd7 = &hash[7*8];
   uint32_t lane_hash[8] __attribute__ ((aligned (32)));
   uint32_t *pdata = work->data;
   uint32_t *ptarget = work->target;
   const uint32_t first_nonce = pdata[19];
   const uint32_t last_nonce = max_nonce - 8;
   uint32_t n = first_nonce;
-   const uint32_t Htarg = ptarget[7];
-   __m256i  *noncev = (__m256i*)vdata + 19;   // aligned
+   const uint32_t targ32 = ptarget[7];
+   __m256i  *noncev = (__m256i*)vdata + 19;  
   const int thr_id = mythr->id;
   const bool bench = opt_benchmark;

@@ -277,7 +278,7 @@ int scanhash_lyra2rev3_8way( struct work *work, const uint32_t max_nonce,
      pdata[19] = n;

      for ( int lane = 0; lane < 8; lane++ )
-      if ( unlikely( hash7[lane] <= Htarg ) )
+      if ( unlikely( hashd7[lane] <= targ32 ) )
      {
         extr_lane_8x32( lane_hash, hash, lane, 256 );
         if ( likely( valid_hash( lane_hash, ptarget ) && !bench ) )
@@ -357,42 +358,41 @@ int scanhash_lyra2rev3_4way( struct work *work, const uint32_t max_nonce,
 {
   uint32_t hash[8*4] __attribute__ ((aligned (64)));
   uint32_t vdata[20*4] __attribute__ ((aligned (64)));
-   uint32_t *hash7 = &(hash[7<<2]);
+   uint32_t *hashd7 = &(hash[7*4]);
   uint32_t lane_hash[8] __attribute__ ((aligned (32)));
   uint32_t *pdata = work->data;
   const uint32_t *ptarget = work->target;
   const uint32_t first_nonce = pdata[19];
   uint32_t n = first_nonce;
-   const uint32_t Htarg = ptarget[7];
-   __m128i  *noncev = (__m128i*)vdata + 19;   // aligned
-   const int thr_id = mythr->id;  // thr_id arg is deprecated
+   const uint32_t targ32 = ptarget[7];
+   __m128i  *noncev = (__m128i*)vdata + 19; 
+   const int thr_id = mythr->id;
   
   if ( opt_benchmark )
      ( (uint32_t*)ptarget )[7] = 0x0000ff;

   mm128_bswap32_intrlv80_4x32( vdata, pdata );
+   *noncev = _mm_set_epi32( n+3, n+2, n+1, n );

   blake256_4way_init( &l2v3_4way_ctx.blake );
   blake256_4way_update( &l2v3_4way_ctx.blake, vdata, 64 );

   do
   {
-      *noncev = mm128_bswap_32( _mm_set_epi32( n+3, n+2, n+1, n ) );
-
      lyra2rev3_4way_hash( hash, vdata );
-      pdata[19] = n;
-
-      for ( int lane = 0; lane < 4; lane++ ) if ( hash7[lane] <= Htarg )
+      for ( int lane = 0; lane < 4; lane++ ) if ( hashd7[lane] <= targ32 )
      {
         extr_lane_4x32( lane_hash, hash, lane, 256 );
-         if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
+         if ( valid_hash( lane_hash, ptarget ) && !opt_benchmark ) 
         {
-              pdata[19] = n + lane;    
+              pdata[19] = bswap_32( n + lane );    
              submit_lane_solution( work, lane_hash, mythr, lane );
 	      }
      }
+      *noncev = _mm_add_epi32( *noncev, m128_const1_32( 4 ) );
      n += 4;
   } while ( (n < max_nonce-4) && !work_restart[thr_id].restart);
+   pdata[19] = n;
   *hashes_done = n - first_nonce + 1;
   return 0;
 }
--- a/algo/lyra2/lyra2rev3.c
+++ b/algo/lyra2/lyra2rev3.c
@@ -88,7 +88,7 @@ int scanhash_lyra2rev3( struct work *work,
 	lyra2rev3_hash(hash, endiandata);

      if (hash[7] <= Htarg )
-      if( fulltest( hash, ptarget ) && !opt_benchmark )
+      if( valid_hash( hash, ptarget ) && !opt_benchmark )
      {
          pdata[19] = nonce;
          submit_solution( work, hash, mythr );
--- a/algo/lyra2/lyra2z.c
+++ b/algo/lyra2/lyra2z.c
@@ -56,7 +56,7 @@ int scanhash_lyra2z( struct work *work, uint32_t max_nonce,
 	const uint32_t Htarg = ptarget[7];
 	const uint32_t first_nonce = pdata[19];
 	uint32_t nonce = first_nonce;
-   int thr_id = mythr->id;  // thr_id arg is deprecated
+   int thr_id = mythr->id; 

 	if (opt_benchmark)
 		ptarget[7] = 0x0000ff;
@@ -65,14 +65,13 @@ int scanhash_lyra2z( struct work *work, uint32_t max_nonce,
 		be32enc(&endiandata[i], pdata[i]);
 	}

-        lyra2z_midstate( endiandata );
+   lyra2z_midstate( endiandata );

 	do {
 		be32enc(&endiandata[19], nonce);
                lyra2z_hash( hash, endiandata );

-      if ( hash[7] <= Htarg )
-      if ( fulltest( hash, ptarget ) && !opt_benchmark )
+      if ( valid_hash( hash, ptarget ) && !opt_benchmark )
      {
 			pdata[19] = nonce;
 			submit_solution( work, hash, mythr );
--- a/algo/lyra2/lyra2z330.c
+++ b/algo/lyra2/lyra2z330.c
@@ -9,7 +9,7 @@ void lyra2z330_hash(void *state, const void *input, uint32_t height)
 {
 	uint32_t _ALIGN(256) hash[16];

-        LYRA2Z( lyra2z330_wholeMatrix, hash, 32, input, 80, input, 80,
+   LYRA2Z( lyra2z330_wholeMatrix, hash, 32, input, 80, input, 80,
                 2, 330, 256 );

 	memcpy(state, hash, 32);
@@ -18,38 +18,40 @@ void lyra2z330_hash(void *state, const void *input, uint32_t height)
 int scanhash_lyra2z330( struct work *work, uint32_t max_nonce,
                        uint64_t *hashes_done, struct thr_info *mythr )
 {
-   uint32_t hash[8] __attribute__ ((aligned (64))); 
-   uint32_t endiandata[20] __attribute__ ((aligned (64)));
+   uint32_t hash[8] __attribute__ ((aligned (128))); 
+   uint32_t edata[20] __attribute__ ((aligned (64)));
   uint32_t *pdata = work->data;
   uint32_t *ptarget = work->target;
-   const uint32_t Htarg = ptarget[7];
   const uint32_t first_nonce = pdata[19];
   uint32_t nonce = first_nonce;
-   int thr_id = mythr->id;  // thr_id arg is deprecated
+   const int thr_id = mythr->id; 

   if (opt_benchmark)
 	ptarget[7] = 0x0000ff;

-   casti_m128i( endiandata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
-   casti_m128i( endiandata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
-   casti_m128i( endiandata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
-   casti_m128i( endiandata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
-   casti_m128i( endiandata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
+   casti_m128i( edata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
+   casti_m128i( edata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
+   casti_m128i( edata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
+   casti_m128i( edata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
+   casti_m128i( edata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
   
   do
   {
-      be32enc( &endiandata[19], nonce );
-      lyra2z330_hash( hash, endiandata, work->height );
-      if ( hash[7] <= Htarg )
-      if ( fulltest( hash, ptarget ) && !opt_benchmark )
+      edata[19] = nonce;
+
+      LYRA2Z( lyra2z330_wholeMatrix, hash, 32, edata, 80, edata, 80,
+                 2, 330, 256 );
+      
+//      lyra2z330_hash( hash, edata, work->height );
+      if ( valid_hash( hash, ptarget ) && !opt_benchmark )
      {
-         pdata[19] = nonce;
+         be32enc( pdata + 19, nonce );
         submit_solution( work, hash, mythr );
      }
      nonce++;
   } while ( nonce < max_nonce && !work_restart[thr_id].restart );
   pdata[19] = nonce;
-   *hashes_done = pdata[19] - first_nonce + 1;
+   *hashes_done = nonce - first_nonce;
   return 0;
 }

--- a/algo/lyra2/phi2-4way.c
+++ b/algo/lyra2/phi2-4way.c
@@ -1,233 +1,501 @@
-/**
- * Phi-2 algo Implementation
- */
-
 #include "lyra2-gate.h"
-
-#if defined(PHI2_4WAY)
-
 #include "algo/skein/skein-hash-4way.h"
 #include "algo/jh/jh-hash-4way.h"
 #include "algo/gost/sph_gost.h"
 #include "algo/cubehash/cubehash_sse2.h"
-#include "algo/echo/aes_ni/hash_api.h"
+#include "lyra2.h"
+#if defined(__VAES__)
+  #include "algo/echo/echo-hash-4way.h"
+#elif defined(__AES__)
+  #include "algo/echo/aes_ni/hash_api.h"
+#endif
+
+#if defined(PHI2_8WAY)
+
+typedef struct {   // hashing contexts used by phi2_8way_hash, one instance per call
+     cubehashParam           cube;    // passed to cubehash_full for every lane
+     jh512_8way_context      jh;      // JH-512 over the 8x64 interleaved buffer
+#if  defined(__VAES__)
+     echo_4way_context       echo;    // used with echo_4way_full
+#else
+     hashState_echo          echo;    // used with echo_full, one lane at a time
+#endif
+     sph_gost512_context     gost;    // re-initialised for each lane that takes the GOST branch
+     skein512_8way_context   skein;   // Skein-512 over the 8x64 interleaved buffer
+} phi2_8way_ctx_holder;
+
+void phi2_8way_hash( void *state, const void *input )   // hashes 8 inputs laid out 144 bytes apart in `input`
+{   // result: four 512-bit vectors written to `state` (see the XOR fold at the end)
+   unsigned char _ALIGN(128) hash[64*8];    // interleaved working buffer shared by all stages
+   unsigned char _ALIGN(128) hashA[64*2];   // two lanes interleaved as input for LYRA2RE_2WAY
+   unsigned char _ALIGN(64) hash0[64];      // hash0..hash7: one 64-byte buffer per lane
+   unsigned char _ALIGN(64) hash1[64];
+   unsigned char _ALIGN(64) hash2[64];
+   unsigned char _ALIGN(64) hash3[64];
+   unsigned char _ALIGN(64) hash4[64];
+   unsigned char _ALIGN(64) hash5[64];
+   unsigned char _ALIGN(64) hash6[64];
+   unsigned char _ALIGN(64) hash7[64];
+   const int size = phi2_has_roots ? 144 : 80 ;   // bytes hashed per lane, chosen by the phi2_has_roots flag
+   phi2_8way_ctx_holder ctx __attribute__ ((aligned (64)));
+
+   cubehash_full( &ctx.cube, (byte*)hash0, 512,   // stage 1: cubehash, lane i read from input + i*144
+                       (const byte*)input,         size );
+   cubehash_full( &ctx.cube, (byte*)hash1, 512,
+                       (const byte*)input +   144, size );
+   cubehash_full( &ctx.cube, (byte*)hash2, 512,
+                       (const byte*)input + 2*144, size );
+   cubehash_full( &ctx.cube, (byte*)hash3, 512,
+                       (const byte*)input + 3*144, size );
+   cubehash_full( &ctx.cube, (byte*)hash4, 512,
+                       (const byte*)input + 4*144, size );
+   cubehash_full( &ctx.cube, (byte*)hash5, 512,
+                       (const byte*)input + 5*144, size );
+   cubehash_full( &ctx.cube, (byte*)hash6, 512,
+                       (const byte*)input + 6*144, size );
+   cubehash_full( &ctx.cube, (byte*)hash7, 512,
+                       (const byte*)input + 7*144, size );
+
+   intrlv_2x256( hashA, hash0, hash1, 512 );   // stage 2: lanes are processed in pairs (0,1) (2,3) (4,5) (6,7)
+   LYRA2RE_2WAY( hash,        32, hashA,        32, 1, 8, 8 );   // first 64 bytes of the interleaved pair
+   LYRA2RE_2WAY( hash + 2*32, 32, hashA + 2*32, 32, 1, 8, 8 );   // next 64 bytes — presumably the upper 256 bits of both lanes
+   dintrlv_2x256( hash0, hash1, hash, 512 );
+   intrlv_2x256( hashA, hash2, hash3, 512 );
+   LYRA2RE_2WAY( hash,        32, hashA,        32, 1, 8, 8 );
+   LYRA2RE_2WAY( hash + 2*32, 32, hashA + 2*32, 32, 1, 8, 8 );
+   dintrlv_2x256( hash2, hash3, hash, 512 );
+   intrlv_2x256( hashA, hash4, hash5, 512 );
+   LYRA2RE_2WAY( hash,        32, hashA,        32, 1, 8, 8 );
+   LYRA2RE_2WAY( hash + 2*32, 32, hashA + 2*32, 32, 1, 8, 8 );
+   dintrlv_2x256( hash4, hash5, hash, 512 );
+   intrlv_2x256( hashA, hash6, hash7, 512 );
+   LYRA2RE_2WAY( hash,        32, hashA,        32, 1, 8, 8 );
+   LYRA2RE_2WAY( hash + 2*32, 32, hashA + 2*32, 32, 1, 8, 8 );
+   dintrlv_2x256( hash6, hash7, hash, 512 );
+   
+   intrlv_8x64_512( hash, hash0, hash1, hash2, hash3,   // stage 3: interleave all 8 lanes for JH-512
+                          hash4, hash5, hash6, hash7 );
+
+   jh512_8way_init( &ctx.jh );
+   jh512_8way_update( &ctx.jh, (const void*)hash, 64 );
+   jh512_8way_close( &ctx.jh, (void*)hash );
+
+   dintrlv_8x64_512( hash0, hash1, hash2, hash3,   // back to per-lane buffers for the data-dependent branch
+                     hash4, hash5, hash6, hash7, hash );
+
+#if defined (__VAES__)
+
+   unsigned char _ALIGN(64) hashA0[64];   // VAES build: double-echo results for every lane are computed up front
+   unsigned char _ALIGN(64) hashA1[64];
+   unsigned char _ALIGN(64) hashA2[64];
+   unsigned char _ALIGN(64) hashA3[64];
+   unsigned char _ALIGN(64) hashA4[64];
+   unsigned char _ALIGN(64) hashA5[64];
+   unsigned char _ALIGN(64) hashA6[64];
+   unsigned char _ALIGN(64) hashA7[64];
+
+   intrlv_4x128_512( hash, hash0, hash1, hash2, hash3 );   // lanes 0-3
+   echo_4way_full( &ctx.echo, hash, 512, hash, 64 ); 
+   echo_4way_full( &ctx.echo, hash, 512, hash, 64 );   // echo is applied twice, as in the non-VAES branch below
+   dintrlv_4x128_512( hashA0, hashA1, hashA2, hashA3, hash );
+
+   intrlv_4x128_512( hash, hash4, hash5, hash6, hash7 );   // lanes 4-7
+   echo_4way_full( &ctx.echo, hash, 512, hash, 64 );
+   echo_4way_full( &ctx.echo, hash, 512, hash, 64 );    
+   dintrlv_4x128_512( hashA4, hashA5, hashA6, hashA7, hash );
+
+#endif    
+
+   if ( hash0[0] & 1 )   // stage 4, per lane: low bit of the first byte set -> GOST-512
+   {
+      sph_gost512_init( &ctx.gost );
+      sph_gost512( &ctx.gost, (const void*)hash0, 64 );
+      sph_gost512_close( &ctx.gost, (void*)hash0 );
+   }
+   else   // bit clear -> double echo (precomputed copy under VAES, computed in place otherwise)
+#if defined (__VAES__)
+      memcpy( hash0, hashA0, 64 );
+#else
+   {
+      echo_full( &ctx.echo, (BitSequence *)hash0, 512,
+                      (const BitSequence *)hash0, 64 );
+      echo_full( &ctx.echo, (BitSequence *)hash0, 512,
+                      (const BitSequence *)hash0, 64 );
+   }
+#endif
+   if ( hash1[0] & 1 )   // lane 1, same selection
+   {
+      sph_gost512_init( &ctx.gost );
+      sph_gost512( &ctx.gost, (const void*)hash1, 64 );
+      sph_gost512_close( &ctx.gost, (void*)hash1 );
+   }
+   else
+#if defined (__VAES__)
+      memcpy( hash1, hashA1, 64 );
+#else
+   {
+      echo_full( &ctx.echo, (BitSequence *)hash1, 512,
+                      (const BitSequence *)hash1, 64 );
+      echo_full( &ctx.echo, (BitSequence *)hash1, 512,
+                      (const BitSequence *)hash1, 64 );
+   }
+#endif
+   if ( hash2[0] & 1 )   // lane 2
+   {
+      sph_gost512_init( &ctx.gost );
+      sph_gost512( &ctx.gost, (const void*)hash2, 64 );
+      sph_gost512_close( &ctx.gost, (void*)hash2 );
+   }
+   else
+#if defined (__VAES__)
+      memcpy( hash2, hashA2, 64 );
+#else 
+   {
+      echo_full( &ctx.echo, (BitSequence *)hash2, 512,
+                      (const BitSequence *)hash2, 64 );
+      echo_full( &ctx.echo, (BitSequence *)hash2, 512,
+                      (const BitSequence *)hash2, 64 );
+   }
+#endif
+   if ( hash3[0] & 1 )   // lane 3
+   {
+      sph_gost512_init( &ctx.gost );
+      sph_gost512( &ctx.gost, (const void*)hash3, 64 );
+      sph_gost512_close( &ctx.gost, (void*)hash3 );
+   }
+   else
+#if defined (__VAES__)
+      memcpy( hash3, hashA3, 64 );
+#else  
+   {
+      echo_full( &ctx.echo, (BitSequence *)hash3, 512,
+                      (const BitSequence *)hash3, 64 );
+      echo_full( &ctx.echo, (BitSequence *)hash3, 512,
+                      (const BitSequence *)hash3, 64 );
+   }
+#endif
+   if ( hash4[0] & 1 )   // lane 4
+   {
+      sph_gost512_init( &ctx.gost );
+      sph_gost512( &ctx.gost, (const void*)hash4, 64 );
+      sph_gost512_close( &ctx.gost, (void*)hash4 );
+   }
+   else
+#if defined (__VAES__)
+      memcpy( hash4, hashA4, 64 );
+#else
+   {
+      echo_full( &ctx.echo, (BitSequence *)hash4, 512,
+                      (const BitSequence *)hash4, 64 );
+      echo_full( &ctx.echo, (BitSequence *)hash4, 512,
+                      (const BitSequence *)hash4, 64 );
+   }
+#endif   
+   if ( hash5[0] & 1 )   // lane 5
+   {
+      sph_gost512_init( &ctx.gost );
+      sph_gost512( &ctx.gost, (const void*)hash5, 64 );
+      sph_gost512_close( &ctx.gost, (void*)hash5 );
+   }
+   else
+#if defined (__VAES__)
+      memcpy( hash5, hashA5, 64 );
+#else
+   {
+      echo_full( &ctx.echo, (BitSequence *)hash5, 512,
+                      (const BitSequence *)hash5, 64 );
+      echo_full( &ctx.echo, (BitSequence *)hash5, 512,
+                      (const BitSequence *)hash5, 64 );
+   }
+#endif   
+   if ( hash6[0] & 1 )   // lane 6
+   {
+      sph_gost512_init( &ctx.gost );
+      sph_gost512( &ctx.gost, (const void*)hash6, 64 );
+      sph_gost512_close( &ctx.gost, (void*)hash6 );
+   }
+   else
+#if defined (__VAES__)
+      memcpy( hash6, hashA6, 64 );
+#else
+   {
+      echo_full( &ctx.echo, (BitSequence *)hash6, 512,
+                      (const BitSequence *)hash6, 64 );
+      echo_full( &ctx.echo, (BitSequence *)hash6, 512,
+                      (const BitSequence *)hash6, 64 );
+   }
+#endif   
+   if ( hash7[0] & 1 )   // lane 7
+   {
+      sph_gost512_init( &ctx.gost );
+      sph_gost512( &ctx.gost, (const void*)hash7, 64 );
+      sph_gost512_close( &ctx.gost, (void*)hash7 );
+   }
+   else
+#if defined (__VAES__)
+      memcpy( hash7, hashA7, 64 );
+#else
+   {
+      echo_full( &ctx.echo, (BitSequence *)hash7, 512,
+                      (const BitSequence *)hash7, 64 );
+      echo_full( &ctx.echo, (BitSequence *)hash7, 512,
+                      (const BitSequence *)hash7, 64 );
+   }
+#endif
+
+   intrlv_8x64_512( hash, hash0, hash1, hash2, hash3,   // stage 5: re-interleave for Skein-512
+                          hash4, hash5, hash6, hash7 );
+
+   skein512_8way_init( &ctx.skein );
+   skein512_8way_update( &ctx.skein, (const void*)hash, 64 );
+   skein512_8way_close( &ctx.skein, (void*)hash );
+
+   for ( int i = 0; i < 4; i++ )   // fold: 512-bit vector i of `state` = vector i XOR vector i+4 of `hash`
+      casti_m512i( state, i ) = _mm512_xor_si512( casti_m512i( hash, i ),
+                                                  casti_m512i( hash, i+4 ) );
+}
+
+int scanhash_phi2_8way( struct work *work, uint32_t max_nonce,   // scans nonces 8 at a time starting at work->data[19]
+                        uint64_t *hashes_done, struct thr_info *mythr )   // always returns 0; count goes to *hashes_done
+{
+   uint32_t _ALIGN(128) hash[16*8];    // output of phi2_8way_hash for all 8 lanes
+   uint32_t _ALIGN(128) edata[36*8];   // 8 copies of the 36-word block, 36 words (144 bytes) apart
+   uint32_t *pdata = work->data;
+   uint32_t *ptarget = work->target;
+   uint32_t *hash7 = &(hash[49]);  // 49 = 3*16+1; read as hash7[ lane<<1 ] below — presumably each lane's 32-bit word 7 (TODO confirm)
+   const uint32_t Htarg = ptarget[7];
+   const uint32_t first_nonce = pdata[19];
+   const uint32_t last_nonce = max_nonce - 8;   // loop bound: scanning stops once n reaches this value
+   uint32_t n = first_nonce;
+   const int thr_id = mythr->id;
+   const bool bench = opt_benchmark;
+   if ( bench )      ptarget[7] = 0x00ff;   // Htarg was read above, but the pre-filter below is disabled when bench is set
+
+   phi2_has_roots = false;
+
+   for ( int i = 0; i < 36; i++ )
+   {
+      be32enc( &edata[i], pdata[i] );   // big-endian store of each input word into lane 0
+      edata[ i +   36 ] = edata[ i + 2*36 ] = edata[ i + 3*36 ] =   // replicate the word into lanes 1..7
+      edata[ i + 4*36 ] = edata[ i + 5*36 ] = edata[ i + 6*36 ] =
+      edata[ i + 7*36 ] = edata[ i ];
+      if ( i >= 20 && pdata[i] ) phi2_has_roots = true;   // any non-zero word at index 20 or above sets the flag
+   }
+
+   edata[        19 ] = n;     // per-lane nonces n..n+7, stored as plain values (no be32enc)
+   edata[   36 + 19 ] = n+1;
+   edata[ 2*36 + 19 ] = n+2;
+   edata[ 3*36 + 19 ] = n+3;
+   edata[ 4*36 + 19 ] = n+4;
+   edata[ 5*36 + 19 ] = n+5;
+   edata[ 6*36 + 19 ] = n+6;
+   edata[ 7*36 + 19 ] = n+7;
+   
+   do {
+      phi2_8way_hash( hash, edata );
+
+      for ( int lane = 0; lane < 8; lane++ )
+      if ( unlikely( hash7[ lane<<1 ] <= Htarg && !bench ) )   // cheap 32-bit pre-filter before the full check
+      {
+         uint64_t _ALIGN(64) lane_hash[8];
+         extr_lane_8x64( lane_hash, hash, lane, 256 );   // pull this lane's 256-bit hash out of the interleaved buffer
+         if ( valid_hash( lane_hash, ptarget ) )
+         {
+            be32enc( pdata + 19, n + lane );   // nonce is written back big-endian before submission
+            submit_lane_solution( work, lane_hash, mythr, lane );
+         }
+      }
+      n += 8;
+      edata[        19 ] += 8;   // advance every lane's nonce in place
+      edata[   36 + 19 ] += 8;
+      edata[ 2*36 + 19 ] += 8;
+      edata[ 3*36 + 19 ] += 8;
+      edata[ 4*36 + 19 ] += 8;
+      edata[ 5*36 + 19 ] += 8;
+      edata[ 6*36 + 19 ] += 8;
+      edata[ 7*36 + 19 ] += 8;
+   } while ( (n < last_nonce) && !work_restart[thr_id].restart);
+   pdata[19] = n;                    // next unscanned nonce, stored as a plain value
+   *hashes_done = n - first_nonce;   // number of nonces covered by this call
+   return 0;
+
+}
+
+#elif defined(PHI2_4WAY)

 typedef struct {
     cubehashParam           cube;
     jh512_4way_context      jh;
+#if  defined(__AES__)
     hashState_echo          echo;
-//     hashState_echo          echo2;
+#else
+     sph_echo512_context     echo;   // NOTE(review): phi2_4way_hash calls echo_full on this member in every build — confirm this non-AES variant is ever compiled
+#endif
     sph_gost512_context     gost;
     skein512_4way_context   skein;
-} phi2_ctx_holder;
-/*
-phi2_ctx_holder phi2_ctx;
+} phi2_4way_ctx_holder;

-void init_phi2_ctx()
+phi2_4way_ctx_holder phi2_4way_ctx;
+
+void phi2_4way_hash(void *state, const void *input)   // hashes 4 inputs laid out 144 bytes apart in `input`; writes four 256-bit vectors to `state`
 {
-   cubehashInit( &phi2_ctx.cube, 512, 16, 32 );
-   sph_jh512_init(&phi2_ctx.jh);
-   init_echo( &phi2_ctx.echo1, 512 );
-   init_echo( &phi2_ctx.echo2, 512 );
-   sph_gost512_init(&phi2_ctx.gost);
-   sph_skein512_init(&phi2_ctx.skein);
-};
-*/
-void phi2_hash_4way( void *state, const void *input )
-{
-   uint32_t hash[4][16] __attribute__ ((aligned (64)));
-   uint32_t hashA[4][16] __attribute__ ((aligned (64)));
-   uint32_t hashB[4][16] __attribute__ ((aligned (64)));
-   uint32_t vhash[4*16] __attribute__ ((aligned (64)));
+	unsigned char _ALIGN(128) hash[64*4];   // 4x64 interleaved working buffer for JH and Skein
+   unsigned char _ALIGN(64) hash0[64];      // hash0..hash3: per-lane buffers
+   unsigned char _ALIGN(64) hash1[64];
+   unsigned char _ALIGN(64) hash2[64];
+   unsigned char _ALIGN(64) hash3[64];
+   unsigned char _ALIGN(64) hash0A[64];     // hash0A..hash3A: per-lane cubehash output, input to LYRA2RE
+   unsigned char _ALIGN(64) hash1A[64];
+   unsigned char _ALIGN(64) hash2A[64];
+   unsigned char _ALIGN(64) hash3A[64];
+   const int size = phi2_has_roots ? 144 : 80 ;   // bytes hashed per lane, chosen by the phi2_has_roots flag
+   phi2_4way_ctx_holder ctx __attribute__ ((aligned (64)));

-//   unsigned char _ALIGN(128) hash[64];
-//	unsigned char _ALIGN(128) hashA[64];
-//	unsigned char _ALIGN(128) hashB[64];
+   cubehash_full( &ctx.cube, (byte*)hash0A, 512,   // stage 1: cubehash, lane i read from input + i*144
+                       (const byte*)input,          size );
+   cubehash_full( &ctx.cube, (byte*)hash1A, 512,
+                       (const byte*)input +   144, size );
+   cubehash_full( &ctx.cube, (byte*)hash2A, 512,
+                       (const byte*)input + 2*144, size );
+   cubehash_full( &ctx.cube, (byte*)hash3A, 512,
+                       (const byte*)input + 3*144, size );
+  
+	LYRA2RE( &hash0[ 0], 32, hash0A,    32, hash0A,    32, 1, 8, 8 );   // stage 2: LYRA2RE on each 32-byte half of every lane
+	LYRA2RE( &hash0[32], 32, hash0A+32, 32, hash0A+32, 32, 1, 8, 8 );
+   LYRA2RE( &hash1[ 0], 32, hash1A,    32, hash1A,    32, 1, 8, 8 );
+   LYRA2RE( &hash1[32], 32, hash1A+32, 32, hash1A+32, 32, 1, 8, 8 );
+   LYRA2RE( &hash2[ 0], 32, hash2A,    32, hash2A,    32, 1, 8, 8 );
+   LYRA2RE( &hash2[32], 32, hash2A+32, 32, hash2A+32, 32, 1, 8, 8 );
+   LYRA2RE( &hash3[ 0], 32, hash3A,    32, hash3A,    32, 1, 8, 8 );
+   LYRA2RE( &hash3[32], 32, hash3A+32, 32, hash3A+32, 32, 1, 8, 8 );

-   phi2_ctx_holder ctx __attribute__ ((aligned (64)));
-//  memcpy( &ctx, &phi2_ctx, sizeof(phi2_ctx) );
-
-   cubehashInit( &ctx.cube, 512, 16, 32 );
-   cubehashUpdateDigest( &ctx.cube, (byte*)hashB[0], (const byte*)input,
-                        phi2_has_roots ? 144 : 80 );
-   cubehashInit( &ctx.cube, 512, 16, 32 );
-   cubehashUpdateDigest( &ctx.cube, (byte*)hashB[1], (const byte*)input+144,
-                        phi2_has_roots ? 144 : 80 );
-   cubehashInit( &ctx.cube, 512, 16, 32 );
-   cubehashUpdateDigest( &ctx.cube, (byte*)hashB[2], (const byte*)input+288,
-                        phi2_has_roots ? 144 : 80 );
-   cubehashInit( &ctx.cube, 512, 16, 32 );
-   cubehashUpdateDigest( &ctx.cube, (byte*)hashB[3], (const byte*)input+432,
-                        phi2_has_roots ? 144 : 80 );
-
-	LYRA2RE( &hashA[0][0], 32, &hashB[0][0], 32, &hashB[0][0], 32, 1, 8, 8 );
-	LYRA2RE( &hashA[0][8], 32, &hashB[0][8], 32, &hashB[0][8], 32, 1, 8, 8 );
-   LYRA2RE( &hashA[1][0], 32, &hashB[1][0], 32, &hashB[1][0], 32, 1, 8, 8 );
-   LYRA2RE( &hashA[1][8], 32, &hashB[1][8], 32, &hashB[1][8], 32, 1, 8, 8 );
-   LYRA2RE( &hashA[2][0], 32, &hashB[2][0], 32, &hashB[2][0], 32, 1, 8, 8 );
-   LYRA2RE( &hashA[2][8], 32, &hashB[2][8], 32, &hashB[2][8], 32, 1, 8, 8 );
-   LYRA2RE( &hashA[3][0], 32, &hashB[3][0], 32, &hashB[3][0], 32, 1, 8, 8 );
-   LYRA2RE( &hashA[3][8], 32, &hashB[3][8], 32, &hashB[3][8], 32, 1, 8, 8 );
-
-   intrlv_4x64( vhash, hashA[0], hashA[1], hashA[2], hashA[3], 512 );
+   intrlv_4x64_512( hash, hash0, hash1, hash2, hash3 );   // stage 3: interleave the 4 lanes for JH-512

   jh512_4way_init( &ctx.jh );
-   jh512_4way( &ctx.jh, vhash, 64 );
-   jh512_4way_close( &ctx.jh, vhash );
+   jh512_4way_update( &ctx.jh, (const void*)hash, 64 );
+	jh512_4way_close( &ctx.jh, (void*)hash );

-   dintrlv_4x64( hash[0], hash[1], hash[2], hash[3], vhash, 512 );
+   dintrlv_4x64_512( hash0, hash1, hash2, hash3, hash );   // back to per-lane buffers for the data-dependent branch

-   if ( hash[0][0] & 1 )
+   if ( hash0[0] & 1 )   // stage 4, per lane: low bit of the first byte set -> GOST-512, clear -> echo applied twice
  	{
      sph_gost512_init( &ctx.gost );
-      sph_gost512( &ctx.gost, (const void*)hash[0], 64 );
-	   sph_gost512_close( &ctx.gost, (void*)hash[0] );
+      sph_gost512( &ctx.gost, (const void*)hash0, 64 );
+	   sph_gost512_close( &ctx.gost, (void*)hash0 );
 	}
  	else
  	{
-      init_echo( &ctx.echo, 512 );
-      update_final_echo ( &ctx.echo, (BitSequence *)hash[0],
-                          (const BitSequence *)hash[0], 512 );
-      init_echo( &ctx.echo, 512 );
-      update_final_echo ( &ctx.echo, (BitSequence *)hash[0],
-                          (const BitSequence *)hash[0], 512 );
+      echo_full( &ctx.echo, (BitSequence *)hash0, 512,
+                      (const BitSequence *)hash0, 64 );
+      echo_full( &ctx.echo, (BitSequence *)hash0, 512,   // second pass runs over the first pass's output
+                      (const BitSequence *)hash0, 64 );
 	}
-
-   if ( hash[1][0] & 1 )
+   if ( hash1[0] & 1 )   // lane 1, same selection
   {
      sph_gost512_init( &ctx.gost );
-      sph_gost512( &ctx.gost, (const void*)hash[1], 64 );
-      sph_gost512_close( &ctx.gost, (void*)hash[1] );
+      sph_gost512( &ctx.gost, (const void*)hash1, 64 );
+      sph_gost512_close( &ctx.gost, (void*)hash1 );
   }
   else
   {
-      init_echo( &ctx.echo, 512 );
-      update_final_echo ( &ctx.echo, (BitSequence *)hash[1],
-                          (const BitSequence *)hash[1], 512 );
-      init_echo( &ctx.echo, 512 );
-      update_final_echo ( &ctx.echo, (BitSequence *)hash[1],
-                          (const BitSequence *)hash[1], 512 );
+      echo_full( &ctx.echo, (BitSequence *)hash1, 512,
+                      (const BitSequence *)hash1, 64 );
+      echo_full( &ctx.echo, (BitSequence *)hash1, 512,
+                      (const BitSequence *)hash1, 64 );
   }
-
-   if ( hash[2][0] & 1 )
+   if ( hash2[0] & 1 )   // lane 2
   {
      sph_gost512_init( &ctx.gost );
-      sph_gost512( &ctx.gost, (const void*)hash[2], 64 );
-      sph_gost512_close( &ctx.gost, (void*)hash[2] );
+      sph_gost512( &ctx.gost, (const void*)hash2, 64 );
+      sph_gost512_close( &ctx.gost, (void*)hash2 );
   }
   else
   {
-      init_echo( &ctx.echo, 512 );
-      update_final_echo ( &ctx.echo, (BitSequence *)hash[2],
-                          (const BitSequence *)hash[2], 512 );
-      init_echo( &ctx.echo, 512 );
-      update_final_echo ( &ctx.echo, (BitSequence *)hash[2],
-                          (const BitSequence *)hash[2], 512 );
+      echo_full( &ctx.echo, (BitSequence *)hash2, 512,
+                      (const BitSequence *)hash2, 64 );
+      echo_full( &ctx.echo, (BitSequence *)hash2, 512,
+                      (const BitSequence *)hash2, 64 );
   }
-
-   if ( hash[3][0] & 1 )
+   if ( hash3[0] & 1 )   // lane 3
   {
      sph_gost512_init( &ctx.gost );
-      sph_gost512( &ctx.gost, (const void*)hash[3], 64 );
-      sph_gost512_close( &ctx.gost, (void*)hash[3] );
+      sph_gost512( &ctx.gost, (const void*)hash3, 64 );
+      sph_gost512_close( &ctx.gost, (void*)hash3 );
   }
   else
   {
-      init_echo( &ctx.echo, 512 );
-      update_final_echo ( &ctx.echo, (BitSequence *)hash[3],
-                          (const BitSequence *)hash[3], 512 );
-      init_echo( &ctx.echo, 512 );
-      update_final_echo ( &ctx.echo, (BitSequence *)hash[3],
-                          (const BitSequence *)hash[3], 512 );
+      echo_full( &ctx.echo, (BitSequence *)hash3, 512,
+                      (const BitSequence *)hash3, 64 );
+      echo_full( &ctx.echo, (BitSequence *)hash3, 512,
+                      (const BitSequence *)hash3, 64 );
   }

-   intrlv_4x64( vhash, hash[0], hash[1], hash[2], hash[3], 512 );
-   
+   intrlv_4x64_512( hash, hash0, hash1, hash2, hash3 );   // stage 5: re-interleave for Skein-512
+
   skein512_4way_init( &ctx.skein );
-	skein512_4way( &ctx.skein, vhash, 64 );
-	skein512_4way_close( &ctx.skein, vhash );
+	skein512_4way_update( &ctx.skein, (const void*)hash, 64 );
+	skein512_4way_close( &ctx.skein, (void*)hash );

-   for (int i=0; i<4; i++)
-   {
-      ( (uint64_t*)vhash    )[i] ^= ( (uint64_t*)vhash    )[i+4];
-      ( (uint64_t*)vhash+ 8 )[i] ^= ( (uint64_t*)vhash+ 8 )[i+4];
-      ( (uint64_t*)vhash+16 )[i] ^= ( (uint64_t*)vhash+16 )[i+4];
-      ( (uint64_t*)vhash+24 )[i] ^= ( (uint64_t*)vhash+24 )[i+4];
-   }
-//   for ( int i = 0; i < 4; i++ )
-//      casti_m256i( vhash, i ) = _mm256_xor_si256( casti_m256i( vhash, i   ),
-//                                                  casti_m256i( vhash, i+4 ) );

-	memcpy( state, vhash, 128 );
+   for ( int i = 0; i < 4; i++ )   // fold: 256-bit vector i of `state` = vector i XOR vector i+4 of `hash`
+      casti_m256i( state, i ) = _mm256_xor_si256( casti_m256i( hash, i   ),
+                                                  casti_m256i( hash, i+4 ) );
 }

 int scanhash_phi2_4way( struct work *work, uint32_t max_nonce,
 	                     uint64_t *hashes_done, struct thr_info *mythr )
 {
-   uint32_t _ALIGN(128) hash[8];
-   uint32_t _ALIGN(128) edata[36];
-   uint32_t vdata[4][36] __attribute__ ((aligned (64)));
-   uint32_t *hash7 = &(hash[25]);
-   uint32_t lane_hash[8] __attribute__ ((aligned (32)));
+   uint32_t _ALIGN(128) hash[16*4];    // output of phi2_4way_hash for all 4 lanes
+   uint32_t _ALIGN(128) edata[36*4];   // 4 copies of the 36-word block, 36 words (144 bytes) apart
   uint32_t *pdata = work->data;
   uint32_t *ptarget = work->target;
+   uint32_t *hash7 = &(hash[25]);   // 3*8+1; read as hash7[ lane<<1 ] in the scan loop
   const uint32_t Htarg = ptarget[7];
   const uint32_t first_nonce = pdata[19];
+   const uint32_t last_nonce = max_nonce - 4;   // loop bound: scanning stops once n reaches this value
   uint32_t n = first_nonce;
-   int thr_id = mythr->id;  // thr_id arg is deprecated
-
-   if(opt_benchmark){
-   	ptarget[7] = 0x00ff;
-   }
-
-// Data is not interleaved, but hash is.
-// any non-zero data at index 20 or above sets roots true.
-// Split up the operations, bswap first, then set roots.
-
-   phi2_has_roots = false;
-   for ( int i=0; i < 36; i++ )
-   {
-   be32enc(&edata[i], pdata[i]);
-   if (i >= 20 && pdata[i]) phi2_has_roots = true;
-   }
-/*
-   casti_m256i( vdata[0], 0 ) = mm256_bswap_32( casti_m256i( pdata, 0 ) );   
-   casti_m256i( vdata[0], 1 ) = mm256_bswap_32( casti_m256i( pdata, 1 ) );
-   casti_m256i( vdata[0], 2 ) = mm256_bswap_32( casti_m256i( pdata, 2 ) );
-   casti_m256i( vdata[0], 3 ) = mm256_bswap_32( casti_m256i( pdata, 3 ) );
-   casti_m128i( vdata[0], 8 ) = mm128_bswap_32( casti_m128i( pdata, 8 ) );
-   phi2_has_roots = mm128_anybits1( casti_m128i( vdata[0], 5 ) ) ||
-                    mm128_anybits1( casti_m128i( vdata[0], 6 ) ) ||
-                    mm128_anybits1( casti_m128i( vdata[0], 7 ) ) ||
-                    mm128_anybits1( casti_m128i( vdata[0], 8 ) );
-*/   
-
-   memcpy( vdata[0], edata, 144 );
-   memcpy( vdata[1], edata, 144 );
-   memcpy( vdata[2], edata, 144 );
-   memcpy( vdata[3], edata, 144 );
-
-   do {
-      be32enc( &vdata[0][19], n );
-      be32enc( &vdata[1][19], n+1 );
-      be32enc( &vdata[2][19], n+2 );
-      be32enc( &vdata[3][19], n+3 );
-
-      phi2_hash_4way( hash, vdata );
-
-      for ( int lane = 0; lane < 4; lane++ ) if (  hash7[ lane<<1 ] < Htarg )
-      {
-          extr_lane_4x64( lane_hash, hash, lane, 256 );
-          if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
-          {
-              pdata[19] = n + lane;
-              submit_lane_solution( work, lane_hash, mythr, lane );
-          }
-       }
-       n += 4;
-    } while ( ( n < max_nonce - 4 ) && !work_restart[thr_id].restart );
-    *hashes_done = n - first_nonce + 1;
-    return 0;
-}
+   const int thr_id = mythr->id;
+   const bool bench = opt_benchmark;
+   if ( bench )   	ptarget[7] = 0x00ff;   // Htarg was read above, but the pre-filter below is disabled when bench is set
   
-#endif  // PHI2_4WAY
+   phi2_has_roots = false;
+
+   for ( int i = 0; i < 36; i++ )
+   {
+	   be32enc( &edata[i], pdata[i] );   // big-endian store of each input word into lane 0
+      edata[ i+36 ] = edata[ i+72 ] = edata[ i+108 ] = edata[i];   // replicate the word into lanes 1..3
+      if ( i >= 20 && pdata[i] ) phi2_has_roots = true;   // any non-zero word at index 20 or above sets the flag
+   }
+
+   edata[        19 ] = n;     // per-lane nonces n..n+3, stored as plain values (no be32enc)
+   edata[   36 + 19 ] = n+1;
+   edata[ 2*36 + 19 ] = n+2;
+   edata[ 3*36 + 19 ] = n+3;
+   
+   do {
+	   phi2_4way_hash( hash, edata );
+
+      for ( int lane = 0; lane < 4; lane++ )
+      if ( unlikely( hash7[ lane<<1 ] <= Htarg && !bench ) )   // cheap 32-bit pre-filter before the full check
+      {
+         uint64_t _ALIGN(64) lane_hash[8]; 
+         extr_lane_4x64( lane_hash, hash, lane, 256 );   // pull this lane's 256-bit hash out of the interleaved buffer
+         if ( valid_hash( lane_hash, ptarget ) )
+         {
+            be32enc( pdata + 19, n + lane );   // nonce is written back big-endian before submission
+            submit_lane_solution( work, lane_hash, mythr, lane );
+         }
+      }
+      edata[        19 ] += 4;   // advance every lane's nonce in place
+      edata[   36 + 19 ] += 4;
+      edata[ 2*36 + 19 ] += 4;
+      edata[ 3*36 + 19 ] += 4;
+      n +=4;
+   } while ( (n < last_nonce) && !work_restart[thr_id].restart);
+   pdata[19] = n;                    // next unscanned nonce, stored as a plain value
+   *hashes_done = n - first_nonce;   // number of nonces covered by this call
+   return 0;
+}
+
+#endif
+
--- a/algo/lyra2/phi2.c
+++ b/algo/lyra2/phi2.c
@@ -99,7 +99,6 @@ int scanhash_phi2( struct work *work, uint32_t max_nonce,
   uint32_t _ALIGN(128) edata[36];
   uint32_t *pdata = work->data;
   uint32_t *ptarget = work->target;
-   const uint32_t Htarg = ptarget[7];
   const uint32_t first_nonce = pdata[19];
   uint32_t n = first_nonce;
   const int thr_id = mythr->id;
--- a/algo/m7m/m7m.c
+++ b/algo/m7m/m7m.c
--- a/algo/m7m/magimath.cpp
+++ b/algo/m7m/magimath.cpp
--- a/algo/m7m/magimath.h
+++ b/algo/m7m/magimath.h
--- a/algo/nist5/zr5.c
+++ b/algo/nist5/zr5.c
@@ -158,7 +158,7 @@ void zr5_get_new_work( struct work* work, struct work* g_work, int thr_id,
 {
   // ignore POK in first word
   const int wkcmp_sz = 72;  // (19-1) * sizeof(uint32_t)
-   uint32_t *nonceptr = algo_gate.get_nonceptr( work->data );
+   uint32_t *nonceptr = work->data + algo_gate.nonce_index;
   if ( memcmp( &work->data[1], &g_work->data[1], wkcmp_sz )
      || ( *nonceptr >= *end_nonce_ptr ) )
   {
--- a/algo/quark/anime-4way.c
+++ b/algo/quark/anime-4way.c
@@ -162,59 +162,35 @@ int scanhash_anime_4way( struct work *work, uint32_t max_nonce,
                         uint64_t *hashes_done, struct thr_info *mythr )
 {
    uint32_t hash[4*8] __attribute__ ((aligned (64)));
-    uint32_t vdata[24*4] __attribute__ ((aligned (64)));
+    uint32_t vdata[20*4] __attribute__ ((aligned (64)));
    uint32_t *pdata = work->data;
    uint32_t *ptarget = work->target;
    uint32_t n = pdata[19];
    const uint32_t first_nonce = pdata[19];
+    const uint32_t last_nonce = max_nonce - 4;
    __m256i  *noncev = (__m256i*)vdata + 9;   // aligned
-    int thr_id = mythr->id;  // thr_id arg is deprecated
-    const uint32_t Htarg = ptarget[7];
-    uint64_t htmax[] = {
-                0,
-                0xF,
-                0xFF,
-                0xFFF,
-                0xFFFF,
-                0x10000000
-        };
-    uint32_t masks[] = {
-                0xFFFFFFFF,
-                0xFFFFFFF0,
-                0xFFFFFF00,
-                0xFFFFF000,
-                0xFFFF0000,
-                0
-        };
+    const int thr_id = mythr->id;  

    mm256_bswap32_intrlv80_4x64( vdata, pdata );
+    *noncev = mm256_intrlv_blend_32(
+                   _mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );

-    for (int m=0; m < 6; m++)
-       if (Htarg <= htmax[m])
+    do
+    {
+       anime_4way_hash( hash, vdata );
+
+       for ( int i = 0; i < 4; i++ )
+       if ( valid_hash( hash+(i<<3), ptarget ) && !opt_benchmark )
       {
-          uint32_t mask = masks[m];
-
-          do
-          {
-             *noncev = mm256_intrlv_blend_32( mm256_bswap_32(
-                _mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
-
-             anime_4way_hash( hash, vdata );
-             pdata[19] = n;
-
-             for ( int i = 0; i < 4; i++ )
-             if ( ( ( (hash+(i<<3))[7] & mask ) == 0 )
-                && fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
-             {
-                pdata[19] = n+i;
-                submit_lane_solution( work, hash+(i<<3), mythr, i );
-             }
-             n += 4;
-          } while ( ( n < max_nonce ) && !work_restart[thr_id].restart );
-          break;
+          pdata[19] = bswap_32( n+i );
+          submit_solution( work, hash+(i<<3), mythr );
       }
-
-    *hashes_done = n - first_nonce + 1;
+       *noncev = _mm256_add_epi32( *noncev,
+                                   m256_const1_64( 0x0000000400000000 ) );
+       n += 4;
+    } while ( ( n < last_nonce ) && !work_restart[thr_id].restart );
+    pdata[19] = n;
+    *hashes_done = n - first_nonce;
    return 0;
 }

--- a/algo/quark/anime.c
+++ b/algo/quark/anime.c
@@ -126,49 +126,28 @@ int scanhash_anime( struct work *work, uint32_t max_nonce,
                         uint64_t *hashes_done, struct thr_info *mythr)
 {
    uint32_t hash[8] __attribute__ ((aligned (64)));
-    uint32_t endiandata[20] __attribute__((aligned(64)));
+    uint32_t edata[20] __attribute__((aligned(64)));
    uint32_t *pdata = work->data;
    uint32_t *ptarget = work->target;
    uint32_t n = pdata[19];
    const uint32_t first_nonce = pdata[19];
-    int thr_id = mythr->id;  // thr_id arg is deprecated
-    const uint32_t Htarg = ptarget[7];
-    uint64_t htmax[] = {
-                0,
-                0xF,
-                0xFF,
-                0xFFF,
-                0xFFFF,
-                0x10000000
-        };
-    uint32_t masks[] = {
-                0xFFFFFFFF,
-                0xFFFFFFF0,
-                0xFFFFFF00,
-                0xFFFFF000,
-                0xFFFF0000,
-                0
-        };
+    const int thr_id = mythr->id;
+    const int bench = opt_benchmark;
+    
+    swab32_array( edata, pdata, 20 );

-    swab32_array( endiandata, pdata, 20 );
-
-    for (int m=0; m < 6; m++)
-       if (Htarg <= htmax[m])
-       {
-          uint32_t mask = masks[m];
-          do
-          {
-              be32enc( &endiandata[19], n );
-              anime_hash( hash, endiandata );
-              pdata[19] = n;
-
-             if ( ( hash[7] & mask ) == 0 && fulltest( hash, ptarget ) ) 
-                submit_solution( work, hash, mythr );
-             n++;
-          } while ( ( n < max_nonce ) && !work_restart[thr_id].restart );
-          break;
-       }
-    *hashes_done = n - first_nonce + 1;
+    do
+    {
+        edata[19] = n;
+        anime_hash( hash, edata );
+        if ( valid_hash( hash, ptarget ) && !bench )
+        {
+           be32enc( &pdata[19], n );
+           submit_solution( work, hash, mythr );
+        }
+        n++;
+    } while ( ( n < max_nonce ) && !work_restart[thr_id].restart );
+    *hashes_done = n - first_nonce;
    pdata[19] = n;
    return 0;
 }
--- a/algo/quark/hmq1725-4way.c
+++ b/algo/quark/hmq1725-4way.c
@@ -1028,7 +1028,7 @@ extern void hmq1725_4way_hash(void *state, const void *input)

 // B

-     if ( mm256_anybits1( vh_mask ) )
+     if ( mm256_anybits0( vh_mask ) )
     {
       skein512_4way_init( &ctx.skein );
       skein512_4way_update( &ctx.skein, vhash, 64 );
@@ -1050,14 +1050,14 @@ extern void hmq1725_4way_hash(void *state, const void *input)
     vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], vmask ),
                                   m256_zero );

-     if ( mm256_anybits0( vh_mask ) )
+     if ( mm256_anybits1( vh_mask ) )
     {
       blake512_4way_init( &ctx.blake );
       blake512_4way_update( &ctx.blake, vhash, 64 );
       blake512_4way_close( &ctx.blake, vhashA );
     }

-     if ( mm256_anybits1( vh_mask ) )
+     if ( mm256_anybits0( vh_mask ) )
     {
       bmw512_4way_init( &ctx.bmw );
       bmw512_4way_update( &ctx.bmw, vhash, 64 );
@@ -1101,14 +1101,14 @@ extern void hmq1725_4way_hash(void *state, const void *input)
     vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], vmask ),
                                  m256_zero );

-     if ( mm256_anybits0( vh_mask ) )
+     if ( mm256_anybits1( vh_mask ) )
     {
        keccak512_4way_init( &ctx.keccak );
        keccak512_4way_update( &ctx.keccak, vhash, 64 );
        keccak512_4way_close( &ctx.keccak, vhashA );
     }

-     if ( mm256_anybits1( vh_mask ) )
+     if ( mm256_anybits0( vh_mask ) )
     {
        jh512_4way_init( &ctx.jh );
        jh512_4way_update( &ctx.jh, vhash, 64 );
@@ -1180,7 +1180,7 @@ extern void hmq1725_4way_hash(void *state, const void *input)
     intrlv_4x64( vhashA, hash0, hash1, hash2, hash3, 512 );

 // B
-     if ( mm256_anybits1( vh_mask ) )
+     if ( mm256_anybits0( vh_mask ) )
     {
        haval256_5_4way_init( &ctx.haval );
        haval256_5_4way_update( &ctx.haval, vhash, 64 );
@@ -1407,7 +1407,7 @@ extern void hmq1725_4way_hash(void *state, const void *input)

   intrlv_4x64( vhashA, hash0, hash1, hash2, hash3, 512 );

-   if ( mm256_anybits1( vh_mask ) )
+   if ( mm256_anybits0( vh_mask ) )
   {
      sha512_4way_init( &ctx.sha512 );
      sha512_4way_update( &ctx.sha512, vhash, 64 );
@@ -1443,7 +1443,7 @@ extern void hmq1725_4way_hash(void *state, const void *input)
   // 4x32 for haval
   intrlv_4x32_512( vhash, hash0, hash1, hash2, hash3 );

-   if ( mm256_anybits0( vh_mask ) )
+   if ( mm256_anybits1( vh_mask ) )
   {
      haval256_5_4way_init( &ctx.haval );
      haval256_5_4way_update( &ctx.haval, vhash, 64 );
--- a/algo/quark/quark-4way.c
+++ b/algo/quark/quark-4way.c
@@ -402,14 +402,14 @@ void quark_4way_hash( void *state, const void *input )

    vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ), zero );

-    if ( mm256_anybits0( vh_mask ) )   
+    if ( mm256_anybits1( vh_mask ) )   
    {
       blake512_4way_init( &ctx.blake );
       blake512_4way_update( &ctx.blake, vhash, 64 );
       blake512_4way_close( &ctx.blake, vhashA );
    }

-    if ( mm256_anybits1( vh_mask ) )
+    if ( mm256_anybits0( vh_mask ) )
    {
       bmw512_4way_init( &ctx.bmw );
       bmw512_4way_update( &ctx.bmw, vhash, 64 );
@@ -427,14 +427,14 @@ void quark_4way_hash( void *state, const void *input )

    vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ), zero );

-    if ( mm256_anybits0( vh_mask ) )    
+    if ( mm256_anybits1( vh_mask ) )    
    {
       keccak512_4way_init( &ctx.keccak );
       keccak512_4way_update( &ctx.keccak, vhash, 64 );
       keccak512_4way_close( &ctx.keccak, vhashA );
    }

-    if ( mm256_anybits1( vh_mask ) )
+    if ( mm256_anybits0( vh_mask ) )
    {
       jh512_4way_init( &ctx.jh );
       jh512_4way_update( &ctx.jh, vhash, 64 );
--- a/algo/skein/skein-4way.c
+++ b/algo/skein/skein-4way.c
@@ -13,17 +13,21 @@

 #if defined (SKEIN_8WAY)

+static __thread skein512_8way_context skein512_8way_ctx
+                                            __attribute__ ((aligned (64)));
+
 void skeinhash_8way( void *state, const void *input )
 {
     uint64_t vhash64[8*8] __attribute__ ((aligned (128)));
     skein512_8way_context ctx_skein;
-
+     memcpy( &ctx_skein, &skein512_8way_ctx, sizeof( ctx_skein ) );
     uint32_t vhash32[16*8] __attribute__ ((aligned (128)));
     sha256_8way_context ctx_sha256;

-     skein512_8way_init( &ctx_skein );
-     skein512_8way_update( &ctx_skein, input, 80 );
-     skein512_8way_close( &ctx_skein, vhash64 );
+     skein512_8way_full( &ctx_skein, vhash64, input, 80 );
+     
+//     skein512_8way_update( &ctx_skein, input + (64*8), 16 );
+//     skein512_8way_close( &ctx_skein, vhash64 );

     rintrlv_8x64_8x32( vhash32, vhash64, 512 );

@@ -36,63 +40,74 @@ int scanhash_skein_8way( struct work *work, uint32_t max_nonce,
                         uint64_t *hashes_done, struct thr_info *mythr )
 {
    uint32_t vdata[20*8] __attribute__ ((aligned (128)));
-    uint32_t hash[16*8] __attribute__ ((aligned (64)));
+    uint32_t hash[8*8] __attribute__ ((aligned (64)));
    uint32_t lane_hash[8] __attribute__ ((aligned (64)));
-    uint32_t *hash7 = &(hash[7<<3]);
+    uint32_t *hash_d7 = &(hash[7*8]);
    uint32_t *pdata = work->data;
    uint32_t *ptarget = work->target;
-    const uint32_t Htarg = ptarget[7];
+    const uint32_t targ_d7 = ptarget[7];
    const uint32_t first_nonce = pdata[19];
+    const uint32_t last_nonce = max_nonce - 8;
    uint32_t n = first_nonce;
-    __m512i  *noncev = (__m512i*)vdata + 9;   // aligned
-    int thr_id = mythr->id; 
+    __m512i  *noncev = (__m512i*)vdata + 9; 
+    const int thr_id = mythr->id; 
+    const bool bench = opt_benchmark;

   mm512_bswap32_intrlv80_8x64( vdata, pdata );
+   *noncev = mm512_intrlv_blend_32(
+                _mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
+                                  n+3, 0, n+2, 0, n+1, 0, n  , 0 ), *noncev );
+//   skein512_8way_init( &skein512_8way_ctx );
+//   skein512_8way_update( &skein512_8way_ctx, vdata, 64 );
   do
   {
-       *noncev = mm512_intrlv_blend_32( mm512_bswap_32(
-                _mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
-                                  n+3, 0, n+2, 0, n+1, 0, n  , 0 ) ), *noncev );
-
       skeinhash_8way( hash, vdata );

       for ( int lane = 0; lane < 8; lane++ )
-       if (  hash7[ lane ] <= Htarg )
+       if ( unlikely( hash_d7[ lane ] <= targ_d7 ) && !bench )
       {
          extr_lane_8x32( lane_hash, hash, lane, 256 );
-          if ( fulltest( lane_hash, ptarget ) )
+          if ( valid_hash( lane_hash, ptarget ) )
          {
-             pdata[19] = n + lane;
+             pdata[19] = bswap_32( n + lane );
             submit_lane_solution( work, lane_hash, mythr, lane );
          }
       }
+       *noncev = _mm512_add_epi32( *noncev,
+                                  m512_const1_64( 0x0000000800000000 ) );
       n += 8;
-    } while ( (n < max_nonce-8) && !work_restart[thr_id].restart );
+    } while ( likely( (n < last_nonce) && !work_restart[thr_id].restart ) );

+    pdata[19] = n;
    *hashes_done = n - first_nonce;
    return 0;
 }

 #elif defined (SKEIN_4WAY)

+//static __thread skein512_4way_context skein512_4way_ctx
+//                                            __attribute__ ((aligned (64)));
+
 void skeinhash_4way( void *state, const void *input )
 {
     uint64_t vhash64[8*4] __attribute__ ((aligned (128)));
     skein512_4way_context ctx_skein;
+//     memcpy( &ctx_skein, &skein512_4way_ctx, sizeof( ctx_skein ) );
 #if defined(__SHA__)
     uint32_t hash0[16] __attribute__ ((aligned (64)));
     uint32_t hash1[16] __attribute__ ((aligned (64)));
     uint32_t hash2[16] __attribute__ ((aligned (64)));
     uint32_t hash3[16] __attribute__ ((aligned (64)));
-     SHA256_CTX           ctx_sha256;
+     SHA256_CTX ctx_sha256;
 #else
     uint32_t vhash32[16*4] __attribute__ ((aligned (64)));
     sha256_4way_context ctx_sha256;
 #endif

-     skein512_4way_init( &ctx_skein );
-     skein512_4way_update( &ctx_skein, input, 80 );
-     skein512_4way_close( &ctx_skein, vhash64 );
+     skein512_4way_full( &ctx_skein, vhash64, input, 80 );
+
+//     skein512_4way_update( &ctx_skein, input + (64*4), 16 );
+//     skein512_4way_close( &ctx_skein, vhash64 );

 #if defined(__SHA__)      
     dintrlv_4x64( hash0, hash1, hash2, hash3, vhash64, 512 );
@@ -127,38 +142,44 @@ int scanhash_skein_4way( struct work *work, uint32_t max_nonce,
                         uint64_t *hashes_done, struct thr_info *mythr )
 {
    uint32_t vdata[20*4] __attribute__ ((aligned (64)));
-    uint32_t hash[16*4] __attribute__ ((aligned (64)));
+    uint32_t hash[8*4] __attribute__ ((aligned (64)));
    uint32_t lane_hash[8] __attribute__ ((aligned (32)));
-    uint32_t *hash7 = &(hash[7<<2]);
+    uint32_t *hash_d7 = &(hash[7<<2]);
    uint32_t *pdata = work->data;
    uint32_t *ptarget = work->target;
-    const uint32_t Htarg = ptarget[7];
+    const uint32_t targ_d7 = ptarget[7];
    const uint32_t first_nonce = pdata[19];
+    const uint32_t last_nonce = max_nonce - 4;
    uint32_t n = first_nonce;
-    __m256i  *noncev = (__m256i*)vdata + 9;   // aligned
-    int thr_id = mythr->id; 
+    __m256i  *noncev = (__m256i*)vdata + 9; 
+    const int thr_id = mythr->id; 
+    const bool bench = opt_benchmark;

   mm256_bswap32_intrlv80_4x64( vdata, pdata );
+//   skein512_4way_init( &skein512_4way_ctx );
+//   skein512_4way_update( &skein512_4way_ctx, vdata, 64 );
+
+   *noncev = mm256_intrlv_blend_32(
+                _mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
   do
   {
-       *noncev = mm256_intrlv_blend_32( mm256_bswap_32(
-                _mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
-
       skeinhash_4way( hash, vdata );
-
       for ( int lane = 0; lane < 4; lane++ )
-       if (  hash7[ lane ] <= Htarg )
+       if ( unlikely( ( hash_d7[ lane ] <= targ_d7 ) && !bench ) )
       {
          extr_lane_4x32( lane_hash, hash, lane, 256 );
-          if ( fulltest( lane_hash, ptarget ) )
+          if ( valid_hash( lane_hash, ptarget ) )
          {
-             pdata[19] = n + lane;
+             pdata[19] = bswap_32( n + lane );
             submit_lane_solution( work, lane_hash, mythr, lane );
          }
       }
+       *noncev = _mm256_add_epi32( *noncev,
+                                  m256_const1_64( 0x0000000400000000 ) );
       n += 4;
-    } while ( (n < max_nonce-4) && !work_restart[thr_id].restart );
+    } while ( likely( (n < last_nonce) && !work_restart[thr_id].restart ) );

+    pdata[19] = n;
    *hashes_done = n - first_nonce;
    return 0;
 }
--- a/algo/skein/skein-gate.c
+++ b/algo/skein/skein-gate.c
@@ -4,14 +4,16 @@

 bool register_skein_algo( algo_gate_t* gate )
 {
-    gate->optimizations = AVX2_OPT | AVX512_OPT | SHA_OPT;
 #if defined (SKEIN_8WAY)
+    gate->optimizations = AVX2_OPT | AVX512_OPT;
    gate->scanhash  = (void*)&scanhash_skein_8way;
    gate->hash      = (void*)&skeinhash_8way;
 #elif defined (SKEIN_4WAY)
+    gate->optimizations = AVX2_OPT | AVX512_OPT | SHA_OPT;
    gate->scanhash  = (void*)&scanhash_skein_4way;
    gate->hash      = (void*)&skeinhash_4way;
 #else
+    gate->optimizations = AVX2_OPT | AVX512_OPT | SHA_OPT;
    gate->scanhash  = (void*)&scanhash_skein;
    gate->hash      = (void*)&skeinhash;
 #endif
--- a/algo/skein/skein-hash-4way.c
+++ b/algo/skein/skein-hash-4way.c
@@ -654,6 +654,80 @@ skein_big_close_8way( skein512_8way_context *sc, unsigned ub, unsigned n,
   memcpy_512( dst, buf, out_len >> 3 );
 }

+void skein512_8way_full( skein512_8way_context *sc, void *out, const void *data,
+                     size_t len )
+{
+   __m512i h0, h1, h2, h3, h4, h5, h6, h7;
+   __m512i *vdata = (__m512i*)data;
+   __m512i *buf = sc->buf;
+   size_t ptr = 0;
+   unsigned first;
+   uint64_t bcount = 0;
+   const int buf_size = 64;   // 64 * __m512i
+
+// Init
+
+        h0 = m512_const1_64( 0x4903ADFF749C51CE );
+        h1 = m512_const1_64( 0x0D95DE399746DF03 );
+        h2 = m512_const1_64( 0x8FD1934127C79BCE );
+        h3 = m512_const1_64( 0x9A255629FF352CB1 );
+        h4 = m512_const1_64( 0x5DB62599DF6CA7B0 );
+        h5 = m512_const1_64( 0xEABE394CA9D5C3F4 );
+        h6 = m512_const1_64( 0x991112C71A75B523 );
+        h7 = m512_const1_64( 0xAE18A40B660FCC33 );
+
+// Update
+
+   if ( len <= buf_size - ptr )
+   {
+       memcpy_512( buf + (ptr>>3), vdata, len>>3 );
+       ptr += len;
+   }
+   else
+   {
+      first = ( bcount == 0 ) << 7;
+      do {
+         size_t clen;
+
+         if ( ptr == buf_size )
+         {
+            bcount ++;
+            UBI_BIG_8WAY( 96 + first, 0 );
+            first = 0;
+            ptr = 0;
+         }
+         clen = buf_size - ptr;
+         if ( clen > len )
+            clen = len;
+         memcpy_512( buf + (ptr>>3), vdata, clen>>3 );
+         ptr += clen;
+         vdata += (clen>>3);
+         len -= clen;
+      } while ( len > 0 );
+   }
+
+// Close
+
+   unsigned et;
+
+   memset_zero_512( buf + (ptr>>3), (buf_size - ptr) >> 3 );
+   et = 352 + ((bcount == 0) << 7);
+   UBI_BIG_8WAY( et, ptr );
+
+   memset_zero_512( buf, buf_size >> 3 );
+   bcount = 0;
+   UBI_BIG_8WAY( 510, 8 );
+
+   casti_m512i( out, 0 ) = h0;
+   casti_m512i( out, 1 ) = h1;
+   casti_m512i( out, 2 ) = h2;
+   casti_m512i( out, 3 ) = h3;
+   casti_m512i( out, 4 ) = h4;
+   casti_m512i( out, 5 ) = h5;
+   casti_m512i( out, 6 ) = h6;
+   casti_m512i( out, 7 ) = h7;
+}
+
 void
 skein256_8way_update(void *cc, const void *data, size_t len)
 {
@@ -709,6 +783,7 @@ void skein512_4way_init( skein512_4way_context *sc )
        sc->ptr = 0;
 }

+// Do not use for 128 bit data length
 static void
 skein_big_core_4way( skein512_4way_context *sc, const void *data,
                     size_t len )
@@ -794,6 +869,79 @@ skein_big_close_4way( skein512_4way_context *sc, unsigned ub, unsigned n,
   memcpy_256( dst, buf, out_len >> 3 );
 }

+void
+skein512_4way_full( skein512_4way_context *sc, void *out, const void *data,
+                     size_t len )
+{
+   __m256i h0, h1, h2, h3, h4, h5, h6, h7;
+   __m256i *vdata = (__m256i*)data;
+   __m256i *buf = sc->buf;
+   size_t ptr = 0;
+   unsigned first;
+   const int buf_size = 64;   // 64 * __m256i
+   uint64_t bcount = 0;
+
+   h0 = m256_const1_64( 0x4903ADFF749C51CE );
+   h1 = m256_const1_64( 0x0D95DE399746DF03 );
+   h2 = m256_const1_64( 0x8FD1934127C79BCE );
+   h3 = m256_const1_64( 0x9A255629FF352CB1 );
+   h4 = m256_const1_64( 0x5DB62599DF6CA7B0 );
+   h5 = m256_const1_64( 0xEABE394CA9D5C3F4 );
+   h6 = m256_const1_64( 0x991112C71A75B523 );
+   h7 = m256_const1_64( 0xAE18A40B660FCC33 );
+
+// Update     
+
+   if ( len <= buf_size - ptr )
+   {
+       memcpy_256( buf + (ptr>>3), vdata, len>>3 );
+       ptr += len;
+   }
+   else
+   {
+      first = ( bcount == 0 ) << 7;
+      do {
+         size_t clen;
+
+         if ( ptr == buf_size )
+         {
+            bcount ++;
+            UBI_BIG_4WAY( 96 + first, 0 );
+            first = 0;
+            ptr = 0;
+         }
+         clen = buf_size - ptr;
+         if ( clen > len )
+            clen = len;
+         memcpy_256( buf + (ptr>>3), vdata, clen>>3 );
+         ptr += clen;
+         vdata += (clen>>3);
+         len -= clen;
+      } while ( len > 0 );
+   }
+
+// Close
+
+   unsigned et;
+
+   memset_zero_256( buf + (ptr>>3), (buf_size - ptr) >> 3 );
+   et = 352 + ((bcount == 0) << 7);
+   UBI_BIG_4WAY( et, ptr );
+
+   memset_zero_256( buf, buf_size >> 3 );
+   bcount = 0;
+   UBI_BIG_4WAY( 510, 8 );
+
+   casti_m256i( out, 0 ) = h0;
+   casti_m256i( out, 1 ) = h1;
+   casti_m256i( out, 2 ) = h2;
+   casti_m256i( out, 3 ) = h3;
+   casti_m256i( out, 4 ) = h4;
+   casti_m256i( out, 5 ) = h5;
+   casti_m256i( out, 6 ) = h6;
+   casti_m256i( out, 7 ) = h7;
+}
+
 void
 skein256_4way_update(void *cc, const void *data, size_t len)
 {
@@ -806,6 +954,9 @@ skein256_4way_close(void *cc, void *dst)
        skein_big_close_4way(cc, 0, 0, dst, 32);
 }

+
+
+// Do not use with 128 bit data
 void
 skein512_4way_update(void *cc, const void *data, size_t len)
 {
--- a/algo/skein/skein-hash-4way.h
+++ b/algo/skein/skein-hash-4way.h
@@ -63,6 +63,8 @@ typedef struct
 typedef skein_8way_big_context skein512_8way_context;
 typedef skein_8way_big_context skein256_8way_context;

+void skein512_8way_full( skein512_8way_context *sc, void *out,
+                         const void *data, size_t len );
 void skein512_8way_init( skein512_8way_context *sc );
 void skein512_8way_update( void *cc, const void *data, size_t len );
 void skein512_8way_close( void *cc, void *dst );
@@ -85,6 +87,8 @@ typedef skein_4way_big_context skein512_4way_context;
 typedef skein_4way_big_context skein256_4way_context;

 void skein512_4way_init( skein512_4way_context *sc );
+void skein512_4way_full( skein512_4way_context *sc, void *out,
+                         const void *data, size_t len );
 void skein512_4way_update( void *cc, const void *data, size_t len );
 void skein512_4way_close( void *cc, void *dst );

--- a/algo/skein/skein2-4way.c
+++ b/algo/skein/skein2-4way.c
@@ -5,114 +5,131 @@

 #if defined(SKEIN_8WAY)

+// static __thread skein512_8way_context skein512_8way_ctx
+//                                             __attribute__ ((aligned (64)));
+
 void skein2hash_8way( void *output, const void *input )
 {
-   skein512_8way_context ctx;
   uint64_t hash[16*8] __attribute__ ((aligned (128)));
+   skein512_8way_context ctx;
+//   memcpy( &ctx, &skein512_8way_ctx, sizeof( ctx ) );

-   skein512_8way_init( &ctx );
-   skein512_8way_update( &ctx, input, 80 );
-   skein512_8way_close( &ctx, hash );
+   skein512_8way_full( &ctx, hash, input, 80 );

-   skein512_8way_init( &ctx );
-   skein512_8way_update( &ctx, hash, 64 );
-   skein512_8way_close( &ctx, output );
+//   skein512_8way_update( &ctx, input + (64*8), 16 );
+//   skein512_8way_close( &ctx, hash );
+
+   skein512_8way_full( &ctx, output, hash, 64 );
 }

 int scanhash_skein2_8way( struct work *work, uint32_t max_nonce,
                          uint64_t *hashes_done, struct thr_info *mythr )
 {
-    uint32_t hash[16*8] __attribute__ ((aligned (128)));
+    uint64_t hash[8*8] __attribute__ ((aligned (128)));
    uint32_t vdata[20*8] __attribute__ ((aligned (64)));
    uint32_t lane_hash[8] __attribute__ ((aligned (64)));
-    uint32_t *hash7 = &(hash[49]);
+    uint64_t *hashq3 = &(hash[3*8]);
    uint32_t *pdata = work->data;
    uint32_t *ptarget = work->target;
-    const uint32_t Htarg = ptarget[7];
+    const uint64_t targq3 = ((uint64_t*)ptarget)[3];
    const uint32_t first_nonce = pdata[19];
+    const uint32_t last_nonce = max_nonce - 8;
    uint32_t n = first_nonce;
-    __m512i  *noncev = (__m512i*)vdata + 9;   // aligned
-    int thr_id = mythr->id; 
+    __m512i  *noncev = (__m512i*)vdata + 9; 
+    const int thr_id = mythr->id; 
+    const bool bench = opt_benchmark;

    mm512_bswap32_intrlv80_8x64( vdata, pdata );
+    *noncev = mm512_intrlv_blend_32(
+                _mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
+                                  n+3, 0, n+2, 0, n+1, 0, n  , 0 ), *noncev );
+//    skein512_8way_init( &skein512_8way_ctx );
+//    skein512_8way_update( &skein512_8way_ctx, vdata, 64 );
    do
    {
-       *noncev = mm512_intrlv_blend_32( mm512_bswap_32(
-                _mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
-                                  n+3, 0, n+2, 0, n+1, 0, n  , 0 ) ), *noncev );
-
       skein2hash_8way( hash, vdata );

       for ( int lane = 0; lane < 8; lane++ )
-       if ( hash7[ lane<<1 ] <= Htarg )
+       if ( unlikely( hashq3[ lane ] <= targq3 && !bench ) )
       {
          extr_lane_8x64( lane_hash, hash, lane, 256 );
-          if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
+          if ( valid_hash( lane_hash, ptarget ) && !bench )
          {
-             pdata[19] = n + lane;
+             pdata[19] = bswap_32( n + lane );
             submit_lane_solution( work, lane_hash, mythr, lane );
          }
       }
+       *noncev = _mm512_add_epi32( *noncev,
+                                  m512_const1_64( 0x0000000800000000 ) );
       n += 8;
-    } while ( (n < max_nonce-8) && !work_restart[thr_id].restart );
+    } while ( likely( (n < last_nonce) && !work_restart[thr_id].restart ) );

-    *hashes_done = n - first_nonce + 1;
+    pdata[19] = n;
+    *hashes_done = n - first_nonce;
    return 0;
 }

 #elif defined(SKEIN_4WAY)

+//static __thread skein512_4way_context skein512_4way_ctx
+//                                           __attribute__ ((aligned (64)));
+
 void skein2hash_4way( void *output, const void *input )
 {
   skein512_4way_context ctx;
+//   memcpy( &ctx, &skein512_4way_ctx, sizeof( ctx ) ); 
   uint64_t hash[16*4] __attribute__ ((aligned (64)));

-   skein512_4way_init( &ctx );
-   skein512_4way_update( &ctx, input, 80 );
-   skein512_4way_close( &ctx, hash );
+//   skein512_4way_update( &ctx, input + (64*4), 16 );
+//   skein512_4way_close( &ctx, hash );

-   skein512_4way_init( &ctx );
-   skein512_4way_update( &ctx, hash, 64 );
-   skein512_4way_close( &ctx, output );
+   skein512_4way_full( &ctx, hash, input, 80 );
+   skein512_4way_full( &ctx, output, hash, 64 );
 }

 int scanhash_skein2_4way( struct work *work, uint32_t max_nonce,
                          uint64_t *hashes_done, struct thr_info *mythr )
 {
-    uint32_t hash[16*4] __attribute__ ((aligned (64)));
+    uint64_t hash[8*4] __attribute__ ((aligned (64)));
    uint32_t vdata[20*4] __attribute__ ((aligned (64)));
    uint32_t lane_hash[8] __attribute__ ((aligned (64)));
-    uint32_t *hash7 = &(hash[25]);
+    uint64_t *hash_q3 = &(hash[3*4]);
    uint32_t *pdata = work->data;
    uint32_t *ptarget = work->target;
-    const uint32_t Htarg = ptarget[7];
+    const uint64_t targ_q3 = ((uint64_t*)ptarget)[3];
    const uint32_t first_nonce = pdata[19];
+    const uint32_t last_nonce = max_nonce - 4;
    uint32_t n = first_nonce;
-    __m256i  *noncev = (__m256i*)vdata + 9;   // aligned
-    int thr_id = mythr->id;  // thr_id arg is deprecated
+    __m256i  *noncev = (__m256i*)vdata + 9; 
+    const int thr_id = mythr->id;  
+    const bool bench = opt_benchmark;

    mm256_bswap32_intrlv80_4x64( vdata, pdata );
+//    skein512_4way_init( &skein512_4way_ctx );
+//    skein512_4way_update( &skein512_4way_ctx, vdata, 64 );
+    *noncev = mm256_intrlv_blend_32(
+                _mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
    do 
    {
-       *noncev = mm256_intrlv_blend_32( mm256_bswap_32(
-                _mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
-
       skein2hash_4way( hash, vdata );

       for ( int lane = 0; lane < 4; lane++ )
-       if ( hash7[ lane<<1 ] <= Htarg )
+       if ( hash_q3[ lane ] <= targ_q3 )
       {
          extr_lane_4x64( lane_hash, hash, lane, 256 );
-          if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
+          if ( valid_hash( lane_hash, ptarget ) && !bench )
          {
-             pdata[19] = n + lane;
+             pdata[19] = bswap_32( n + lane );
             submit_lane_solution( work, lane_hash, mythr, lane );
          }
       }
+       *noncev = _mm256_add_epi32( *noncev,
+                                  m256_const1_64( 0x0000000400000000 ) );
       n += 4;
-    } while ( (n < max_nonce) && !work_restart[thr_id].restart );
+    } while ( (n < last_nonce) && !work_restart[thr_id].restart );

-    *hashes_done = n - first_nonce + 1;
+    pdata[19] = n;
+    *hashes_done = n - first_nonce;
    return 0;
 }

--- a/algo/x16/hex.c
+++ b/algo/x16/hex.c
@@ -30,9 +30,6 @@
  #include "algo/groestl/aes_ni/hash-groestl.h"
 #endif

-static __thread uint32_t s_ntime = UINT32_MAX;
-static __thread char hashOrder[X16R_HASH_FUNC_COUNT + 1] = { 0 };
-
 static void hex_getAlgoString(const uint32_t* prevblock, char *output)
 {
   char *sptr = output;
@@ -86,7 +83,7 @@ void hex_hash( void* output, const void* input )
   void *in = (void*) input;
   int size = 80;

-   char elem = hashOrder[0];
+   char elem = x16r_hash_order[0];
   uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';

   for ( int i = 0; i < 16; i++ )
@@ -235,7 +232,7 @@ int scanhash_hex( struct work *work, uint32_t max_nonce,
   uint32_t *pdata = work->data;
   uint32_t *ptarget = work->target;
   const uint32_t first_nonce = pdata[19];
-   const uint32_t last_nonce = max_nonce - 4;
+   const uint32_t last_nonce = max_nonce;
   const int thr_id = mythr->id;
   uint32_t nonce = first_nonce;
   volatile uint8_t *restart = &(work_restart[thr_id].restart);
@@ -244,17 +241,18 @@ int scanhash_hex( struct work *work, uint32_t max_nonce,

   mm128_bswap32_80( edata, pdata );
   
+   static __thread uint32_t s_ntime = UINT32_MAX;
   uint32_t ntime = swab32(pdata[17]);
   if ( s_ntime != ntime )
   {
-      hex_getAlgoString( (const uint32_t*) (&edata[1]), hashOrder );
+      hex_getAlgoString( (const uint32_t*) (&edata[1]), x16r_hash_order );
      s_ntime = ntime;
      if ( opt_debug && !thr_id )
-              applog( LOG_INFO, "hash order %s (%08x)", hashOrder, ntime );
+              applog( LOG_INFO, "hash order %s (%08x)", x16r_hash_order, ntime );
   }

   // Do midstate prehash on hash functions with block size <= 64 bytes.
-   const char elem = hashOrder[0];
+   const char elem = x16r_hash_order[0];
   const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
   switch ( algo )
   {
--- a/algo/x16/x16r-4way.c
+++ b/algo/x16/x16r-4way.c
@@ -692,14 +692,15 @@ void x16r_4way_hash_generic( void* output, const void* input )
         break;
         case SKEIN:
            if ( i == 0 )
+            {
               skein512_4way_update( &ctx.skein, input + (64<<2), 16 );
+               skein512_4way_close( &ctx.skein, vhash );
+            }
            else
            {
               intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
-               skein512_4way_init( &ctx.skein );
-               skein512_4way_update( &ctx.skein, vhash, size );
+               skein512_4way_full( &ctx.skein, vhash, vhash, size );
            }
-            skein512_4way_close( &ctx.skein, vhash );
            dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
         break;
         case LUFFA:
--- a/algo/x16/x16r.c
+++ b/algo/x16/x16r.c
@@ -134,7 +134,8 @@ void x16r_hash_generic( void* output, const void* input )
         break;
         case ECHO:
 #if defined(__AES__)
-            echo_full( &ctx.echo, hash, 512, in, size );
+            echo_full( &ctx.echo, (BitSequence*)hash, 512,
+                            (const BitSequence*)in, size );
 #else
            sph_echo512_init( &ctx.echo );
            sph_echo512( &ctx.echo, in, size );
@@ -237,7 +238,7 @@ int scanhash_x16r( struct work *work, uint32_t max_nonce,
      nonce++;
   } while ( nonce < max_nonce && !(*restart) );
   pdata[19] = nonce;
-   *hashes_done = pdata[19] - first_nonce + 1;
+   *hashes_done = pdata[19] - first_nonce;
   return 0;
 }

--- a/algo/x16/x16rt.c
+++ b/algo/x16/x16rt.c
@@ -46,7 +46,7 @@ int scanhash_x16rt( struct work *work, uint32_t max_nonce,
      nonce++;
   } while ( nonce < max_nonce && !(*restart) );
   pdata[19] = nonce;
-   *hashes_done = pdata[19] - first_nonce + 1;
+   *hashes_done = pdata[19] - first_nonce;
   return 0;
 }

--- a/algo/x16/x16rv2-4way.c
+++ b/algo/x16/x16rv2-4way.c
@@ -35,9 +35,6 @@

 #if defined (X16RV2_8WAY)

-static __thread uint32_t s_ntime = UINT32_MAX;
-static __thread char hashOrder[X16R_HASH_FUNC_COUNT + 1] = { 0 };
-
 union _x16rv2_8way_context_overlay
 {
    blake512_8way_context   blake;
@@ -96,7 +93,7 @@ void x16rv2_8way_hash( void* output, const void* input )

   for ( int i = 0; i < 16; i++ )
   {
-      const char elem = hashOrder[i];
+      const char elem = x16r_hash_order[i];
      const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';

      switch ( algo )
@@ -651,17 +648,19 @@ int scanhash_x16rv2_8way( struct work *work, uint32_t max_nonce,

   bedata1[0] = bswap_32( pdata[1] );
   bedata1[1] = bswap_32( pdata[2] );
+
+   static __thread uint32_t s_ntime = UINT32_MAX;
   const uint32_t ntime = bswap_32( pdata[17] );
   if ( s_ntime != ntime )
   {
-      x16_r_s_getAlgoString( (const uint8_t*)bedata1, hashOrder );
+      x16_r_s_getAlgoString( (const uint8_t*)bedata1, x16r_hash_order );
      s_ntime = ntime;
      if ( opt_debug && !thr_id )
-              applog( LOG_INFO, "hash order %s (%08x)", hashOrder, ntime );
+         applog( LOG_INFO, "hash order %s (%08x)", x16r_hash_order, ntime );
   }

   // Do midstate prehash on hash functions with block size <= 64 bytes.
-   const char elem = hashOrder[0];
+   const char elem = x16r_hash_order[0];
   const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
   switch ( algo )
   {
@@ -737,9 +736,6 @@ int scanhash_x16rv2_8way( struct work *work, uint32_t max_nonce,

 #elif defined (X16RV2_4WAY)

-static __thread uint32_t s_ntime = UINT32_MAX;
-static __thread char hashOrder[X16R_HASH_FUNC_COUNT + 1] = { 0 };
-
 union _x16rv2_4way_context_overlay
 {
    blake512_4way_context   blake;
@@ -789,7 +785,7 @@ void x16rv2_4way_hash( void* output, const void* input )

   for ( int i = 0; i < 16; i++ )
   {
-      const char elem = hashOrder[i];
+      const char elem = x16r_hash_order[i];
      const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';

      switch ( algo )
@@ -1130,17 +1126,19 @@ int scanhash_x16rv2_4way( struct work *work, uint32_t max_nonce,

   bedata1[0] = bswap_32( pdata[1] );
   bedata1[1] = bswap_32( pdata[2] );
+
+   static __thread uint32_t s_ntime = UINT32_MAX;
   const uint32_t ntime = bswap_32(pdata[17]);
   if ( s_ntime != ntime )
   {
-      x16_r_s_getAlgoString( (const uint8_t*)bedata1, hashOrder );
+      x16_r_s_getAlgoString( (const uint8_t*)bedata1, x16r_hash_order );
      s_ntime = ntime;
      if ( opt_debug && !thr_id )
-              applog( LOG_DEBUG, "hash order %s (%08x)", hashOrder, ntime );
+              applog( LOG_DEBUG, "hash order %s (%08x)", x16r_hash_order, ntime );
   }

   // Do midstate prehash on hash functions with block size <= 64 bytes.
-   const char elem = hashOrder[0];
+   const char elem = x16r_hash_order[0];
   const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
   switch ( algo )
   {
--- a/algo/x16/x16rv2.c
+++ b/algo/x16/x16rv2.c
@@ -34,7 +34,6 @@
 #endif

 static __thread uint32_t s_ntime = UINT32_MAX;
-static __thread char hashOrder[X16R_HASH_FUNC_COUNT + 1] = { 0 };

 union _x16rv2_context_overlay
 {
@@ -74,16 +73,10 @@ void x16rv2_hash( void* output, const void* input )
   x16rv2_context_overlay ctx;
   void *in = (void*) input;
   int size = 80;
-/*
-   if ( s_ntime == UINT32_MAX )
-   {
-      const uint8_t* in8 = (uint8_t*) input;
-      x16_r_s_getAlgoString( &in8[4], hashOrder );
-   }
-*/
+
   for ( int i = 0; i < 16; i++ )
   {
-      const char elem = hashOrder[i];
+      const char elem = x16r_hash_order[i];
      const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';

      switch ( algo )
@@ -203,48 +196,48 @@ int scanhash_x16rv2( struct work *work, uint32_t max_nonce,
                   uint64_t *hashes_done, struct thr_info *mythr )
 {
   uint32_t _ALIGN(128) hash32[8];
-   uint32_t _ALIGN(128) endiandata[20];
+   uint32_t _ALIGN(128) edata[20];
   uint32_t *pdata = work->data;
   uint32_t *ptarget = work->target;
-   const uint32_t Htarg = ptarget[7];
   const uint32_t first_nonce = pdata[19];
-   int thr_id = mythr->id;  // thr_id arg is deprecated
+   const int thr_id = mythr->id;  
   uint32_t nonce = first_nonce;
   volatile uint8_t *restart = &(work_restart[thr_id].restart);
+   const bool bench = opt_benchmark;

-   casti_m128i( endiandata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
-   casti_m128i( endiandata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
-   casti_m128i( endiandata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
-   casti_m128i( endiandata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
-   casti_m128i( endiandata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
+   casti_m128i( edata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
+   casti_m128i( edata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
+   casti_m128i( edata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
+   casti_m128i( edata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
+   casti_m128i( edata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );

+   static __thread uint32_t s_ntime = UINT32_MAX;
   if ( s_ntime != pdata[17] )
   {
      uint32_t ntime = swab32(pdata[17]);
-      x16_r_s_getAlgoString( (const uint8_t*) (&endiandata[1]), hashOrder );
+      x16_r_s_getAlgoString( (const uint8_t*) (&edata[1]), x16r_hash_order );
      s_ntime = ntime;
      if ( opt_debug && !thr_id )
-              applog( LOG_DEBUG, "hash order %s (%08x)", hashOrder, ntime );
+              applog( LOG_DEBUG, "hash order %s (%08x)",
+                                 x16r_hash_order, ntime );
   }

-   if ( opt_benchmark )
-      ptarget[7] = 0x0cff;
+   if ( bench )   ptarget[7] = 0x0cff;

   do
   {
-      be32enc( &endiandata[19], nonce );
-      x16rv2_hash( hash32, endiandata );
+      edata[19] = nonce;
+      x16rv2_hash( hash32, edata );

-      if ( hash32[7] <= Htarg )
-      if (fulltest( hash32, ptarget ) && !opt_benchmark )
+      if ( unlikely( valid_hash( hash32, ptarget ) && !bench ) )
      {
-         pdata[19] = nonce;
+         pdata[19] = bswap_32( nonce );
         submit_solution( work, hash32, mythr );
      }
      nonce++;
   } while ( nonce < max_nonce && !(*restart) );
   pdata[19] = nonce;
-   *hashes_done = pdata[19] - first_nonce + 1;
+   *hashes_done = pdata[19] - first_nonce;
   return 0;
 }

--- a/algo/x16/x21s.c
+++ b/algo/x16/x21s.c
@@ -97,7 +97,7 @@ int scanhash_x21s( struct work *work, uint32_t max_nonce,
      nonce++;
   } while ( nonce < max_nonce && !(*restart) );
   pdata[19] = nonce;
-   *hashes_done = pdata[19] - first_nonce + 1;
+   *hashes_done = pdata[19] - first_nonce;
   return 0;
 }

--- a/algo/x17/sonoa-4way.c
+++ b/algo/x17/sonoa-4way.c
--- a/algo/x17/sonoa.c
+++ b/algo/x17/sonoa.c
@@ -563,59 +563,31 @@ void sonoa_hash( void *state, const void *input )
 }

 int scanhash_sonoa( struct work *work, uint32_t max_nonce,
-	            uint64_t *hashes_done, struct thr_info *mythr )
+             uint64_t *hashes_done, struct thr_info *mythr)   // sonoa nonce scan: one sonoa_hash call per nonce; always returns 0
 {
-   uint32_t _ALIGN(128) hash32[8];
-   uint32_t _ALIGN(128) endiandata[20];
+   uint32_t edata[20] __attribute__((aligned(64)));   // hash input; filled from pdata below, word 19 rewritten per nonce
+   uint32_t hash64[8] __attribute__((aligned(64)));   // output of sonoa_hash, checked by valid_hash
    uint32_t *pdata = work->data;
    uint32_t *ptarget = work->target;
+   uint32_t n = pdata[19];   // scan starts at pdata[19] itself (the removed code started at pdata[19] - 1 and pre-incremented)
    const uint32_t first_nonce = pdata[19];
-   const uint32_t Htarg = ptarget[7];
-   uint32_t n = pdata[19] - 1;
-   int thr_id = mythr->id;  // thr_id arg is deprecated
+   const int thr_id = mythr->id;   // index into work_restart[]
+   const bool bench = opt_benchmark;   // sampled once; suppresses submission below
 
-   uint64_t htmax[] =
+   mm128_bswap32_80( edata, pdata );   // replaces the five removed mm128_bswap_32 copies; presumably byte-swaps all 20 words - confirm in helper
+
+   do
    {
-	0,
-	0xF,
-	0xFF,
-	0xFFF,
-	0xFFFF,
-	0x10000000
-   };
-   uint32_t masks[] =
-   {
-	0xFFFFFFFF,
-	0xFFFFFFF0,
-	0xFFFFFF00,
-	0xFFFFF000,
-	0xFFFF0000,
-	0
-   };
-
-
-   // we need bigendian data...
-   casti_m128i( endiandata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
-   casti_m128i( endiandata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
-   casti_m128i( endiandata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
-   casti_m128i( endiandata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
-   casti_m128i( endiandata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
-
-   for ( int m = 0; m < 6; m++ ) if ( Htarg <= htmax[m] )
-   {
-      uint32_t mask = masks[m];
-      do
+      edata[19] = n;   // nonce stored as a plain uint32_t (the removed code wrote it with be32enc)
+      sonoa_hash( hash64, edata );
+      if ( unlikely( valid_hash( hash64, ptarget ) && !bench ) )   // never taken in benchmark mode
       {
-         pdata[19] = ++n;
-         be32enc(&endiandata[19], n);
-         sonoa_hash(hash32, endiandata);
-         if ( !( hash32[7] & mask ) )
-         if ( fulltest( hash32, ptarget ) && !opt_benchmark )
-            submit_solution( work, hash32, mythr );
-	   } while (n < max_nonce && !work_restart[thr_id].restart);
-	   break;
-	}
-   *hashes_done = n - first_nonce + 1;
+         pdata[19] = bswap_32( n );   // winning nonce is written byte-swapped into work->data before submission
+         submit_solution( work, hash64, mythr );
+      }
+      n++;
+   } while ( n < max_nonce && !work_restart[thr_id].restart );   // stop at max_nonce or when this thread's restart flag is set
+   *hashes_done = n - first_nonce;   // n is bumped once per hash, so this is the exact count (old code added 1)
    pdata[19] = n;
    return 0;
 }
--- a/algo/x17/x17-4way.c
+++ b/algo/x17/x17-4way.c
@@ -74,9 +74,7 @@ void x17_8way_hash( void *state, const void *input )

     blake512_8way_full( &ctx.blake, vhash, input, 80 );

-     bmw512_8way_init( &ctx.bmw );
-     bmw512_8way_update( &ctx.bmw, vhash, 64 );
-     bmw512_8way_close( &ctx.bmw, vhash );
+     bmw512_8way_full( &ctx.bmw, vhash, vhash, 64 );

 #if defined(__VAES__)

@@ -106,9 +104,7 @@ void x17_8way_hash( void *state, const void *input )

 #endif

-     skein512_8way_init( &ctx.skein );
-     skein512_8way_update( &ctx.skein, vhash, 64 );
-     skein512_8way_close( &ctx.skein, vhash );
+     skein512_8way_full( &ctx.skein, vhash, vhash, 64 );

     jh512_8way_init( &ctx.jh );
     jh512_8way_update( &ctx.jh, vhash, 64 );
@@ -287,18 +283,18 @@ void x17_8way_hash( void *state, const void *input )
 int scanhash_x17_8way( struct work *work, uint32_t max_nonce,
                       uint64_t *hashes_done, struct thr_info *mythr )
 {
-   uint32_t hash[8*16] __attribute__ ((aligned (128)));
-   uint32_t vdata[24*8] __attribute__ ((aligned (64)));
+   uint32_t hash[8*8] __attribute__ ((aligned (128)));
+   uint32_t vdata[20*8] __attribute__ ((aligned (64)));
   uint32_t lane_hash[8] __attribute__ ((aligned (64)));
-   uint32_t *hash7 = &(hash[7<<3]);
+   uint32_t *hashd7 = &(hash[7*8]);
   uint32_t *pdata = work->data;
   const uint32_t *ptarget = work->target;
   const uint32_t first_nonce = pdata[19];
   const uint32_t last_nonce = max_nonce - 8;
-   __m512i  *noncev = (__m512i*)vdata + 9;   // aligned
+   __m512i  *noncev = (__m512i*)vdata + 9; 
   uint32_t n = first_nonce;
   const int thr_id = mythr->id;
-   const uint32_t Htarg = ptarget[7];
+   const uint32_t targ32 = ptarget[7];
   const bool bench = opt_benchmark;

   mm512_bswap32_intrlv80_8x64( vdata, pdata );
@@ -310,7 +306,7 @@ int scanhash_x17_8way( struct work *work, uint32_t max_nonce,
      x17_8way_hash( hash, vdata );

      for ( int lane = 0; lane < 8; lane++ )
-      if ( unlikely( ( hash7[ lane ] <= Htarg ) && !bench ) )
+      if ( unlikely( ( hashd7[ lane ] <= targ32 ) && !bench ) )
      {
         extr_lane_8x32( lane_hash, hash, lane, 256 );
         if ( likely( valid_hash( lane_hash, ptarget ) ) )
@@ -378,9 +374,7 @@ void x17_4way_hash( void *state, const void *input )

     intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );

-     skein512_4way_init( &ctx.skein );
-     skein512_4way_update( &ctx.skein, vhash, 64 );
-     skein512_4way_close( &ctx.skein, vhash );
+     skein512_4way_full( &ctx.skein, vhash, vhash, 64 );

     jh512_4way_init( &ctx.jh );
     jh512_4way_update( &ctx.jh, vhash, 64 );
@@ -474,18 +468,18 @@ void x17_4way_hash( void *state, const void *input )
 int scanhash_x17_4way( struct work *work, uint32_t max_nonce,
                       uint64_t *hashes_done, struct thr_info *mythr )
 {
-   uint32_t hash[16*4] __attribute__ ((aligned (64)));
+   uint32_t hash[8*4] __attribute__ ((aligned (64)));
   uint32_t vdata[20*4] __attribute__ ((aligned (64)));
   uint32_t lane_hash[8] __attribute__ ((aligned (64)));
-   uint32_t *hash7 = &(hash[7<<2]);
+   uint32_t *hashd7 = &(hash[ 7*4 ]);
   uint32_t *pdata = work->data;
   const uint32_t *ptarget = work->target;
   const uint32_t first_nonce = pdata[19];
-   const uint32_t last_nonce = max_nonce -4;
-   __m256i  *noncev = (__m256i*)vdata + 9;   // aligned
+   const uint32_t last_nonce = max_nonce - 4;
+   __m256i  *noncev = (__m256i*)vdata + 9;
   uint32_t n = first_nonce;
   const int thr_id = mythr->id;
-   const uint32_t Htarg = ptarget[7];
+   const uint32_t targ32 = ptarget[7];
   const bool bench = opt_benchmark;

   mm256_bswap32_intrlv80_4x64( vdata, pdata );
@@ -496,7 +490,7 @@ int scanhash_x17_4way( struct work *work, uint32_t max_nonce,
      x17_4way_hash( hash, vdata );

      for ( int lane = 0; lane < 4; lane++ )
-      if ( unlikely( hash7[ lane ] <= Htarg && !bench ) )
+      if ( unlikely( hashd7[ lane ] <= targ32 && !bench ) )
      {  
         extr_lane_4x32( lane_hash, hash, lane, 256 );
         if ( valid_hash( lane_hash, ptarget ) )
--- a/algo/x17/x17.c
+++ b/algo/x17/x17.c
@@ -169,8 +169,8 @@ int scanhash_x17( struct work *work, uint32_t max_nonce,
         submit_solution( work, hash64, mythr );
      }
      n++;
-   } while ( n < max_nonce && !work_restart[thr_id].restart);
-   *hashes_done = n - first_nonce + 1;
+   } while ( n < max_nonce && !work_restart[thr_id].restart );
+   *hashes_done = n - first_nonce;
   pdata[19] = n;
   return 0;
 }
--- a/algo/x17/xevan-4way.c
+++ b/algo/x17/xevan-4way.c
@@ -76,9 +76,7 @@ void xevan_8way_hash( void *output, const void *input )
     blake512_8way_full( &ctx.blake, vhash, input, 80 );
     memset( &vhash[8<<3], 0, 64<<3 );

-     bmw512_8way_init( &ctx.bmw );
-     bmw512_8way_update( &ctx.bmw, vhash, dataLen );
-     bmw512_8way_close( &ctx.bmw, vhash );
+     bmw512_8way_full( &ctx.bmw, vhash, vhash, dataLen );

 #if defined(__VAES__)

@@ -108,9 +106,7 @@ void xevan_8way_hash( void *output, const void *input )

 #endif

-     skein512_8way_init( &ctx.skein );
-     skein512_8way_update( &ctx.skein, vhash, dataLen );
-     skein512_8way_close( &ctx.skein, vhash );
+     skein512_8way_full( &ctx.skein, vhash, vhash, dataLen );

     jh512_8way_init( &ctx.jh );
     jh512_8way_update( &ctx.jh, vhash, dataLen );
@@ -291,9 +287,7 @@ void xevan_8way_hash( void *output, const void *input )

     blake512_8way_full( &ctx.blake, vhash, vhash, dataLen );

-     bmw512_8way_init( &ctx.bmw );
-     bmw512_8way_update( &ctx.bmw, vhash, dataLen );
-     bmw512_8way_close( &ctx.bmw, vhash );
+     bmw512_8way_full( &ctx.bmw, vhash, vhash, dataLen );

 #if defined(__VAES__)

@@ -323,9 +317,7 @@ void xevan_8way_hash( void *output, const void *input )

 #endif

-     skein512_8way_init( &ctx.skein );
-     skein512_8way_update( &ctx.skein, vhash, dataLen );
-     skein512_8way_close( &ctx.skein, vhash );
+     skein512_8way_full( &ctx.skein, vhash, vhash, dataLen );

     jh512_8way_init( &ctx.jh );
     jh512_8way_update( &ctx.jh, vhash, dataLen );
@@ -504,40 +496,43 @@ void xevan_8way_hash( void *output, const void *input )
 int scanhash_xevan_8way( struct work *work, uint32_t max_nonce,
                       uint64_t *hashes_done, struct thr_info *mythr )
 {
-   uint32_t hash[8*16] __attribute__ ((aligned (128)));
-   uint32_t vdata[24*8] __attribute__ ((aligned (64)));
+   uint32_t hash[8*8] __attribute__ ((aligned (128)));   // output of xevan_8way_hash; single lanes are pulled out with extr_lane_8x32
+   uint32_t vdata[20*8] __attribute__ ((aligned (64)));   // 8-lane hash input built by mm512_bswap32_intrlv80_8x64 from pdata
    uint32_t lane_hash[8] __attribute__ ((aligned (64)));
-   uint32_t *hash7 = &(hash[7<<3]);
+   uint32_t *hashd7 = &(hash[7*8]);   // 8 words starting at index 56, indexed by lane for the cheap targ32 pre-check
    uint32_t *pdata = work->data;
    const uint32_t *ptarget = work->target;
    const uint32_t first_nonce = pdata[19];
    const uint32_t last_nonce = max_nonce - 8;
-   __m512i  *noncev = (__m512i*)vdata + 9;   // aligned
+   __m512i  *noncev = (__m512i*)vdata + 9;   // vector at byte offset 576 of vdata; presumably holds header words 18/19 of all lanes - confirm
    uint32_t n = first_nonce;
    const int thr_id = mythr->id;
-   const uint32_t Htarg = ptarget[7];
+   const uint32_t targ32 = ptarget[7];   // target word 7, compared against hashd7[lane]
+   const bool bench = opt_benchmark;   // sampled once; disables lane extraction and submission
 
    mm512_bswap32_intrlv80_8x64( vdata, pdata );
+   *noncev = mm512_intrlv_blend_32(   // nonce lanes are now set once, before the loop, instead of every iteration
+              _mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,   // n..n+7 land in the odd 32-bit elements
+                                n+3, 0, n+2, 0, n+1, 0, n,   0 ), *noncev );   // no mm512_bswap_32 on the nonces, unlike the removed code
    do
    {
-      *noncev = mm512_intrlv_blend_32( mm512_bswap_32(
-              _mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
-                                n+3, 0, n+2, 0, n+1, 0, n,   0 ) ), *noncev );
       xevan_8way_hash( hash, vdata );
 
       for ( int lane = 0; lane < 8; lane++ )
-      if unlikely( ( hash7[ lane ] <= Htarg ) )
+      if ( unlikely( ( hashd7[ lane ] <= targ32 ) && !bench ) )   // pre-filter on one word; never true in benchmark mode
       {
          extr_lane_8x32( lane_hash, hash, lane, 256 );
-         if ( likely( fulltest( lane_hash, ptarget ) && !opt_benchmark ) )
+         if ( likely( valid_hash( lane_hash, ptarget ) ) )   // full check on the extracted lane
          {
-            pdata[19] = n + lane;
+            pdata[19] = bswap_32( n + lane );   // this lane's nonce, written byte-swapped before submission
             submit_lane_solution( work, lane_hash, mythr, lane );
          }
       }
+      *noncev = _mm512_add_epi32( *noncev,   // 32-bit add: only the upper half of each 64-bit element changes
+                                  m512_const1_64( 0x0000000800000000 ) );   // +8 on every odd 32-bit element, i.e. on each lane's nonce
       n += 8;
    } while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) );
-
+   pdata[19] = n;   // leave the next unscanned nonce in work->data
    *hashes_done = n - first_nonce;
    return 0;
 }
@@ -578,8 +573,6 @@ void xevan_4way_hash( void *output, const void *input )
     const int dataLen = 128;
     xevan_4way_context_overlay ctx __attribute__ ((aligned (64)));

-     // parallel 4 way
-
     blake512_4way_full( &ctx.blake, vhash, input, 80 );
     memset( &vhash[8<<2], 0, 64<<2 );

@@ -598,9 +591,7 @@ void xevan_4way_hash( void *output, const void *input )
     // Parallel 4way
     intrlv_4x64( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );

-     skein512_4way_init( &ctx.skein );
-     skein512_4way_update( &ctx.skein, vhash, dataLen );
-     skein512_4way_close( &ctx.skein, vhash );
+     skein512_4way_full( &ctx.skein, vhash, vhash, dataLen );

     jh512_4way_init( &ctx.jh );
     jh512_4way_update( &ctx.jh, vhash, dataLen );
@@ -618,15 +609,11 @@ void xevan_4way_hash( void *output, const void *input )
     cube_2way_full( &ctx.cube, vhashA, 512, vhashA, dataLen );
     cube_2way_full( &ctx.cube, vhashB, 512, vhashB, dataLen );

-     shavite512_2way_init( &ctx.shavite );
-     shavite512_2way_update_close( &ctx.shavite, vhashA, vhashA, dataLen );
-     shavite512_2way_init( &ctx.shavite );
-     shavite512_2way_update_close( &ctx.shavite, vhashB, vhashB, dataLen );
+     shavite512_2way_full( &ctx.shavite, vhashA, vhashA, dataLen );
+     shavite512_2way_full( &ctx.shavite, vhashB, vhashB, dataLen );

-     simd_2way_init( &ctx.simd, 512 );
-     simd_2way_update_close( &ctx.simd, vhashA, vhashA, dataLen<<3 );
-     simd_2way_init( &ctx.simd, 512 );
-     simd_2way_update_close( &ctx.simd, vhashB, vhashB, dataLen<<3 );
+     simd512_2way_full( &ctx.simd, vhashA, vhashA, dataLen );
+     simd512_2way_full( &ctx.simd, vhashB, vhashB, dataLen );

     dintrlv_2x128( hash0, hash1, vhashA, dataLen<<3 );
     dintrlv_2x128( hash2, hash3, vhashB, dataLen<<3 );
@@ -718,9 +705,7 @@ void xevan_4way_hash( void *output, const void *input )

     intrlv_4x64( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );

-     skein512_4way_init( &ctx.skein );
-     skein512_4way_update( &ctx.skein, vhash, dataLen );
-     skein512_4way_close( &ctx.skein, vhash );
+     skein512_4way_full( &ctx.skein, vhash, vhash, dataLen );

     jh512_4way_init( &ctx.jh );
     jh512_4way_update( &ctx.jh, vhash, dataLen );
@@ -738,15 +723,11 @@ void xevan_4way_hash( void *output, const void *input )
     cube_2way_full( &ctx.cube, vhashA, 512, vhashA, dataLen );
     cube_2way_full( &ctx.cube, vhashB, 512, vhashB, dataLen );

-     shavite512_2way_init( &ctx.shavite );
-     shavite512_2way_update_close( &ctx.shavite, vhashA, vhashA, dataLen );
-     shavite512_2way_init( &ctx.shavite );
-     shavite512_2way_update_close( &ctx.shavite, vhashB, vhashB, dataLen );
+     shavite512_2way_full( &ctx.shavite, vhashA, vhashA, dataLen );
+     shavite512_2way_full( &ctx.shavite, vhashB, vhashB, dataLen );

-     simd_2way_init( &ctx.simd, 512 );
-     simd_2way_update_close( &ctx.simd, vhashA, vhashA, dataLen<<3 );
-     simd_2way_init( &ctx.simd, 512 );
-     simd_2way_update_close( &ctx.simd, vhashB, vhashB, dataLen<<3 );
+     simd512_2way_full( &ctx.simd, vhashA, vhashA, dataLen );
+     simd512_2way_full( &ctx.simd, vhashB, vhashB, dataLen );

     dintrlv_2x128( hash0, hash1, vhashA, dataLen<<3 );
     dintrlv_2x128( hash2, hash3, vhashB, dataLen<<3 );
@@ -818,41 +799,43 @@ void xevan_4way_hash( void *output, const void *input )
 int scanhash_xevan_4way( struct work *work, uint32_t max_nonce,
                         uint64_t *hashes_done, struct thr_info *mythr )
 {
-   uint32_t hash[4*16] __attribute__ ((aligned (64)));
-   uint32_t vdata[24*4] __attribute__ ((aligned (64)));
-   uint32_t lane_hash[8] __attribute__ ((aligned (32)));
-   uint32_t *hash7 = &(hash[7<<2]);
+   uint32_t hash[16*4] __attribute__ ((aligned (128)));   // output of xevan_4way_hash; single lanes are pulled out with extr_lane_4x32
+   uint32_t vdata[20*4] __attribute__ ((aligned (64)));   // 4-lane hash input built by mm256_bswap32_intrlv80_4x64 from pdata
+   uint32_t lane_hash[8] __attribute__ ((aligned (64)));   // one extracted lane, checked by valid_hash
+   uint32_t *hashd7 = &(hash[7<<2]);   // 4 words starting at index 28, indexed by lane for the cheap targ32 pre-check
    uint32_t *pdata = work->data;
    uint32_t *ptarget = work->target;
    int thr_id = mythr->id;
-   __m256i  *noncev = (__m256i*)vdata + 9;   // aligned
-
-   const uint32_t Htarg = ptarget[7];
+   __m256i  *noncev = (__m256i*)vdata + 9;   // vector at byte offset 288 of vdata; presumably holds header words 18/19 of all lanes - confirm
+   const uint32_t targ32 = ptarget[7];   // read before the benchmark override of ptarget[7] below
    const uint32_t first_nonce = pdata[19];
+   const uint32_t last_nonce = max_nonce - 4;   // loop bound; replaces the inline max_nonce-4 of the removed code
    uint32_t n = first_nonce;
+   const bool bench = opt_benchmark;   // sampled once; disables lane extraction and submission
 
-   if ( opt_benchmark )
-      ptarget[7] = 0x0cff;
+   if ( bench )  ptarget[7] = 0x0cff;   // benchmark mode overwrites target word 7 (targ32 keeps the earlier value)
 
    mm256_bswap32_intrlv80_4x64( vdata, pdata );
+   *noncev = mm256_intrlv_blend_32(   // nonce lanes are now set once, before the loop, instead of every iteration
+                   _mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );   // n..n+3 land in the odd 32-bit elements, without the removed mm256_bswap_32
    do {
-      *noncev = mm256_intrlv_blend_32( mm256_bswap_32(
-               _mm256_set_epi32( n+3, 0,n+2, 0,n+1, 0, n, 0 ) ), *noncev );
-
       xevan_4way_hash( hash, vdata );
       for ( int lane = 0; lane < 4; lane++ )
-      if ( hash7[ lane ] <= Htarg )
+      if ( unlikely( hashd7[ lane ] <= targ32 ) && ! bench )   // pre-filter on one word; never true in benchmark mode
       {
          extr_lane_4x32( lane_hash, hash, lane, 256 );
-	      if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
+	      if ( valid_hash( lane_hash, ptarget ) )   // full check on the extracted lane
          {
-             pdata[19] = n + lane;
+             pdata[19] = bswap_32( n + lane );   // this lane's nonce, written byte-swapped before submission
              submit_lane_solution( work, lane_hash, mythr, lane );
          }
       }
+      *noncev = _mm256_add_epi32( *noncev,   // 32-bit add: only the upper half of each 64-bit element changes
+                                  m256_const1_64( 0x0000000400000000 ) );   // +4 on every odd 32-bit element, i.e. on each lane's nonce
       n += 4;
-   } while ( ( n < max_nonce-4 ) && !work_restart[thr_id].restart );
-   *hashes_done = n - first_nonce + 1;
+   } while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) );   // stop near max_nonce or when this thread's restart flag is set
+   pdata[19] = n;   // leave the next unscanned nonce in work->data
+   *hashes_done = n - first_nonce;   // n advances by 4 per hash call, so no +1 (the removed code added 1)
    return 0;
 }

--- a/algo/x17/xevan.c
+++ b/algo/x17/xevan.c
@@ -56,8 +56,6 @@ typedef struct {
 } xevan_ctx_holder;

 xevan_ctx_holder xevan_ctx __attribute__ ((aligned (64)));
-static __thread sph_blake512_context xevan_blake_mid
-                                        __attribute__ ((aligned (64)));

 void init_xevan_ctx()
 {
@@ -85,34 +83,23 @@ void init_xevan_ctx()
 #endif
 };

-void xevan_blake512_midstate( const void* input )
-{
-    memcpy( &xevan_blake_mid, &xevan_ctx.blake, sizeof xevan_blake_mid );
-    sph_blake512( &xevan_blake_mid, input, 64 );
-}
-
 void xevan_hash(void *output, const void *input)
 {
-        uint32_t _ALIGN(64) hash[32]; // 128 bytes required
+   uint32_t _ALIGN(64) hash[32]; // 128 bytes required
 	const int dataLen = 128;
-        xevan_ctx_holder ctx __attribute__ ((aligned (64)));
-        memcpy( &ctx, &xevan_ctx, sizeof(xevan_ctx) );
-
-        const int midlen = 64;            // bytes
-        const int tail   = 80 - midlen;   // 16
-
-        memcpy( &ctx.blake, &xevan_blake_mid, sizeof xevan_blake_mid );
-        sph_blake512( &ctx.blake, input + midlen, tail );
-	sph_blake512_close(&ctx.blake, hash);
+   xevan_ctx_holder ctx __attribute__ ((aligned (64)));
+   memcpy( &ctx, &xevan_ctx, sizeof(xevan_ctx) );

+   sph_blake512( &ctx.blake, input, 80 );
+   sph_blake512_close( &ctx.blake, hash );
 	memset(&hash[16], 0, 64);

 	sph_bmw512(&ctx.bmw, hash, dataLen);
 	sph_bmw512_close(&ctx.bmw, hash);

 #if defined(__AES__)
-        update_and_final_groestl( &ctx.groestl, (char*)hash,
-                                  (const char*)hash, dataLen*8 );
+   update_and_final_groestl( &ctx.groestl, (char*)hash,
+                                     (const char*)hash, dataLen*8 );
 #else
 	sph_groestl512(&ctx.groestl, hash, dataLen);
 	sph_groestl512_close(&ctx.groestl, hash);
@@ -127,20 +114,20 @@ void xevan_hash(void *output, const void *input)
 	sph_keccak512(&ctx.keccak, hash, dataLen);
 	sph_keccak512_close(&ctx.keccak, hash);

-        update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
-                                (const BitSequence*)hash, dataLen );
+   update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
+                                 (const BitSequence*)hash, dataLen );

-        cubehashUpdateDigest( &ctx.cubehash, (byte*)hash,
-                              (const byte*) hash, dataLen );
+   cubehashUpdateDigest( &ctx.cubehash, (byte*)hash,
+                                 (const byte*) hash, dataLen );

 	sph_shavite512(&ctx.shavite, hash, dataLen);
 	sph_shavite512_close(&ctx.shavite, hash);

-        update_final_sd( &ctx.simd, (BitSequence *)hash,
+   update_final_sd( &ctx.simd, (BitSequence *)hash,
                         (const BitSequence *)hash, dataLen*8 );

 #if defined(__AES__)
-        update_final_echo( &ctx.echo, (BitSequence *) hash,
+   update_final_echo( &ctx.echo, (BitSequence *) hash,
                           (const BitSequence *) hash, dataLen*8 );
 #else
 	sph_echo512(&ctx.echo, hash, dataLen);
@@ -159,15 +146,15 @@ void xevan_hash(void *output, const void *input)
 	sph_whirlpool(&ctx.whirlpool, hash, dataLen);
 	sph_whirlpool_close(&ctx.whirlpool, hash);

-        SHA512_Update( &ctx.sha512, hash, dataLen );
-        SHA512_Final( (unsigned char*) hash, &ctx.sha512 );
+   SHA512_Update( &ctx.sha512, hash, dataLen );
+   SHA512_Final( (unsigned char*) hash, &ctx.sha512 );

 	sph_haval256_5(&ctx.haval,(const void*) hash, dataLen);
 	sph_haval256_5_close(&ctx.haval, hash);

 	memset(&hash[8], 0, dataLen - 32);

-        memcpy( &ctx, &xevan_ctx, sizeof(xevan_ctx) );
+   memcpy( &ctx, &xevan_ctx, sizeof(xevan_ctx) );

 	sph_blake512(&ctx.blake, hash, dataLen);
 	sph_blake512_close(&ctx.blake, hash);
@@ -176,11 +163,11 @@ void xevan_hash(void *output, const void *input)
 	sph_bmw512_close(&ctx.bmw, hash);

 #if defined(__AES__)
-        update_and_final_groestl( &ctx.groestl, (char*)hash,
-                                  (const BitSequence*)hash, dataLen*8 );
+   update_and_final_groestl( &ctx.groestl, (char*)hash,
+                              (const BitSequence*)hash, dataLen*8 );
 #else
 	sph_groestl512(&ctx.groestl, hash, dataLen);
-        sph_groestl512_close(&ctx.groestl, hash);
+   sph_groestl512_close(&ctx.groestl, hash);
 #endif

 	sph_skein512(&ctx.skein, hash, dataLen);
@@ -191,24 +178,25 @@ void xevan_hash(void *output, const void *input)

 	sph_keccak512(&ctx.keccak, hash, dataLen);
 	sph_keccak512_close(&ctx.keccak, hash);
-        update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
-                                (const BitSequence*)hash, dataLen );

-        cubehashUpdateDigest( &ctx.cubehash, (byte*)hash,
-                              (const byte*) hash, dataLen );
+   update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
+                                 (const BitSequence*)hash, dataLen );
+
+   cubehashUpdateDigest( &ctx.cubehash, (byte*)hash,
+                                 (const byte*) hash, dataLen );

 	sph_shavite512(&ctx.shavite, hash, dataLen);
 	sph_shavite512_close(&ctx.shavite, hash);

-        update_final_sd( &ctx.simd, (BitSequence *)hash,
+   update_final_sd( &ctx.simd, (BitSequence *)hash,
                         (const BitSequence *)hash, dataLen*8 );

 #if defined(__AES__)
-        update_final_echo( &ctx.echo, (BitSequence *) hash,
+   update_final_echo( &ctx.echo, (BitSequence *) hash,
                           (const BitSequence *) hash, dataLen*8 );
 #else
-        sph_echo512(&ctx.echo, hash, dataLen);
-        sph_echo512_close(&ctx.echo, hash);
+   sph_echo512(&ctx.echo, hash, dataLen);
+   sph_echo512_close(&ctx.echo, hash);
 #endif

 	sph_hamsi512(&ctx.hamsi, hash, dataLen);
@@ -223,8 +211,8 @@ void xevan_hash(void *output, const void *input)
 	sph_whirlpool(&ctx.whirlpool, hash, dataLen);
 	sph_whirlpool_close(&ctx.whirlpool, hash);

-        SHA512_Update( &ctx.sha512, hash, dataLen );
-        SHA512_Final( (unsigned char*) hash, &ctx.sha512 );
+   SHA512_Update( &ctx.sha512, hash, dataLen );
+   SHA512_Final( (unsigned char*) hash, &ctx.sha512 );

 	sph_haval256_5(&ctx.haval,(const void*) hash, dataLen);
 	sph_haval256_5_close(&ctx.haval, hash);
@@ -233,41 +221,33 @@ void xevan_hash(void *output, const void *input)
 }

 int scanhash_xevan( struct work *work, uint32_t max_nonce,
-	            uint64_t *hashes_done, struct thr_info *mythr )
+             uint64_t *hashes_done, struct thr_info *mythr)   // xevan nonce scan: one xevan_hash call per nonce; always returns 0
 {
-	uint32_t _ALIGN(64) hash[8];
-	uint32_t _ALIGN(64) endiandata[20];
-	uint32_t *pdata = work->data;
-	uint32_t *ptarget = work->target;
-   int thr_id = mythr->id;  // thr_id arg is deprecated
-	const uint32_t Htarg = ptarget[7];
-	const uint32_t first_nonce = pdata[19];
-	uint32_t nonce = first_nonce;
-	volatile uint8_t *restart = &(work_restart[thr_id].restart);
+   uint32_t edata[20] __attribute__((aligned(64)));   // hash input; filled from pdata below, word 19 rewritten per nonce
+   uint32_t hash64[8] __attribute__((aligned(64)));   // output of xevan_hash, checked by valid_hash
+   uint32_t *pdata = work->data;
+   uint32_t *ptarget = work->target;
+   uint32_t n = pdata[19];   // current nonce; starts at the value found in work->data
+   const uint32_t first_nonce = pdata[19];
+   const int thr_id = mythr->id;   // index into work_restart[]
+   const bool bench = opt_benchmark;   // sampled once; suppresses submission below
 
-	if (opt_benchmark)
-		ptarget[7] = 0x0cff;
+   mm128_bswap32_80( edata, pdata );   // replaces the removed be32enc loop over words 0..18; presumably byte-swaps all 20 words - confirm in helper
 
-	for (int k=0; k < 19; k++)
-		be32enc(&endiandata[k], pdata[k]);
-
-   xevan_blake512_midstate( endiandata );
-	do {
-		be32enc(&endiandata[19], nonce);
-		xevan_hash(hash, endiandata);
-
-		if (hash[7] <= Htarg )
-      if ( fulltest( hash, ptarget ) && !opt_benchmark )
-	   {
-         pdata[19] = nonce;
-         submit_solution( work, hash, mythr );
-		}
-		nonce++;
-	} while ( nonce < max_nonce && !(*restart) );
-
-	pdata[19] = nonce;
-	*hashes_done = pdata[19] - first_nonce + 1;
-	return 0;
+   do
+   {
+      edata[19] = n;   // nonce stored as a plain uint32_t (the removed code wrote it with be32enc)
+      xevan_hash( hash64, edata );   // no midstate precompute any more; the removed xevan_blake512_midstate call is gone
+      if ( unlikely( valid_hash( hash64, ptarget ) && !bench ) )   // never taken in benchmark mode
+      {
+         pdata[19] = bswap_32( n );   // winning nonce is written byte-swapped into work->data before submission
+         submit_solution( work, hash64, mythr );
+      }
+      n++;
+   } while ( n < max_nonce && !work_restart[thr_id].restart );   // stop at max_nonce or when this thread's restart flag is set
+   pdata[19] = n;   // leave the next unscanned nonce in work->data
+   *hashes_done = n - first_nonce;   // n is bumped once per hash, so this is the exact count (old code added 1)
+   return 0;
 }

 #endif
--- a/algo/x22/x22i-4way.c
+++ b/algo/x22/x22i-4way.c
@@ -87,64 +87,40 @@ void x22i_8way_hash( void *output, const void *input )
   unsigned char hashA7[64]    __attribute__((aligned(32))) = {0};
   x22i_8way_ctx_overlay ctx;

-   blake512_8way_init( &ctx.blake );
-   blake512_8way_update( &ctx.blake, input, 80 );
-   blake512_8way_close( &ctx.blake, vhash );
+   blake512_8way_full( &ctx.blake, vhash, input, 80 );

-   bmw512_8way_init( &ctx.bmw );
-   bmw512_8way_update( &ctx.bmw, vhash, 64 );
-   bmw512_8way_close( &ctx.bmw, vhash );
+   bmw512_8way_full( &ctx.bmw, vhash, vhash, 64 );

 #if defined(__VAES__)

-     rintrlv_8x64_4x128( vhashA, vhashB, vhash, 512 );
+   rintrlv_8x64_4x128( vhashA, vhashB, vhash, 512 );

-     groestl512_4way_init( &ctx.groestl, 64 );
-     groestl512_4way_update_close( &ctx.groestl, vhashA, vhashA, 512 );
-     groestl512_4way_init( &ctx.groestl, 64 );
-     groestl512_4way_update_close( &ctx.groestl, vhashB, vhashB, 512 );
+   groestl512_4way_full( &ctx.groestl, vhashA, vhashA, 64 );
+   groestl512_4way_full( &ctx.groestl, vhashB, vhashB, 64 );

-     rintrlv_4x128_8x64( vhash, vhashA, vhashB, 512 );
+   rintrlv_4x128_8x64( vhash, vhashA, vhashB, 512 );

 #else

-   dintrlv_8x64_512( hash0, hash1, hash2, hash3,
-                     hash4, hash5, hash6, hash7, vhash );
+   dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
+                   vhash );

-   init_groestl( &ctx.groestl, 64 );
-   update_and_final_groestl( &ctx.groestl, (char*)hash0,
-                                  (const char*)hash0, 512 );
-   init_groestl( &ctx.groestl, 64 );
-   update_and_final_groestl( &ctx.groestl, (char*)hash1,
-                                  (const char*)hash1, 512 );
-   init_groestl( &ctx.groestl, 64 );
-   update_and_final_groestl( &ctx.groestl, (char*)hash2,
-                                  (const char*)hash2, 512 );
-   init_groestl( &ctx.groestl, 64 );
-   update_and_final_groestl( &ctx.groestl, (char*)hash3,
-                                  (const char*)hash3, 512 );
-   init_groestl( &ctx.groestl, 64 );
-   update_and_final_groestl( &ctx.groestl, (char*)hash4,
-                                  (const char*)hash4, 512 );
-   init_groestl( &ctx.groestl, 64 );
-   update_and_final_groestl( &ctx.groestl, (char*)hash5,
-                                  (const char*)hash5, 512 );
-   init_groestl( &ctx.groestl, 64 );
-   update_and_final_groestl( &ctx.groestl, (char*)hash6,
-                                  (const char*)hash6, 512 );
-   init_groestl( &ctx.groestl, 64 );
-   update_and_final_groestl( &ctx.groestl, (char*)hash7,
-                                  (const char*)hash7, 512 );
+   groestl512_full( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
+   groestl512_full( &ctx.groestl, (char*)hash1, (char*)hash1, 512 );
+   groestl512_full( &ctx.groestl, (char*)hash2, (char*)hash2, 512 );
+   groestl512_full( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
+   groestl512_full( &ctx.groestl, (char*)hash4, (char*)hash4, 512 );
+   groestl512_full( &ctx.groestl, (char*)hash5, (char*)hash5, 512 );
+   groestl512_full( &ctx.groestl, (char*)hash6, (char*)hash6, 512 );
+   groestl512_full( &ctx.groestl, (char*)hash7, (char*)hash7, 512 );
+
+   intrlv_8x64_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
+                  hash7 );

-   intrlv_8x64_512( vhash, hash0, hash1, hash2, hash3,
-                           hash4, hash5, hash6, hash7 );
-   
 #endif

-   skein512_8way_init( &ctx.skein );
-   skein512_8way_update( &ctx.skein, vhash, 64 );
-   skein512_8way_close( &ctx.skein, vhash );
-
+   skein512_8way_full( &ctx.skein, vhash, vhash, 64 );
+     
   jh512_8way_init( &ctx.jh );
   jh512_8way_update( &ctx.jh, vhash, 64 );
   jh512_8way_close( &ctx.jh, vhash );
@@ -155,22 +131,16 @@ void x22i_8way_hash( void *output, const void *input )

   rintrlv_8x64_4x128( vhashA, vhashB, vhash, 512 );

-   luffa_4way_init( &ctx.luffa, 512 );
-   luffa_4way_update_close( &ctx.luffa, vhashA, vhashA, 64 );
-   luffa_4way_init( &ctx.luffa, 512 );
-   luffa_4way_update_close( &ctx.luffa, vhashB, vhashB, 64 );
+   luffa512_4way_full( &ctx.luffa, vhashA, vhashA, 64 );
+   luffa512_4way_full( &ctx.luffa, vhashB, vhashB, 64 );

-   cube_4way_init( &ctx.cube, 512, 16, 32 );
-   cube_4way_update_close( &ctx.cube, vhashA, vhashA, 64 );
-   cube_4way_init( &ctx.cube, 512, 16, 32 );
-   cube_4way_update_close( &ctx.cube, vhashB, vhashB, 64 );
+   cube_4way_full( &ctx.cube, vhashA, 512, vhashA, 64 );
+   cube_4way_full( &ctx.cube, vhashB, 512, vhashB, 64 );

 #if defined(__VAES__)

-   shavite512_4way_init( &ctx.shavite );
-   shavite512_4way_update_close( &ctx.shavite, vhashA, vhashA, 64 );
-   shavite512_4way_init( &ctx.shavite );
-   shavite512_4way_update_close( &ctx.shavite, vhashB, vhashB, 64 );
+   shavite512_4way_full( &ctx.shavite, vhashA, vhashA, 64 );
+   shavite512_4way_full( &ctx.shavite, vhashB, vhashB, 64 );

 #else

@@ -207,17 +177,13 @@ void x22i_8way_hash( void *output, const void *input )

 #endif

-   simd_4way_init( &ctx.simd, 512 );
-   simd_4way_update_close( &ctx.simd, vhashA, vhashA, 512 );
-   simd_4way_init( &ctx.simd, 512 );
-   simd_4way_update_close( &ctx.simd, vhashB, vhashB, 512 );
+   simd512_4way_full( &ctx.simd, vhashA, vhashA, 64 );
+   simd512_4way_full( &ctx.simd, vhashB, vhashB, 64 );

 #if defined(__VAES__)

-   echo_4way_init( &ctx.echo, 512 );
-   echo_4way_update_close( &ctx.echo, vhashA, vhashA, 512 );
-   echo_4way_init( &ctx.echo, 512 );
-   echo_4way_update_close( &ctx.echo, vhashB, vhashB, 512 );
+   echo_4way_full( &ctx.echo, vhashA, 512, vhashA, 64 );
+   echo_4way_full( &ctx.echo, vhashB, 512, vhashB, 64 );

   rintrlv_4x128_8x64( vhash, vhashA, vhashB, 512 );

@@ -226,30 +192,22 @@ void x22i_8way_hash( void *output, const void *input )
   dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhashA );
   dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhashB );

-   init_echo( &ctx.echo, 512 );
-   update_final_echo ( &ctx.echo, (BitSequence*)hash0,
-                            (const BitSequence*)hash0, 512 );
-   init_echo( &ctx.echo, 512 );
-   update_final_echo ( &ctx.echo, (BitSequence*)hash1,
-                            (const BitSequence*)hash1, 512 );
-   init_echo( &ctx.echo, 512 );
-   update_final_echo ( &ctx.echo, (BitSequence*)hash2,
-                            (const BitSequence*)hash2, 512 );
-   init_echo( &ctx.echo, 512 );
-   update_final_echo ( &ctx.echo, (BitSequence*)hash3,
-                            (const BitSequence*)hash3, 512 );
-   init_echo( &ctx.echo, 512 );
-   update_final_echo ( &ctx.echo, (BitSequence*)hash4,
-                            (const BitSequence*)hash4, 512 );
-   init_echo( &ctx.echo, 512 );
-   update_final_echo ( &ctx.echo, (BitSequence*)hash5,
-                            (const BitSequence*)hash5, 512 );
-   init_echo( &ctx.echo, 512 );
-   update_final_echo ( &ctx.echo, (BitSequence*)hash6,
-                            (const BitSequence*)hash6, 512 );
-   init_echo( &ctx.echo, 512 );
-   update_final_echo ( &ctx.echo, (BitSequence*)hash7,
-                            (const BitSequence*)hash7, 512 );
+   echo_full( &ctx.echo, (BitSequence *)hash0, 512,
+                   (const BitSequence *)hash0, 64 );
+   echo_full( &ctx.echo, (BitSequence *)hash1, 512,
+                   (const BitSequence *)hash1, 64 );
+   echo_full( &ctx.echo, (BitSequence *)hash2, 512,
+                   (const BitSequence *)hash2, 64 );
+   echo_full( &ctx.echo, (BitSequence *)hash3, 512,
+                   (const BitSequence *)hash3, 64 );
+   echo_full( &ctx.echo, (BitSequence *)hash4, 512,
+                   (const BitSequence *)hash4, 64 );
+   echo_full( &ctx.echo, (BitSequence *)hash5, 512,
+                   (const BitSequence *)hash5, 64 );
+   echo_full( &ctx.echo, (BitSequence *)hash6, 512,
+                   (const BitSequence *)hash6, 64 );
+   echo_full( &ctx.echo, (BitSequence *)hash7, 512,
+                   (const BitSequence *)hash7, 64 );

   intrlv_8x64_512( vhash, hash0, hash1, hash2, hash3,
                           hash4, hash5, hash6, hash7 );
@@ -443,6 +401,55 @@ void x22i_8way_hash( void *output, const void *input )
   sha256_8way_close( &ctx.sha256, output );
 }

+int scanhash_x22i_8way( struct work *work, uint32_t max_nonce,   // scans nonces from work->data[19] upward, 8 per x22i_8way_hash call
+                       uint64_t *hashes_done, struct thr_info *mythr )   // always returns 0; *hashes_done receives n - first_nonce
+{
+   uint32_t hash[8*8] __attribute__ ((aligned (128)));   // output buffer passed to x22i_8way_hash
+   uint32_t vdata[20*8] __attribute__ ((aligned (64)));   // filled from pdata by mm512_bswap32_intrlv80_8x64
+   uint32_t lane_hash[8] __attribute__ ((aligned (64)));   // one lane's hash, written by extr_lane_8x32
+   uint32_t *hashd7 = &(hash[7*8]);   // hash[56..63], read per lane below; presumably word 7 of each lane -- TODO confirm layout
+   uint32_t *pdata = work->data;
+   uint32_t *ptarget = work->target;
+   const uint32_t first_nonce = pdata[19];
+   const uint32_t last_nonce = max_nonce - 8;   // NOTE(review): unsigned, wraps if max_nonce < 8 -- confirm callers never pass that
+   __m512i  *noncev = (__m512i*)vdata + 9;   // 512-bit vector at byte offset 576 of vdata
+   uint32_t n = first_nonce;
+   const int thr_id = mythr->id;
+   const uint32_t targ32 = ptarget[7];   // captured before the benchmark override below
+   const bool bench = opt_benchmark;
+
+   if ( bench )  ptarget[7] = 0x08ff;   // benchmark mode overwrites word 7 of the work target
+
+   InitializeSWIFFTX();   // called once per scan call, before any hashing
+   
+   mm512_bswap32_intrlv80_8x64( vdata, pdata );
+   *noncev = mm512_intrlv_blend_32(   // n..n+7 go in the odd 32-bit elements (set_epi32 lists the highest element first)
+              _mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
+                                n+3, 0, n+2, 0, n+1, 0, n,   0 ), *noncev );
+   do
+   {
+      x22i_8way_hash( hash, vdata );
+
+      for ( int lane = 0; lane < 8; lane++ )
+      if ( unlikely( ( hashd7[ lane ] <= targ32 ) && !bench ) )   // 32-bit pre-filter; never true when bench is set
+      {
+         extr_lane_8x32( lane_hash, hash, lane, 256 );
+         if ( likely( valid_hash( lane_hash, ptarget ) ) )   // only lanes accepted by valid_hash are submitted
+         {
+            pdata[19] = bswap_32( n + lane );   // the lane's nonce is stored byte-swapped before submission
+            submit_lane_solution( work, lane_hash, mythr, lane );
+         }
+      }
+      *noncev = _mm512_add_epi32( *noncev,
+                                  m512_const1_64( 0x0000000800000000 ) );   // constant has 8 in its upper 32 bits, 0 in its lower; presumably broadcast to every 64-bit element
+      n += 8;   // scalar counter kept in step with the vector nonces
+   } while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) );   // NOTE(review): strict '<' here, while scanhash_x22i_4way in this file uses '<=' -- confirm which is intended
+   pdata[19] = n;   // first nonce not hashed by this call
+   *hashes_done = n - first_nonce;
+   return 0;
+}
+
+/*
 int scanhash_x22i_8way( struct work* work, uint32_t max_nonce,
                   uint64_t *hashes_done, struct thr_info *mythr )
 {
@@ -488,6 +495,7 @@ int scanhash_x22i_8way( struct work* work, uint32_t max_nonce,
   *hashes_done = n - first_nonce;
   return 0;
 }
+*/

 #elif defined(X22I_4WAY)

@@ -531,33 +539,21 @@ void x22i_4way_hash( void *output, const void *input )
   unsigned char hashA3[64]    __attribute__((aligned(32))) = {0};
   x22i_ctx_overlay ctx;

-   blake512_4way_init( &ctx.blake );
-   blake512_4way_update( &ctx.blake, input, 80 );
-   blake512_4way_close( &ctx.blake, vhash );
+   blake512_4way_full( &ctx.blake, vhash, input, 80 );

   bmw512_4way_init( &ctx.bmw );
   bmw512_4way_update( &ctx.bmw, vhash, 64 );
   bmw512_4way_close( &ctx.bmw, vhash );
   dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
-   
-   init_groestl( &ctx.groestl, 64 );
-   update_and_final_groestl( &ctx.groestl, (char*)hash0,
-                                  (const char*)hash0, 512 );
-   init_groestl( &ctx.groestl, 64 );
-   update_and_final_groestl( &ctx.groestl, (char*)hash1,
-                                  (const char*)hash1, 512 );
-   init_groestl( &ctx.groestl, 64 );
-   update_and_final_groestl( &ctx.groestl, (char*)hash2,
-                                  (const char*)hash2, 512 );
-   init_groestl( &ctx.groestl, 64 );
-   update_and_final_groestl( &ctx.groestl, (char*)hash3,
-                                  (const char*)hash3, 512 );
+
+   groestl512_full( &ctx.groestl, (char*)hash0, (const char*)hash0, 512 );
+   groestl512_full( &ctx.groestl, (char*)hash1, (const char*)hash1, 512 );
+   groestl512_full( &ctx.groestl, (char*)hash2, (const char*)hash2, 512 );
+   groestl512_full( &ctx.groestl, (char*)hash3, (const char*)hash3, 512 );

   intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );

-   skein512_4way_init( &ctx.skein );
-   skein512_4way_update( &ctx.skein, vhash, 64 );
-   skein512_4way_close( &ctx.skein, vhash );
+   skein512_4way_full( &ctx.skein, vhash, vhash, 64 );

   jh512_4way_init( &ctx.jh );
   jh512_4way_update( &ctx.jh, vhash, 64 );
@@ -569,41 +565,29 @@ void x22i_4way_hash( void *output, const void *input )

   rintrlv_4x64_2x128( vhashA, vhashB, vhash, 512 );

-   luffa_2way_init( &ctx.luffa, 512 );
-   luffa_2way_update_close( &ctx.luffa, vhashA, vhashA, 64 );
-   luffa_2way_init( &ctx.luffa, 512 );
-   luffa_2way_update_close( &ctx.luffa, vhashB, vhashB, 64 );
+   luffa512_2way_full( &ctx.luffa, vhashA, vhashA, 64 );
+   luffa512_2way_full( &ctx.luffa, vhashB, vhashB, 64 );

-   cube_2way_init( &ctx.cube, 512, 16, 32 );
-   cube_2way_update_close( &ctx.cube, vhashA, vhashA, 64 );
-   cube_2way_init( &ctx.cube, 512, 16, 32 );
-   cube_2way_update_close( &ctx.cube, vhashB, vhashB, 64 );
+   cube_2way_full( &ctx.cube, vhashA, 512, vhashA, 64 );
+   cube_2way_full( &ctx.cube, vhashB, 512, vhashB, 64 );
+   
+   shavite512_2way_full( &ctx.shavite, vhashA, vhashA, 64 );
+   shavite512_2way_full( &ctx.shavite, vhashB, vhashB, 64 );

-   shavite512_2way_init( &ctx.shavite );
-   shavite512_2way_update_close( &ctx.shavite, vhashA, vhashA, 64 );
-   shavite512_2way_init( &ctx.shavite );
-   shavite512_2way_update_close( &ctx.shavite, vhashB, vhashB, 64 );
-
-   simd_2way_init( &ctx.simd, 512 );
-   simd_2way_update_close( &ctx.simd, vhashA, vhashA, 512 );
-   simd_2way_init( &ctx.simd, 512 );
-   simd_2way_update_close( &ctx.simd, vhashB, vhashB, 512 );
+   simd512_2way_full( &ctx.simd, vhashA, vhashA, 64 );
+   simd512_2way_full( &ctx.simd, vhashB, vhashB, 64 );

   dintrlv_2x128_512( hash0, hash1, vhashA );
   dintrlv_2x128_512( hash2, hash3, vhashB );
   
-   init_echo( &ctx.echo, 512 );
-   update_final_echo ( &ctx.echo, (BitSequence*)hash0,
-                            (const BitSequence*)hash0, 512 );
-   init_echo( &ctx.echo, 512 );
-   update_final_echo ( &ctx.echo, (BitSequence*)hash1,
-                            (const BitSequence*)hash1, 512 );
-   init_echo( &ctx.echo, 512 );
-   update_final_echo ( &ctx.echo, (BitSequence*)hash2,
-                            (const BitSequence*)hash2, 512 );
-   init_echo( &ctx.echo, 512 );
-   update_final_echo ( &ctx.echo, (BitSequence*)hash3,
-                            (const BitSequence*)hash3, 512 );
+   echo_full( &ctx.echo, (BitSequence *)hash0, 512,
+                   (const BitSequence *)hash0, 64 );
+   echo_full( &ctx.echo, (BitSequence *)hash1, 512,
+                   (const BitSequence *)hash1, 64 );
+   echo_full( &ctx.echo, (BitSequence *)hash2, 512,
+                   (const BitSequence *)hash2, 64 );
+   echo_full( &ctx.echo, (BitSequence *)hash3, 512,
+                   (const BitSequence *)hash3, 64 );

   intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );

@@ -722,44 +706,47 @@ void x22i_4way_hash( void *output, const void *input )
 int scanhash_x22i_4way( struct work* work, uint32_t max_nonce,
                   uint64_t *hashes_done, struct thr_info *mythr )
 {
-   uint32_t hash[4*16] __attribute__ ((aligned (64)));
-   uint32_t vdata[24*4] __attribute__ ((aligned (64)));
-   uint32_t lane_hash[8] __attribute__ ((aligned (32)));
-   uint32_t *hash7 = &(hash[7<<2]);
+   uint32_t hash[8*4] __attribute__ ((aligned (64)));   // output buffer passed to x22i_4way_hash; the loop below hashes 4 nonces per call
+   uint32_t vdata[20*4] __attribute__ ((aligned (64)));   // filled from pdata by mm256_bswap32_intrlv80_4x64
+   uint32_t lane_hash[8] __attribute__ ((aligned (64)));   // one lane's hash, written by extr_lane_4x32
+   uint32_t *hashd7 = &(hash[ 7*4 ]);   // hash[28..31], read per lane below; presumably word 7 of each lane -- TODO confirm layout
   uint32_t *pdata = work->data;
   uint32_t *ptarget = work->target;
   const uint32_t first_nonce = pdata[19];
-   __m256i  *noncev = (__m256i*)vdata + 9;   // aligned
+   const uint32_t last_nonce = max_nonce - 4;   // NOTE(review): unsigned, wraps if max_nonce < 4 -- confirm callers never pass that
+   __m256i  *noncev = (__m256i*)vdata + 9;   // 256-bit vector at byte offset 288 of vdata
   uint32_t n = first_nonce;
   const int thr_id = mythr->id;
-   const uint32_t Htarg = ptarget[7];
+   const uint32_t targ32 = ptarget[7];   // captured before the benchmark override below
+   const bool bench = opt_benchmark;
+
+   if ( bench ) ptarget[7] = 0x08ff;   // benchmark mode overwrites word 7 of the work target
 
-   if (opt_benchmark)
-      ((uint32_t*)ptarget)[7] = 0x08ff;
-   
   InitializeSWIFFTX();
-
+   
   mm256_bswap32_intrlv80_4x64( vdata, pdata );
+   *noncev = mm256_intrlv_blend_32(   // seeded once before the loop; n..n+3 go in the odd 32-bit elements
+                   _mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
   do
   {
-      *noncev = mm256_intrlv_blend_32( mm256_bswap_32(
-              _mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
      x22i_4way_hash( hash, vdata );
 
      for ( int lane = 0; lane < 4; lane++ )
-      if unlikely( ( hash7[ lane ] <= Htarg ) )
+      if ( unlikely( hashd7[ lane ] <= targ32 && !bench ) )   // 32-bit pre-filter; never true when bench is set
      {
         extr_lane_4x32( lane_hash, hash, lane, 256 );
-         if ( likely( fulltest( lane_hash, ptarget ) && !opt_benchmark ) )
+         if ( valid_hash( lane_hash, ptarget ) )   // only lanes accepted by valid_hash are submitted
         {
-            pdata[19] = n + lane;
+            pdata[19] = bswap_32( n + lane );   // the lane's nonce is stored byte-swapped before submission
            submit_lane_solution( work, lane_hash, mythr, lane );
         }
      }
+      *noncev = _mm256_add_epi32( *noncev,
+                                  m256_const1_64( 0x0000000400000000 ) );   // constant has 4 in its upper 32 bits, 0 in its lower; presumably broadcast to every 64-bit element
      n += 4;
-   } while ( likely( ( n < max_nonce - 4 ) && !work_restart[thr_id].restart ) );
-
-   *hashes_done = n - first_nonce + 1;
+   } while ( likely( ( n <= last_nonce ) && !work_restart[thr_id].restart ) );   // inclusive bound: a batch starting at last_nonce is still hashed
+   pdata[19] = n;   // first nonce not hashed by this call
+   *hashes_done = n - first_nonce;   // count of nonces hashed; the removed line added 1
   return 0;
 }

--- a/algo/x22/x22i.c
+++ b/algo/x22/x22i.c
@@ -167,40 +167,38 @@ void x22i_hash( void *output, const void *input )
 	memcpy(output, hash, 32);
 }

-int scanhash_x22i( struct work* work, uint32_t max_nonce,
-                   uint64_t *hashes_done, struct thr_info *mythr )
+int scanhash_x22i( struct work *work, uint32_t max_nonce,   // scans nonces one at a time from work->data[19] using x22i_hash
+             uint64_t *hashes_done, struct thr_info *mythr)   // always returns 0; *hashes_done receives n - first_nonce
 {
-   uint32_t endiandata[20] __attribute__((aligned(64)));
-   uint32_t hash[8] __attribute__((aligned(64)));
-	uint32_t *pdata = work->data;
-	uint32_t *ptarget = work->target;
-	const uint32_t first_nonce = pdata[19];
-   const uint32_t Htarg = ptarget[7];
-   uint32_t n = first_nonce;
+   uint32_t edata[20] __attribute__((aligned(64)));   // 80-byte hashing input, filled from pdata by mm128_bswap32_80
+   uint32_t hash64[8] __attribute__((aligned(64)));   // output buffer passed to x22i_hash
+   uint32_t *pdata = work->data;
+   uint32_t *ptarget = work->target;
+   uint32_t n = pdata[19];   // first nonce to hash; unlike the removed code it is not pre-incremented
+   const uint32_t first_nonce = n;
   const int thr_id = mythr->id;
+   const bool bench = opt_benchmark;
 
-	if (opt_benchmark)
-		((uint32_t*)ptarget)[7] = 0x08ff;
-
-	for (int k=0; k < 20; k++)
-		be32enc(&endiandata[k], pdata[k]);
+   if ( bench ) ptarget[7] = 0x08ff;   // benchmark mode overwrites word 7 of the work target
+   
+   mm128_bswap32_80( edata, pdata );   // replaces the removed per-word be32enc loop
 
   InitializeSWIFFTX();
-
+   
   do
   {
-       pdata[19] = ++n;
-       be32enc( &endiandata[19], n );
-
-       x22i_hash( hash, endiandata );
-
-       if ( hash[7] < Htarg )
-       if ( fulltest( hash, ptarget ) && !opt_benchmark )
-           submit_solution( work, hash, mythr );
-    } while ( n < max_nonce && !work_restart[thr_id].restart );
-
-	 *hashes_done = pdata[19] - first_nonce;
-	 return 0;
+      edata[19] = n;   // nonce is written into the hashing input without a byte swap
+      x22i_hash( hash64, edata );
+      if ( unlikely( valid_hash( hash64, ptarget ) && !bench ) )   // valid_hash still runs in benchmark mode, but nothing is submitted
+      {
+         pdata[19] = bswap_32( n );   // the nonce is stored byte-swapped before submission
+         submit_solution( work, hash64, mythr );
+      }
+      n++;
+   } while ( n < max_nonce && !work_restart[thr_id].restart );   // stops at max_nonce or when this thread's restart flag is set
+   *hashes_done = n - first_nonce;
+   pdata[19] = n;   // first nonce not hashed by this call
+   return 0;
 }

 #endif
--- a/algo/x22/x25x-4way.c
+++ b/algo/x22/x25x-4way.c
@@ -530,6 +530,55 @@ void x25x_8way_hash( void *output, const void *input )
   blake2s_8way_full_blocks( &ctx.blake2s, output, vhashX, 64*24 );
 }

+int scanhash_x25x_8way( struct work *work, uint32_t max_nonce,   // scans nonces from work->data[19] upward, 8 per x25x_8way_hash call
+                       uint64_t *hashes_done, struct thr_info *mythr )   // always returns 0; *hashes_done receives n - first_nonce
+{
+   uint32_t hash[8*8] __attribute__ ((aligned (128)));   // output buffer passed to x25x_8way_hash
+   uint32_t vdata[20*8] __attribute__ ((aligned (64)));   // filled from pdata by mm512_bswap32_intrlv80_8x64
+   uint32_t lane_hash[8] __attribute__ ((aligned (64)));   // one lane's hash, written by extr_lane_8x32
+   uint32_t *hashd7 = &(hash[7*8]);   // hash[56..63], read per lane below; presumably word 7 of each lane -- TODO confirm layout
+   uint32_t *pdata = work->data;
+   uint32_t *ptarget = work->target;
+   const uint32_t first_nonce = pdata[19];
+   const uint32_t last_nonce = max_nonce - 8;   // NOTE(review): unsigned, wraps if max_nonce < 8 -- confirm callers never pass that
+   __m512i  *noncev = (__m512i*)vdata + 9;   // 512-bit vector at byte offset 576 of vdata
+   uint32_t n = first_nonce;
+   const int thr_id = mythr->id;
+   const uint32_t targ32 = ptarget[7];   // captured before the benchmark override below
+   const bool bench = opt_benchmark;
+
+   if ( bench )  ptarget[7] = 0x08ff;   // benchmark mode overwrites word 7 of the work target
+
+   InitializeSWIFFTX();   // called once per scan call, before any hashing
+
+   mm512_bswap32_intrlv80_8x64( vdata, pdata );
+   *noncev = mm512_intrlv_blend_32(   // n..n+7 go in the odd 32-bit elements (set_epi32 lists the highest element first)
+              _mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
+                                n+3, 0, n+2, 0, n+1, 0, n,   0 ), *noncev );
+   do
+   {
+      x25x_8way_hash( hash, vdata );
+
+      for ( int lane = 0; lane < 8; lane++ )
+      if ( unlikely( ( hashd7[ lane ] <= targ32 ) && !bench ) )   // 32-bit pre-filter; never true when bench is set
+      {
+         extr_lane_8x32( lane_hash, hash, lane, 256 );
+         if ( likely( valid_hash( lane_hash, ptarget ) ) )   // only lanes accepted by valid_hash are submitted
+         {
+            pdata[19] = bswap_32( n + lane );   // the lane's nonce is stored byte-swapped before submission
+            submit_lane_solution( work, lane_hash, mythr, lane );
+         }
+      }
+      *noncev = _mm512_add_epi32( *noncev,
+                                  m512_const1_64( 0x0000000800000000 ) );   // constant has 8 in its upper 32 bits, 0 in its lower; presumably broadcast to every 64-bit element
+      n += 8;   // scalar counter kept in step with the vector nonces
+   } while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) );   // NOTE(review): strict '<' here, while scanhash_x25x_4way in this file uses '<=' -- confirm which is intended
+   pdata[19] = n;   // first nonce not hashed by this call
+   *hashes_done = n - first_nonce;
+   return 0;
+}
+
+/*
 int scanhash_x25x_8way( struct work* work, uint32_t max_nonce,
                   uint64_t *hashes_done, struct thr_info *mythr )
 {
@@ -574,6 +623,7 @@ int scanhash_x25x_8way( struct work* work, uint32_t max_nonce,
   *hashes_done = n - first_nonce;
   return 0;
 }
+*/

 #elif defined(X25X_4WAY)

@@ -614,9 +664,7 @@ void x25x_4way_hash( void *output, const void *input )
   unsigned char vhashX[24][64*4] __attribute__ ((aligned (64)));
   x25x_4way_ctx_overlay ctx __attribute__ ((aligned (64)));

-   blake512_4way_init( &ctx.blake );
-   blake512_4way_update( &ctx.blake, input, 80 );
-   blake512_4way_close( &ctx.blake, vhash );
+   blake512_4way_full( &ctx.blake, vhash, input, 80 );
   dintrlv_4x64_512( hash0[0], hash1[0], hash2[0], hash3[0], vhash );

   bmw512_4way_init( &ctx.bmw );
@@ -624,24 +672,13 @@ void x25x_4way_hash( void *output, const void *input )
   bmw512_4way_close( &ctx.bmw, vhash );
   dintrlv_4x64_512( hash0[1], hash1[1], hash2[1], hash3[1], vhash );

-   init_groestl( &ctx.groestl, 64 );
-   update_and_final_groestl( &ctx.groestl, (char*)hash0[2],
-                                  (const char*)hash0[1], 512 );
-   init_groestl( &ctx.groestl, 64 );
-   update_and_final_groestl( &ctx.groestl, (char*)hash1[2],
-                                  (const char*)hash1[1], 512 );
-   init_groestl( &ctx.groestl, 64 );
-   update_and_final_groestl( &ctx.groestl, (char*)hash2[2],
-                                  (const char*)hash2[1], 512 );
-   init_groestl( &ctx.groestl, 64 );
-   update_and_final_groestl( &ctx.groestl, (char*)hash3[2],
-                                  (const char*)hash3[1], 512 );
+   groestl512_full( &ctx.groestl, (char*)hash0[2], (const char*)hash0[1], 512 );
+   groestl512_full( &ctx.groestl, (char*)hash1[2], (const char*)hash1[1], 512 );
+   groestl512_full( &ctx.groestl, (char*)hash2[2], (const char*)hash2[1], 512 );
+   groestl512_full( &ctx.groestl, (char*)hash3[2], (const char*)hash3[1], 512 );

   intrlv_4x64_512( vhash, hash0[2], hash1[2], hash2[2], hash3[2] );
-
-   skein512_4way_init( &ctx.skein );
-   skein512_4way_update( &ctx.skein, vhash, 64 );
-   skein512_4way_close( &ctx.skein, vhash );
+   skein512_4way_full( &ctx.skein, vhash, vhash, 64 );
   dintrlv_4x64_512( hash0[3], hash1[3], hash2[3], hash3[3], vhash );

   jh512_4way_init( &ctx.jh );
@@ -654,32 +691,20 @@ void x25x_4way_hash( void *output, const void *input )
   keccak512_4way_close( &ctx.keccak, vhash );
   dintrlv_4x64_512( hash0[5], hash1[5], hash2[5], hash3[5], vhash );

-   init_luffa( &ctx.luffa, 512 );
-   update_and_final_luffa( &ctx.luffa, (BitSequence*)hash0[6],
-                                (const BitSequence*)hash0[5], 64 );
-   init_luffa( &ctx.luffa, 512 );
-   update_and_final_luffa( &ctx.luffa, (BitSequence*)hash1[6],
-                                (const BitSequence*)hash1[5], 64 );
-   init_luffa( &ctx.luffa, 512 );
-   update_and_final_luffa( &ctx.luffa, (BitSequence*)hash2[6],
-                                (const BitSequence*)hash2[5], 64 );
-   init_luffa( &ctx.luffa, 512 );
-   update_and_final_luffa( &ctx.luffa, (BitSequence*)hash3[6],
-                                (const BitSequence*)hash3[5], 64 );
-
-   cubehashInit( &ctx.cube, 512, 16, 32 );
-   cubehashUpdateDigest( &ctx.cube, (byte*) hash0[7],
-                              (const byte*)hash0[6], 64 );
-   cubehashInit( &ctx.cube, 512, 16, 32 );
-   cubehashUpdateDigest( &ctx.cube, (byte*) hash1[7],
-                              (const byte*)hash1[6], 64 );
-   cubehashInit( &ctx.cube, 512, 16, 32 );
-   cubehashUpdateDigest( &ctx.cube, (byte*) hash2[7],
-                              (const byte*)hash2[6], 64 );
-   cubehashInit( &ctx.cube, 512, 16, 32 );
-   cubehashUpdateDigest( &ctx.cube, (byte*) hash3[7],
-                              (const byte*)hash3[6], 64 );
+   luffa_full( &ctx.luffa, (BitSequence*)hash0[6], 512,
+                     (const BitSequence*)hash0[5], 64 );
+   luffa_full( &ctx.luffa, (BitSequence*)hash1[6], 512,
+                     (const BitSequence*)hash1[5], 64 );
+   luffa_full( &ctx.luffa, (BitSequence*)hash2[6], 512,
+                     (const BitSequence*)hash2[5], 64 );
+   luffa_full( &ctx.luffa, (BitSequence*)hash3[6], 512,
+                     (const BitSequence*)hash3[5], 64 );

+   cubehash_full( &ctx.cube, (byte*)hash0[7], 512, (const byte*)hash0[6], 64 );
+   cubehash_full( &ctx.cube, (byte*)hash1[7], 512, (const byte*)hash1[6], 64 );
+   cubehash_full( &ctx.cube, (byte*)hash2[7], 512, (const byte*)hash2[6], 64 );
+   cubehash_full( &ctx.cube, (byte*)hash3[7], 512, (const byte*)hash3[6], 64 );
+   
   sph_shavite512_init(&ctx.shavite);
   sph_shavite512(&ctx.shavite, (const void*) hash0[7], 64);
   sph_shavite512_close(&ctx.shavite, hash0[8]);
@@ -693,31 +718,23 @@ void x25x_4way_hash( void *output, const void *input )
   sph_shavite512(&ctx.shavite, (const void*) hash3[7], 64);
   sph_shavite512_close(&ctx.shavite, hash3[8]);

-   init_sd( &ctx.simd, 512 );
-   update_final_sd( &ctx.simd, (BitSequence*)hash0[9],
-                         (const BitSequence*)hash0[8], 512 );
-   init_sd( &ctx.simd, 512 );
-   update_final_sd( &ctx.simd, (BitSequence*)hash1[9],
-                         (const BitSequence*)hash1[8], 512 );
-   init_sd( &ctx.simd, 512 );
-   update_final_sd( &ctx.simd, (BitSequence*)hash2[9],
-                         (const BitSequence*)hash2[8], 512 );
-   init_sd( &ctx.simd, 512 );
-   update_final_sd( &ctx.simd, (BitSequence*)hash3[9],
-                         (const BitSequence*)hash3[8], 512 );
+   simd_full( &ctx.simd, (BitSequence*)hash0[9],
+                   (const BitSequence*)hash0[8], 512 );
+   simd_full( &ctx.simd, (BitSequence*)hash1[9],
+                   (const BitSequence*)hash1[8], 512 );
+   simd_full( &ctx.simd, (BitSequence*)hash2[9],
+                   (const BitSequence*)hash2[8], 512 );
+   simd_full( &ctx.simd, (BitSequence*)hash3[9],
+                   (const BitSequence*)hash3[8], 512 );

-   init_echo( &ctx.echo, 512 );
-   update_final_echo ( &ctx.echo, (BitSequence*)hash0[10],
-                            (const BitSequence*)hash0[9], 512 );
-   init_echo( &ctx.echo, 512 );
-   update_final_echo ( &ctx.echo, (BitSequence*)hash1[10],
-                            (const BitSequence*)hash1[9], 512 );
-   init_echo( &ctx.echo, 512 );
-   update_final_echo ( &ctx.echo, (BitSequence*)hash2[10],
-                            (const BitSequence*)hash2[9], 512 );
-   init_echo( &ctx.echo, 512 );
-   update_final_echo ( &ctx.echo, (BitSequence*)hash3[10],
-                            (const BitSequence*)hash3[9], 512 );
+   echo_full( &ctx.echo, (BitSequence *)hash0[10], 512,
+                   (const BitSequence *)hash0[ 9], 64 );
+   echo_full( &ctx.echo, (BitSequence *)hash1[10], 512,
+                   (const BitSequence *)hash1[ 9], 64 );
+   echo_full( &ctx.echo, (BitSequence *)hash2[10], 512,
+                   (const BitSequence *)hash2[ 9], 64 );
+   echo_full( &ctx.echo, (BitSequence *)hash3[10], 512,
+                   (const BitSequence *)hash3[ 9], 64 );

   intrlv_4x64_512( vhash, hash0[10], hash1[10], hash2[10], hash3[10] );

@@ -870,43 +887,46 @@ void x25x_4way_hash( void *output, const void *input )
 int scanhash_x25x_4way( struct work* work, uint32_t max_nonce,
                   uint64_t *hashes_done, struct thr_info *mythr )
 {
-   uint32_t hash[16*4] __attribute__ ((aligned (128)));
-   uint32_t vdata[24*4] __attribute__ ((aligned (64)));
-   uint32_t lane_hash[8] __attribute__ ((aligned (32)));
-   uint32_t *hash7 = &(hash[7<<2]);
+   uint32_t hash[8*4] __attribute__ ((aligned (64)));   // output buffer passed to x25x_4way_hash; the loop below hashes 4 nonces per call
+   uint32_t vdata[20*4] __attribute__ ((aligned (64)));   // filled from pdata by mm256_bswap32_intrlv80_4x64
+   uint32_t lane_hash[8] __attribute__ ((aligned (64)));   // one lane's hash, written by extr_lane_4x32
+   uint32_t *hashd7 = &(hash[ 7*4 ]);   // hash[28..31], read per lane below; presumably word 7 of each lane -- TODO confirm layout
   uint32_t *pdata = work->data;
   uint32_t *ptarget = work->target;
   const uint32_t first_nonce = pdata[19];
-   __m256i  *noncev = (__m256i*)vdata + 9;   // aligned
-   uint32_t n = first_nonce;
   const uint32_t last_nonce = max_nonce - 4;
+   __m256i  *noncev = (__m256i*)vdata + 9;   // 256-bit vector at byte offset 288 of vdata
+   uint32_t n = first_nonce;
   const int thr_id = mythr->id;
-   const uint32_t Htarg = ptarget[7];
+   const uint32_t targ32 = ptarget[7];   // captured before the benchmark override below
+   const bool bench = opt_benchmark;
 
-   if (opt_benchmark)
-      ((uint32_t*)ptarget)[7] = 0x08ff;
+   if ( bench ) ptarget[7] = 0x08ff;   // benchmark mode overwrites word 7 of the work target
 
   InitializeSWIFFTX();
 
   mm256_bswap32_intrlv80_4x64( vdata, pdata );
+   *noncev = mm256_intrlv_blend_32(   // seeded once before the loop; n..n+3 go in the odd 32-bit elements
+                   _mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
   do
   {
-      *noncev = mm256_intrlv_blend_32( mm256_bswap_32(
-              _mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
      x25x_4way_hash( hash, vdata );
 
-      for ( int lane = 0; lane < 4; lane++ ) if ( hash7[lane] <= Htarg )
+      for ( int lane = 0; lane < 4; lane++ )
+      if ( unlikely( hashd7[ lane ] <= targ32 && !bench ) )   // 32-bit pre-filter; never true when bench is set
      {
         extr_lane_4x32( lane_hash, hash, lane, 256 );
-         if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
+         if ( valid_hash( lane_hash, ptarget ) )   // only lanes accepted by valid_hash are submitted
         {
-              pdata[19] = n + lane;
-              submit_lane_solution( work, lane_hash, mythr, lane );
+            pdata[19] = bswap_32( n + lane );   // the lane's nonce is stored byte-swapped before submission
+            submit_lane_solution( work, lane_hash, mythr, lane );
         }
      }
+      *noncev = _mm256_add_epi32( *noncev,
+                                  m256_const1_64( 0x0000000400000000 ) );   // constant has 4 in its upper 32 bits, 0 in its lower; presumably broadcast to every 64-bit element
      n += 4;
-   } while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) );
-
+   } while ( likely( ( n <= last_nonce ) && !work_restart[thr_id].restart ) );   // inclusive bound: a batch starting at last_nonce is still hashed
+   pdata[19] = n;   // first nonce not hashed by this call
   *hashes_done = n - first_nonce;
   return 0;
 }
--- a/algo/x22/x25x.c
+++ b/algo/x22/x25x.c
@@ -201,42 +201,38 @@ void x25x_hash( void *output, const void *input )
 	memcpy(output, &hash[24], 32);
 }

-int scanhash_x25x( struct work* work, uint32_t max_nonce,
-                   uint64_t *hashes_done, struct thr_info *mythr )
+int scanhash_x25x( struct work *work, uint32_t max_nonce,   // scans nonces one at a time from work->data[19] using x25x_hash
+             uint64_t *hashes_done, struct thr_info *mythr)   // always returns 0; *hashes_done receives n - first_nonce
 {
   uint32_t edata[20] __attribute__((aligned(64)));
-   uint32_t hash[8] __attribute__((aligned(64)));
-	uint32_t *pdata = work->data;
-	uint32_t *ptarget = work->target;
-	const uint32_t first_nonce = pdata[19];
-   const uint32_t Htarg = ptarget[7];
-   uint32_t n = first_nonce;
+   uint32_t hash64[8] __attribute__((aligned(64)));   // output buffer passed to x25x_hash
+   uint32_t *pdata = work->data;
+   uint32_t *ptarget = work->target;
+   uint32_t n = pdata[19];   // first nonce to hash; unlike the removed code it is not pre-incremented
+   const uint32_t first_nonce = n;
   const int thr_id = mythr->id;
+   const bool bench = opt_benchmark;
 
-	if (opt_benchmark)
-		((uint32_t*)ptarget)[7] = 0x08ff;
+   if ( bench ) ptarget[7] = 0x08ff;   // benchmark mode overwrites word 7 of the work target
 
   mm128_bswap32_80( edata, pdata );
-   
-	for (int k=0; k < 20; k++)
-		be32enc(&edata[k], pdata[k]);
 
   InitializeSWIFFTX();
 
   do
   {
-       pdata[19] = ++n;
-       be32enc( &edata[19], n );
-
-       x25x_hash( hash, edata );
-
-       if ( hash[7] < Htarg )
-       if ( fulltest( hash, ptarget ) && !opt_benchmark )
-           submit_solution( work, hash, mythr );
-    } while ( n < max_nonce && !work_restart[thr_id].restart );
-
-	 *hashes_done = pdata[19] - first_nonce;
-	 return 0;
+      edata[19] = n;   // nonce is written into the hashing input without a byte swap
+      x25x_hash( hash64, edata );
+      if ( unlikely( valid_hash( hash64, ptarget ) && !bench ) )   // valid_hash still runs in benchmark mode, but nothing is submitted
+      {
+         pdata[19] = bswap_32( n );   // the nonce is stored byte-swapped before submission
+         submit_solution( work, hash64, mythr );
+      }
+      n++;
+   } while ( n < max_nonce && !work_restart[thr_id].restart );   // stops at max_nonce or when this thread's restart flag is set
+   *hashes_done = n - first_nonce;
+   pdata[19] = n;   // first nonce not hashed by this call
+   return 0;
 }

 #endif
--- a/20
+++ b/20
@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.11.9.
+# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.12.2.
 #
 #
 # Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
@@ -577,8 +577,8 @@ MAKEFLAGS=
 # Identity of this package.
 PACKAGE_NAME='cpuminer-opt'
 PACKAGE_TARNAME='cpuminer-opt'
-PACKAGE_VERSION='3.11.9'
-PACKAGE_STRING='cpuminer-opt 3.11.9'
+PACKAGE_VERSION='3.12.2'
+PACKAGE_STRING='cpuminer-opt 3.12.2'
 PACKAGE_BUGREPORT=''
 PACKAGE_URL=''

@@ -1332,7 +1332,7 @@ if test "$ac_init_help" = "long"; then
  # Omit some internal or obsolete options to make the list less imposing.
  # This message is too long to be a string in the A/UX 3.1 sh.
  cat <<_ACEOF
-\`configure' configures cpuminer-opt 3.11.9 to adapt to many kinds of systems.
+\`configure' configures cpuminer-opt 3.12.2 to adapt to many kinds of systems.

 Usage: $0 [OPTION]... [VAR=VALUE]...

@@ -1404,7 +1404,7 @@ fi

 if test -n "$ac_init_help"; then
  case $ac_init_help in
-     short | recursive ) echo "Configuration of cpuminer-opt 3.11.9:";;
+     short | recursive ) echo "Configuration of cpuminer-opt 3.12.2:";;
   esac
  cat <<\_ACEOF

@@ -1509,7 +1509,7 @@ fi
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
  cat <<\_ACEOF
-cpuminer-opt configure 3.11.9
+cpuminer-opt configure 3.12.2
 generated by GNU Autoconf 2.69

 Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2012,7 +2012,7 @@ cat >config.log <<_ACEOF
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.

-It was created by cpuminer-opt $as_me 3.11.9, which was
+It was created by cpuminer-opt $as_me 3.12.2, which was
 generated by GNU Autoconf 2.69.  Invocation command line was

  $ $0 $@
@@ -2993,7 +2993,7 @@ fi

 # Define the identity of the package.
 PACKAGE='cpuminer-opt'
- VERSION='3.11.9'
+ VERSION='3.12.2'


 cat >>confdefs.h <<_ACEOF
@@ -6690,7 +6690,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by cpuminer-opt $as_me 3.11.9, which was
+This file was extended by cpuminer-opt $as_me 3.12.2, which was
 generated by GNU Autoconf 2.69.  Invocation command line was

  CONFIG_FILES    = $CONFIG_FILES
@@ -6756,7 +6756,7 @@ _ACEOF
 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
 ac_cs_version="\\
-cpuminer-opt config.status 3.11.9
+cpuminer-opt config.status 3.12.2
 configured by $0, generated by GNU Autoconf 2.69,
  with options \\"\$ac_cs_config\\"

--- a/configure.ac
+++ b/configure.ac
@@ -1,4 +1,4 @@
-AC_INIT([cpuminer-opt], [3.11.9])
+AC_INIT([cpuminer-opt], [3.12.2])

 AC_PREREQ([2.59c])
 AC_CANONICAL_SYSTEM
--- a/cpu-miner.c
+++ b/cpu-miner.c
@@ -143,12 +143,6 @@ int api_thr_id = -1;
 bool stratum_need_reset = false;
 struct work_restart *work_restart = NULL;
 struct stratum_ctx stratum;
-bool jsonrpc_2 = false;
-char rpc2_id[64] = "";
-char *rpc2_blob = NULL;
-size_t rpc2_bloblen = 0;
-uint32_t rpc2_target = 0;
-char *rpc2_job_id = NULL;
 double opt_diff_factor = 1.0;
 double opt_target_factor = 1.0;
 uint32_t zr5_pok = 0;
@@ -165,20 +159,22 @@ double stratum_diff = 0.;
 double net_diff = 0.;
 double net_hashrate = 0.;
 uint64_t net_blocks = 0;
+uint32_t opt_work_size = 0;
+
 // conditional mining
-  bool conditional_state[MAX_CPUS] = { 0 };
-  double opt_max_temp = 0.0;
-  double opt_max_diff = 0.0;
-  double opt_max_rate = 0.0;
+bool conditional_state[MAX_CPUS] = { 0 };
+double opt_max_temp = 0.0;
+double opt_max_diff = 0.0;
+double opt_max_rate = 0.0;

-  uint32_t opt_work_size = 0;
-  char *opt_api_allow = NULL;
-  int opt_api_remote = 0;
-  int opt_api_listen = 0;
-//  int opt_api_listen = 4048; 
+// API
+static bool opt_api_enabled = false;
+char *opt_api_allow = NULL;
+int opt_api_listen = 0;
+int opt_api_remote = 0;
+char *default_api_allow = "127.0.0.1";
+int default_api_listen = 4048; 

-  pthread_mutex_t rpc2_job_lock;
-  pthread_mutex_t rpc2_login_lock;
  pthread_mutex_t applog_lock;
  pthread_mutex_t stats_lock;

@@ -360,9 +356,6 @@ void work_copy(struct work *dest, const struct work *src)

 int std_get_work_data_size() { return STD_WORK_DATA_SIZE; }

-bool jr2_work_decode( const json_t *val, struct work *work )
-{ return rpc2_job_decode( val, work ); }
-
 // Default
 bool std_le_work_decode( const json_t *val, struct work *work )
 {
@@ -890,8 +883,6 @@ static double   norm_diff_sum = 0.;
 static uint32_t last_block_height = 0;
 //static bool     new_job = false;
 static double   last_targetdiff = 0.;
-static double   ref_rate_hi = 0.;
-static double   ref_rate_lo = 1e100;
 #if !(defined(__WINDOWS__) || defined(_WIN64) || defined(_WIN32))
 static uint32_t hi_temp = 0;
 #endif
@@ -939,7 +930,6 @@ void report_summary_log( bool force )
   uint64_t accepts = accept_sum;  accept_sum = 0;
   uint64_t rejects = reject_sum;  reject_sum = 0;
   uint64_t stales  = stale_sum;   stale_sum  = 0;
-//   int      latency  = latency_sum; latency_sum = 0;
   memcpy( &start_time, &five_min_start, sizeof start_time );
   memcpy( &five_min_start, &now, sizeof now );

@@ -950,34 +940,21 @@ void report_summary_log( bool force )
   
   double share_time = (double)et.tv_sec + (double)et.tv_usec / 1e6;
   double ghrate = global_hashrate;
-   double scaled_ghrate = ghrate;
+
   double shrate = share_time == 0. ? 0. : diff_to_hash * last_targetdiff
                                           * (double)(accepts) / share_time;
   double sess_hrate = uptime.tv_sec == 0. ? 0. : diff_to_hash * norm_diff_sum
                                                   / (double)uptime.tv_sec;
-   double scaled_shrate = shrate;
-//   int    avg_latency = 0;
-//   double latency_pc  = 0.;
-   double submit_rate = 0.;
+
+   double submit_rate = share_time == 0. ? 0. : (double)submits*60. / share_time;
   char shr_units[4] = {0};
   char ghr_units[4] = {0};
   char sess_hr_units[4] = {0};
   char et_str[24];
   char upt_str[24];

-//   if ( submits )  avg_latency = latency / submits;
-
-   if ( share_time != 0. )
-   {
-      submit_rate = (double)submits*60. / share_time;
-//      latency_pc =  (double)latency / (share_time * 10.);
-   }
-
-   if ( ghrate > ref_rate_hi )  ref_rate_hi = ghrate;
-   if ( ghrate < ref_rate_lo )  ref_rate_lo = ghrate;
-
-   scale_hash_for_display( &scaled_shrate, shr_units );
-   scale_hash_for_display( &scaled_ghrate, ghr_units );
+   scale_hash_for_display( &shrate, shr_units );
+   scale_hash_for_display( &ghrate, ghr_units );
   scale_hash_for_display( &sess_hrate, sess_hr_units );

   sprintf_et( et_str, et.tv_sec );
@@ -988,8 +965,26 @@ void report_summary_log( bool force )
                      submit_rate, (double)submitted_share_count*60. /
                    ( (double)uptime.tv_sec + (double)uptime.tv_usec / 1e6 ) );
   applog2( LOG_INFO, "Hash rate       %7.2f%sh/s   %7.2f%sh/s   (%.2f%sh/s)",
-                     scaled_shrate, shr_units, sess_hrate, sess_hr_units, 
-                     scaled_ghrate, ghr_units );
+                     shrate, shr_units, sess_hrate, sess_hr_units, 
+                     ghrate, ghr_units );
+
+   if ( accepted_share_count < submitted_share_count )
+   {
+      double lost_ghrate = uptime.tv_sec == 0. ? 0.
+                  : diff_to_hash * last_targetdiff
+                      * (double)(submitted_share_count - accepted_share_count )
+                    / (double)uptime.tv_sec;
+      double lost_shrate = share_time == 0. ? 0.
+               : diff_to_hash * last_targetdiff  * (double)(submits - accepts )
+                / share_time;
+      char lshr_units[4] = {0};
+      char lghr_units[4] = {0};
+      scale_hash_for_display( &lost_shrate, lshr_units );
+      scale_hash_for_display( &lost_ghrate, lghr_units );
+      applog2( LOG_INFO, "Lost hash rate  %7.2f%sh/s   %7.2f%sh/s",
+                     lost_shrate, lshr_units, lost_ghrate, lghr_units );
+   }
+
   applog2( LOG_INFO,"Submitted        %6d       %6d",
                       submits, submitted_share_count );
   applog2( LOG_INFO,"Accepted         %6d       %6d",
@@ -1003,29 +998,10 @@ void report_summary_log( bool force )
   if ( solved_block_count )
      applog2( LOG_INFO,"Blocks solved                 %6d",
                         solved_block_count );
-/*
-#if !(defined(__WINDOWS__) || defined(_WIN64) || defined(_WIN32))
-
-   int temp = cpu_temp(0);
-   char tempstr[32];
-   if ( temp > hi_temp ) hi_temp = temp;
-
-   if ( use_colors && ( temp >= 70 ) )
-   {
-      if ( temp >= 80 )
-         sprintf( tempstr, "%s%dC%s", CL_WHT CL_RED, temp, CL_N );
-      else
-         sprintf( tempstr, "%s%dC%s", CL_WHT CL_YLW, temp, CL_N );
-   }
-   else
-      sprintf( tempstr, "%dC", temp );
-
-   applog2(LOG_INFO,"CPU temp             %s      max %dC", tempstr, hi_temp );
-
-#endif
-*/
 }

+bool lowdiff_debug = false;
+
 static int share_result( int result, struct work *null_work,
                         const char *reason )
 {
@@ -1038,7 +1014,6 @@ static int share_result( int result, struct work *null_work,
   char sres[48];
   char rres[48];
   char bres[48];
-//   char job_id[48];
   bool solved = false; 
   bool stale = false;
   char *acol = NULL, *bcol = NULL, *scol = NULL, *rcol = NULL;
@@ -1093,7 +1068,11 @@ static int share_result( int result, struct work *null_work,
        stale_share_count++;
     }
     else
+     {
        rejected_share_count++;
+        lowdiff_debug = true;
+  
+     }
   }

   // update global counters for summary report
@@ -1168,7 +1147,7 @@ static int share_result( int result, struct work *null_work,
           bres, share_time, latency );

   if ( have_stratum && !opt_quiet )
-      applog2( LOG_NOTICE, "Diff %.3g (%.3g%), %sBlock %d, %sJob %s" CL_WHT,
+      applog2( LOG_NOTICE, "Diff %.5g (%.3g%%), %sBlock %d, %sJob %s" CL_WHT,
               my_stats.share_diff, share_ratio, bcol, stratum.block_height,
               scol, my_stats.job_id );

@@ -1186,13 +1165,15 @@ static int share_result( int result, struct work *null_work,
         for ( int i = 0; i < 8; i++ )
            be32enc( str2 + i, str1[7 - i] );
         bin2hex( str3, (unsigned char*)str2, 12 );
-         applog2( LOG_INFO, "Share diff:  %g, Hash: %s...", my_stats.share_diff, str3 );
+         applog2( LOG_INFO, "Share diff:  %.5g, Hash: %s...",
+                             my_stats.share_diff, str3 );

         diff_to_target( str1, my_stats.target_diff );
         for ( int i = 0; i < 8; i++ )
            be32enc( str2 + i, str1[7 - i] );
         bin2hex( str3, (unsigned char*)str2, 12 );
-         applog2( LOG_INFO, "Target diff: %g, Targ: %s...", str3 );
+         applog2( LOG_INFO, "Target diff: %.5g, Targ: %s...",
+                            my_stats.target_diff, str3 );
      }

      if ( unlikely( opt_reset_on_stale && stale ) )
@@ -1234,20 +1215,6 @@ void std_be_build_stratum_request( char *req, struct work *work )
   free( xnonce2str );
 }

-void jr2_build_stratum_request( char *req, struct work *work )
-{
-   uchar hash[32];
-   char noncestr[9];
-   bin2hex( noncestr, (char*) algo_gate.get_nonceptr( work->data ),
-                      sizeof(uint32_t) );
-   algo_gate.hash_suw( hash, work->data );
-   char *hashhex = abin2hex(hash, 32);
-   snprintf( req, JSON_BUF_LEN,
-        "{\"method\": \"submit\", \"params\": {\"id\": \"%s\", \"job_id\": \"%s\", \"nonce\": \"%s\", \"result\": \"%s\"}, \"id\":4}",
-          rpc2_id, work->job_id, noncestr, hashhex );
-   free( hashhex );
-}
-
 bool std_le_submit_getwork_result( CURL *curl, struct work *work )
 {
   char req[JSON_BUF_LEN];
@@ -1316,53 +1283,6 @@ bool std_be_submit_getwork_result( CURL *curl, struct work *work )
   return true;
 }

-
-bool jr2_submit_getwork_result( CURL *curl, struct work *work )
-{
-   json_t *val, *res;
-   char req[JSON_BUF_LEN];
-   char noncestr[9];
-   uchar hash[32];
-   char *hashhex;
-   bin2hex( noncestr, (char*) algo_gate.get_nonceptr( work->data ),
-                      sizeof(uint32_t) );
-   algo_gate.hash_suw( hash, work->data );
-   hashhex = abin2hex( &hash[0], 32 );
-   snprintf( req, JSON_BUF_LEN, "{\"method\": \"submit\", \"params\": "
-       "{\"id\": \"%s\", \"job_id\": \"%s\", \"nonce\": \"%s\", \"result\": \"%s\"},"
-       "\"id\":4}\r\n",
-       rpc2_id, work->job_id, noncestr, hashhex );
-   free( hashhex );
-   // issue JSON-RPC request 
-   val = json_rpc2_call( curl, rpc_url, rpc_userpass, req, NULL, 0 );
-   if (unlikely( !val ))
-   {
-      applog(LOG_ERR, "submit_upstream_work json_rpc_call failed");
-      return false;
-   }
-   res = json_object_get( val, "result" );
-   json_t *status = json_object_get( res, "status" );
-   bool valid = !strcmp( status ? json_string_value( status ) : "", "OK" );
-   if (valid)
-       share_result( valid, work, NULL );
-   else
-   {
-       json_t *err = json_object_get( res, "error" );
-       const char *sreason = json_string_value( json_object_get(
-                                                      err, "message" ) );
-       share_result( valid, work, sreason );
-       if ( !strcasecmp( "Invalid job id", sreason ) )
-       {
-            work_free( work );
-            work_copy( work, &g_work );
-            g_work_time = 0;
-            restart_threads();
-       }
-   }
-   json_decref(val);
-   return true;
-}
-
 char* std_malloc_txs_request( struct work *work )
 {
  char *req;
@@ -1497,18 +1417,10 @@ static bool get_upstream_work( CURL *curl, struct work *work )
 start:
   gettimeofday( &tv_start, NULL );

-   if ( jsonrpc_2 )
-   {
-      char s[128];
-      snprintf( s, 128, "{\"method\": \"getjob\", \"params\": {\"id\": \"%s\"}, \"id\":1}\r\n", rpc2_id );
-      val = json_rpc2_call( curl, rpc_url, rpc_userpass, s, NULL, 0 );
-   }
-   else
-   {
-      val = json_rpc_call( curl, rpc_url, rpc_userpass,
+   val = json_rpc_call( curl, rpc_url, rpc_userpass,
 		           have_gbt ? gbt_req : getwork_req, &err,
                           have_gbt ? JSON_RPC_QUIET_404 : 0);
-   }
+ 
   gettimeofday( &tv_end, NULL );

   if ( have_stratum )
@@ -1632,68 +1544,6 @@ static bool workio_submit_work(struct workio_cmd *wc, CURL *curl)
   return true;
 }

-bool rpc2_login(CURL *curl)
-{
-	json_t *val;
-	bool rc = false;
-	struct timeval tv_start, tv_end, diff;
-	char s[JSON_BUF_LEN];
-
-	if (!jsonrpc_2)
-		return false;
-	snprintf(s, JSON_BUF_LEN, "{\"method\": \"login\", \"params\": {"
-		"\"login\": \"%s\", \"pass\": \"%s\", \"agent\": \"%s\"}, \"id\": 1}",
-		rpc_user, rpc_pass, USER_AGENT);
-	gettimeofday(&tv_start, NULL);
-	val = json_rpc_call(curl, rpc_url, rpc_userpass, s, NULL, 0);
-	gettimeofday(&tv_end, NULL);
-	if (!val)
-		goto end;
-	rc = rpc2_login_decode(val);
-	json_t *result = json_object_get(val, "result");
-	if (!result)
-		goto end;
-	json_t *job = json_object_get(result, "job");
-	if (!rpc2_job_decode(job, &g_work))
-		goto end;
-	if (opt_debug && rc)
-        {
-		timeval_subtract(&diff, &tv_end, &tv_start);
-		applog(LOG_DEBUG, "DEBUG: authenticated in %d ms",
-				diff.tv_sec * 1000 + diff.tv_usec / 1000);
-	}
-	json_decref(val);
-end:
-	return rc;
-}
-
-bool rpc2_workio_login(CURL *curl)
-{
-   int failures = 0;
-   if (opt_benchmark)
-	return true;
-   /* submit solution to bitcoin via JSON-RPC */
-   pthread_mutex_lock(&rpc2_login_lock);
-   while (!rpc2_login(curl))
-   {
-      if (unlikely((opt_retries >= 0) && (++failures > opt_retries)))
-      {
-	applog(LOG_ERR, "...terminating workio thread");
-	pthread_mutex_unlock(&rpc2_login_lock);
-	return false;
-      }
-
-      /* pause, then restart work-request loop */
-      if (!opt_benchmark)
-          applog(LOG_ERR, "...retry after %d seconds", opt_fail_pause);
-      sleep(opt_fail_pause);
-      pthread_mutex_unlock(&rpc2_login_lock);
-      pthread_mutex_lock(&rpc2_login_lock);
-   }
-   pthread_mutex_unlock(&rpc2_login_lock);
-   return true;
-}
-
 static void *workio_thread(void *userdata)
 {
 	struct thr_info *mythr = (struct thr_info *) userdata;
@@ -1706,8 +1556,6 @@ static void *workio_thread(void *userdata)
 		applog(LOG_ERR, "CURL initialization failed");
 		return NULL;
 	}
-	if ( jsonrpc_2 && !have_stratum )
-		ok = rpc2_workio_login( curl );

   while ( likely(ok) )
   {
@@ -1759,8 +1607,8 @@ static bool get_work(struct thr_info *thr, struct work *work)
  
      // this overwrites much of the for loop init
      memset( work->data + algo_gate.nonce_index, 0x00, 52);  // nonce..nonce+52
-		work->data[20] = 0x80000000;  // extraheader not used for jr2
-		work->data[31] = 0x00000280;  // extraheader not used for jr2
+		work->data[20] = 0x80000000; 
+		work->data[31] = 0x00000280;
 		return true;
 	}
 	/* fill out work request message */
@@ -1822,12 +1670,13 @@ static inline double u256_to_double( const uint64_t *u )

 void work_set_target_ratio( struct work* work, const void *hash )
 {
-   double dhash;
-
-   dhash = u256_to_double( (const uint64_t*)hash );
-   if ( likely( dhash > 0. ) )
-      work->sharediff = work->targetdiff *
+   // A NULL hash is treated like an all-zero hash: both lack a usable ratio
+   // and must reach the else branch below instead of keeping a stale value.
+   const double dhash = hash ? u256_to_double( (const uint64_t*)hash ) : 0.;
+   if ( likely( dhash > 0. ) )
+      work->sharediff = work->targetdiff *
            u256_to_double( (const uint64_t*)( work->target ) ) / dhash;
+   // No braces above: the else that follows must pair with the dhash test.
   else
      work->sharediff = 0.;

@@ -1843,8 +1692,8 @@ void work_set_target_ratio( struct work* work, const void *hash )
   share_stats[ s_put_ptr ].net_diff = net_diff;
   share_stats[ s_put_ptr ].stratum_diff = stratum_diff;
   share_stats[ s_put_ptr ].target_diff = work->targetdiff;
-   strcpy( share_stats[ s_put_ptr ].job_id, work->job_id );
-
+   ( (uint64_t*)share_stats[ s_put_ptr ].job_id )[3] = 0;
+   strncpy( share_stats[ s_put_ptr ].job_id, work->job_id, 30 );
   s_put_ptr = stats_ptr_incr( s_put_ptr );

   pthread_mutex_unlock( &stats_lock );
@@ -1860,7 +1709,17 @@ bool submit_solution( struct work *work, const void *hash,
     if ( !opt_quiet )
        applog( LOG_NOTICE, "%d submitted by thread %d, job %s",
            submitted_share_count, thr->id, work->job_id );
-     return true;
+    // Low-diff debugging (enabled once a share has been rejected):
+    if ( lowdiff_debug && hash )  // skip the dump if no hash was supplied
+    {
+       // Words are printed most significant first, hash above target.
+       const uint32_t *h = (const uint32_t*)hash;
+       const uint32_t *t = (const uint32_t*)work->target;
+       applog(LOG_INFO,"Hash[7:0]: %08x %08x %08x %08x %08x %08x %08x %08x",
+                                  h[7],h[6],h[5],h[4],h[3],h[2],h[1],h[0]);
+       applog(LOG_INFO,"Targ[7:0]: %08x %08x %08x %08x %08x %08x %08x %08x",
+                                  t[7],t[6],t[5],t[4],t[3],t[2],t[1],t[0]);
+    return true;
  }
  else
     applog( LOG_WARNING, "%d failed to submit share.",
@@ -1878,6 +1737,18 @@ bool submit_lane_solution( struct work *work, const void *hash,
     if ( !opt_quiet )
        applog( LOG_NOTICE, "%d submitted by thread %d, lane %d, job %s",
            submitted_share_count, thr->id, lane, work->job_id );
+    // Low-diff debugging (enabled once a share has been rejected): dump the
+    // hash and the target, most significant word first, for comparison.
+    if ( lowdiff_debug && hash )  // skip the dump if no hash was supplied
+    {
+       const uint32_t *h = (const uint32_t*)hash;
+       const uint32_t *t = (const uint32_t*)work->target;
+       applog(LOG_INFO,"Hash[7:0]: %08x %08x %08x %08x %08x %08x %08x %08x",
+                                  h[7],h[6],h[5],h[4],h[3],h[2],h[1],h[0]);
+       applog(LOG_INFO,"Targ[7:0]: %08x %08x %08x %08x %08x %08x %08x %08x",
+                                  t[7],t[6],t[5],t[4],t[3],t[2],t[1],t[0]);
+    }
+
     return true;
  }
  else
@@ -1886,22 +1757,6 @@ bool submit_lane_solution( struct work *work, const void *hash,
  return false;
 }

-bool rpc2_stratum_job( struct stratum_ctx *sctx, json_t *params )
-{
-	bool ret = false;
-	pthread_mutex_lock(&sctx->work_lock);
-	ret = rpc2_job_decode(params, &sctx->work);
-	if (ret)
-        {
-           if (sctx->job.job_id)
-		free(sctx->job.job_id);
-	   sctx->job.job_id = strdup(sctx->work.job_id);
- 	}
-
-	pthread_mutex_unlock(&sctx->work_lock);
-	return ret;
-}
-
 static bool wanna_mine(int thr_id)
 {
 	bool state = true;
@@ -1988,21 +1843,10 @@ double std_calc_network_diff( struct work* work )
   return d;
 }

-uint32_t *std_get_nonceptr( uint32_t *work_data )
-{
-   return work_data + algo_gate.nonce_index;
-}
-
-uint32_t *jr2_get_nonceptr( uint32_t *work_data )
-{
-   // nonce is misaligned, use byte offset
-   return (uint32_t*) ( ((uint8_t*) work_data) + algo_gate.nonce_index );
-}
-
 void std_get_new_work( struct work* work, struct work* g_work, int thr_id,
                     uint32_t *end_nonce_ptr )
 {
-   uint32_t *nonceptr = algo_gate.get_nonceptr( work->data );
+   uint32_t *nonceptr = work->data + algo_gate.nonce_index;

   bool force_new_work = work->job_id ? strtoul(   work->job_id, NULL, 16 ) !=
                                        strtoul( g_work->job_id, NULL, 16 )
@@ -2021,28 +1865,6 @@ void std_get_new_work( struct work* work, struct work* g_work, int thr_id,
       ++(*nonceptr);
 }

-void jr2_get_new_work( struct work* work, struct work* g_work, int thr_id,
-                     uint32_t *end_nonce_ptr )
-{
-   uint32_t *nonceptr = algo_gate.get_nonceptr( work->data );
-
-   // byte data[ 0..38, 43..75 ], skip over misaligned nonce [39..42]
-   if ( memcmp( work->data, g_work->data, algo_gate.nonce_index )
-     || memcmp( ((uint8_t*) work->data)   + JR2_WORK_CMP_INDEX_2,
-                ((uint8_t*) g_work->data) + JR2_WORK_CMP_INDEX_2,
-                                             JR2_WORK_CMP_SIZE_2 ) )
-   {
-      work_free( work );
-      work_copy( work, g_work );
-      *nonceptr = ( 0xffffffU / opt_n_threads ) * thr_id
-                   + ( *nonceptr & 0xff000000U );
-      *end_nonce_ptr = ( 0xffffffU / opt_n_threads ) * (thr_id+1)
-                        + ( *nonceptr & 0xff000000U ) - 0x20;
-   }
-   else
-       ++(*nonceptr);
-}
-
 bool std_ready_to_mine( struct work* work, struct stratum_ctx* stratum,
                           int thr_id )
 {
@@ -2060,7 +1882,7 @@ static void *miner_thread( void *userdata )
   struct   thr_info *mythr = (struct thr_info *) userdata;
   int      thr_id = mythr->id;
   uint32_t max_nonce;
-   struct timeval cpu_temp_time = {0}; 
+   uint32_t *nonceptr = work.data + algo_gate.nonce_index;

   // end_nonce gets read before being set so it needs to be initialized
   // what is an appropriate value that is completely neutral?
@@ -2170,7 +1992,8 @@ static void *miner_thread( void *userdata )
          if ( have_stratum )
          {
      	     pthread_mutex_lock( &g_work_lock );
-              if ( *algo_gate.get_nonceptr( work.data ) >= end_nonce )
+
+              if ( *nonceptr >= end_nonce )
                 algo_gate.stratum_gen_work( &stratum, &g_work );
              algo_gate.get_new_work( &work, &g_work, thr_id, &end_nonce );
              pthread_mutex_unlock( &g_work_lock );
@@ -2181,7 +2004,7 @@ static void *miner_thread( void *userdata )
 	          pthread_mutex_lock( &g_work_lock );

             if ( time(NULL) - g_work_time >= min_scantime
-                  || *algo_gate.get_nonceptr( work.data ) >= end_nonce )
+                  || *nonceptr >= end_nonce )
             {
 	             if ( unlikely( !get_work( mythr, &g_work ) ) )
                {
@@ -2242,7 +2065,7 @@ static void *miner_thread( void *userdata )
       // Select nonce range for approx 1 min duration based
       // on hashrate, initial value arbitrarilly set to 1000 just to get
       // a sample hashrate for the next time.
-       uint32_t work_nonce = *( algo_gate.get_nonceptr( work.data ) );
+       uint32_t work_nonce = *nonceptr;
       max64 = 60 * thr_hashrates[thr_id];
       if ( max64 <= 0)
          max64 = 1000;
@@ -2301,15 +2124,15 @@ static void *miner_thread( void *userdata )
       if (!opt_quiet && mythr->id == 0 )
       {
          int temp = cpu_temp(0);
+          static struct timeval cpu_temp_time = {0};
          timeval_subtract( &diff, &tv_end, &cpu_temp_time );
-          int wait = temp >= 80 ? 30 : temp >= 70 ? 90 : 180;
+          int wait = temp >= 80 ? 30 : temp >= 70 ? 60 : 120;
          if ( ( diff.tv_sec > wait ) || ( temp > hi_temp ) )
          {
             char tempstr[32];
             int lo_freq, hi_freq;
             linux_cpu_hilo_freq( &lo_freq, &hi_freq );
             memcpy( &cpu_temp_time, &tv_end, sizeof(cpu_temp_time) );
-             if ( temp > hi_temp ) hi_temp = temp;
             if ( use_colors && ( temp >= 70 ) )
             {
                if ( temp >= 80 )
@@ -2321,6 +2144,7 @@ static void *miner_thread( void *userdata )
                sprintf( tempstr, "%d C", temp );
             applog( LOG_INFO,"CPU temp: curr %s (max %d), Freq: %.3f/%.3f GHz",
                     tempstr, hi_temp, (float)lo_freq / 1e6, (float)hi_freq/ 1e6 );
+             if ( temp > hi_temp ) hi_temp = temp;
          }
       }
 #endif
@@ -2398,25 +2222,6 @@ json_t *std_longpoll_rpc_call( CURL *curl, int *err, char* lp_url )
   return val;
 }

-json_t *jr2_longpoll_rpc_call( CURL *curl, int *err )
-{
-   json_t *val;
-   char req[128];
-
-   pthread_mutex_lock( &rpc2_login_lock );
-   if ( !strlen(rpc2_id) )
-   {
-     pthread_mutex_unlock( &rpc2_login_lock );
-     sleep(1);
-     return NULL;
-   }
-   snprintf( req, 128, "{\"method\": \"getjob\", \"params\": {\"id\": \"%s\"}, \"id\":1}\r\n", rpc2_id );
-   pthread_mutex_unlock( &rpc2_login_lock );
-   val = json_rpc2_call( curl, rpc_url, rpc_userpass, req, err,
-                         JSON_RPC_LONGPOLL );
-   return val;
-}
-
 static void *longpoll_thread(void *userdata)
 {
   struct thr_info *mythr = (struct thr_info*) userdata;
@@ -2478,11 +2283,8 @@ start:
      double start_diff = 0.0;
      json_t *res, *soval;
 	   res = json_object_get(val, "result");
-	   if (!jsonrpc_2)
-      {
-         soval = json_object_get(res, "submitold");
-	      submit_old = soval ? json_is_true(soval) : false;
-	   }
+      soval = json_object_get(res, "submitold");
+      submit_old = soval ? json_is_true(soval) : false;
 	   pthread_mutex_lock(&g_work_lock);
 	   start_job_id = g_work.job_id ? strdup(g_work.job_id) : NULL;
 	   if (have_gbt)
@@ -2547,48 +2349,12 @@ out:
 	return NULL;
 }

-bool std_stratum_handle_response( json_t *val )
-{
-    bool valid = false;
-    json_t *err_val, *res_val, *id_val;
-    res_val = json_object_get( val, "result" );
-    err_val = json_object_get( val, "error" );
-    id_val  = json_object_get( val, "id" );
-
-    if ( !res_val || json_integer_value(id_val) < 4 )
-         return false;
-    valid = json_is_true( res_val );
-    share_result( valid, NULL, err_val ?
-                  json_string_value( json_array_get(err_val, 1) ) : NULL );
-    return true;
-}
-
-bool jr2_stratum_handle_response( json_t *val )
-{
-    bool valid = false;
-    json_t *err_val, *res_val;
-    res_val = json_object_get( val, "result" );
-    err_val = json_object_get( val, "error" );
-
-    if ( !res_val && !err_val )
-        return false;
-    json_t *status = json_object_get( res_val, "status" );
-    if ( status ) 
-    {
-        const char *s = json_string_value( status );
-        valid = !strcmp( s, "OK" ) && json_is_null( err_val );
-    }
-    else
-        valid = json_is_null( err_val );
-    share_result( valid, NULL, err_val ? json_string_value(err_val) : NULL );
-    return true;
-}
-
 static bool stratum_handle_response( char *buf )
 {
-	json_t *val, *id_val, *res_val;
+	json_t *val, *id_val, *res_val, *err_val;
 	json_error_t err;
 	bool ret = false;
+   bool share_accepted = false;

 	val = JSON_LOADS( buf, &err );
 	if (!val)
@@ -2602,8 +2368,15 @@ static bool stratum_handle_response( char *buf )
   id_val = json_object_get( val, "id" );
 	if ( !id_val || json_is_null(id_val) )
 		goto out;
-   if ( !algo_gate.stratum_handle_response( val ) )
+
+   err_val = json_object_get( val, "error" );
+
+   if ( !res_val || json_integer_value( id_val ) < 4 )
      goto out;
+   share_accepted = json_is_true( res_val );
+   share_result( share_accepted, NULL, err_val ?
+                 json_string_value( json_array_get(err_val, 1) ) : NULL );
+
 	ret = true;
 out:
 	if (val)
@@ -2726,7 +2499,7 @@ void std_stratum_gen_work( struct stratum_ctx *sctx, struct work *g_work )
      if ( !opt_quiet )
      {
         applog2( LOG_INFO, "%s: %s", algo_names[opt_algo], short_url );
-         applog2( LOG_INFO, "Diff: Net %.3g, Stratum %.3g, Target %.3g",
+         applog2( LOG_INFO, "Diff: Net %.5g, Stratum %.5g, Target %.5g",
                            net_diff, stratum_diff, last_targetdiff );

         if ( likely( hr > 0. ) )
@@ -2766,31 +2539,6 @@ void std_stratum_gen_work( struct stratum_ctx *sctx, struct work *g_work )
   }  // new diff/block   
 }

-void jr2_stratum_gen_work( struct stratum_ctx *sctx, struct work *g_work )
-{
-   pthread_mutex_lock( &sctx->work_lock );
-   work_free( g_work );
-   work_copy( g_work, &sctx->work );
-   pthread_mutex_unlock( &sctx->work_lock );
-/*
-   if ( stratum_diff != sctx->job.diff )
-      applog( LOG_BLUE, "New stratum diff %g, block %d, job %s",
-                        sctx->job.diff, sctx->block_height, g_work->job_id );
-   else if ( last_block_height != sctx->block_height )
-      applog( LOG_BLUE, "New block %d, job %s",
-                         sctx->block_height, g_work->job_id );
-   else if ( g_work->job_id )
-      applog( LOG_BLUE,"New job %s", g_work->job_id );
-*/   
-   if ( last_block_height != stratum.block_height )
-   {
-      applog(LOG_BLUE, "Stratum detected new block");
-      last_block_height = stratum.block_height;
-   }
-   if ( stratum_diff != g_work->stratum_diff )
-      stratum_diff = g_work->stratum_diff;
-}
-
 static void *stratum_thread(void *userdata )
 {
   struct thr_info *mythr = (struct thr_info *) userdata;
@@ -2844,11 +2592,6 @@ static void *stratum_thread(void *userdata )
         }
         else
            applog(LOG_BLUE,"Stratum connection established" );
-         if ( unlikely( jsonrpc_2 ) )
-         {
-             work_free(&g_work);
-             work_copy(&g_work, &stratum.work);
-         }
      }

      report_summary_log( ( stratum_diff != stratum.job.diff )
@@ -3010,11 +2753,11 @@ void parse_arg(int key, char *arg )
           break;

 	case 'b':
-		p = strstr(arg, ":");
+      opt_api_enabled = true;
+      p = strstr(arg, ":");
 		if (p) {
 			/* ip:port */
 			if (p - arg > 0) {
-				free(opt_api_allow);
 				opt_api_allow = strdup(arg);
 				opt_api_allow[p - arg] = '\0';
 			}
@@ -3024,10 +2767,12 @@ void parse_arg(int key, char *arg )
 			/* ip only */
 			free(opt_api_allow);
 			opt_api_allow = strdup(arg);
-		}
+         opt_api_listen = default_api_listen;
+      }
 		else if (arg) {
 			/* port or 0 to disable */
-			opt_api_listen = atoi(arg);
+         opt_api_allow = default_api_allow;      
+         opt_api_listen = atoi(arg);
 		}
      break;
 	case 1030: /* --api-remote */
@@ -3635,7 +3380,6 @@ int main(int argc, char *argv[])

 	rpc_user = strdup("");
 	rpc_pass = strdup("");
-//	opt_api_allow = strdup("127.0.0.1"); /* 0.0.0.0 for all ips */

   parse_cmdline(argc, argv);

@@ -3735,8 +3479,6 @@ int main(int argc, char *argv[])

 	pthread_mutex_init( &stats_lock, NULL );
 	pthread_mutex_init( &g_work_lock, NULL );
-	pthread_mutex_init( &rpc2_job_lock, NULL );
-	pthread_mutex_init( &rpc2_login_lock, NULL );
 	pthread_mutex_init( &stratum.sock_lock, NULL );
 	pthread_mutex_init( &stratum.work_lock, NULL );

@@ -3911,7 +3653,7 @@ int main(int argc, char *argv[])
 			tq_push(thr_info[stratum_thr_id].q, strdup(rpc_url));
 	}

-	if (opt_api_listen)
+	if ( opt_api_enabled )
   {
 		/* api thread */
 		api_thr_id = opt_n_threads + 3;
--- a/crypto/aesb.c
+++ b/crypto/aesb.c
@@ -1,170 +0,0 @@
-/*
---------------------------------------------------------------------------
-Copyright (c) 1998-2013, Brian Gladman, Worcester, UK. All rights reserved.
-
-The redistribution and use of this software (with or without changes)
-is allowed without the payment of fees or royalties provided that:
-
-  source code distributions include the above copyright notice, this
-  list of conditions and the following disclaimer;
-
-  binary distributions include the above copyright notice, this list
-  of conditions and the following disclaimer in their documentation.
-
-This software is provided 'as is' with no explicit or implied warranties
-in respect of its operation, including, but not limited to, correctness
-and fitness for purpose.
---------------------------------------------------------------------------
-Issue Date: 20/12/2007
-*/
-
-#include <stdint.h>
-
-#if defined(__cplusplus)
-extern "C"
-{
-#endif
-
-#define TABLE_ALIGN     32
-#define WPOLY           0x011b
-#define N_COLS          4
-#define AES_BLOCK_SIZE  16
-#define RC_LENGTH       (5 * (AES_BLOCK_SIZE / 4 - 2))
-
-#if defined(_MSC_VER)
-#define ALIGN __declspec(align(TABLE_ALIGN))
-#elif defined(__GNUC__)
-#define ALIGN __attribute__ ((aligned(16)))
-#else
-#define ALIGN
-#endif
-
-#define rf1(r,c) (r)
-#define word_in(x,c) (*((uint32_t*)(x)+(c)))
-#define word_out(x,c,v) (*((uint32_t*)(x)+(c)) = (v))
-
-#define s(x,c) x[c]
-#define si(y,x,c) (s(y,c) = word_in(x, c))
-#define so(y,x,c) word_out(y, c, s(x,c))
-#define state_in(y,x) si(y,x,0); si(y,x,1); si(y,x,2); si(y,x,3)
-#define state_out(y,x)  so(y,x,0); so(y,x,1); so(y,x,2); so(y,x,3)
-#define round(y,x,k) \
-y[0] = (k)[0]  ^ (t_fn[0][x[0] & 0xff] ^ t_fn[1][(x[1] >> 8) & 0xff] ^ t_fn[2][(x[2] >> 16) & 0xff] ^ t_fn[3][x[3] >> 24]); \
-y[1] = (k)[1]  ^ (t_fn[0][x[1] & 0xff] ^ t_fn[1][(x[2] >> 8) & 0xff] ^ t_fn[2][(x[3] >> 16) & 0xff] ^ t_fn[3][x[0] >> 24]); \
-y[2] = (k)[2]  ^ (t_fn[0][x[2] & 0xff] ^ t_fn[1][(x[3] >> 8) & 0xff] ^ t_fn[2][(x[0] >> 16) & 0xff] ^ t_fn[3][x[1] >> 24]); \
-y[3] = (k)[3]  ^ (t_fn[0][x[3] & 0xff] ^ t_fn[1][(x[0] >> 8) & 0xff] ^ t_fn[2][(x[1] >> 16) & 0xff] ^ t_fn[3][x[2] >> 24]);
-#define to_byte(x) ((x) & 0xff)
-#define bval(x,n) to_byte((x) >> (8 * (n)))
-
-#define fwd_var(x,r,c)\
- ( r == 0 ? ( c == 0 ? s(x,0) : c == 1 ? s(x,1) : c == 2 ? s(x,2) : s(x,3))\
- : r == 1 ? ( c == 0 ? s(x,1) : c == 1 ? s(x,2) : c == 2 ? s(x,3) : s(x,0))\
- : r == 2 ? ( c == 0 ? s(x,2) : c == 1 ? s(x,3) : c == 2 ? s(x,0) : s(x,1))\
- :          ( c == 0 ? s(x,3) : c == 1 ? s(x,0) : c == 2 ? s(x,1) : s(x,2)))
-
-#define fwd_rnd(y,x,k,c)  (s(y,c) = (k)[c] ^ four_tables(x,t_use(f,n),fwd_var,rf1,c))
-
-#define sb_data(w) {\
-    w(0x63), w(0x7c), w(0x77), w(0x7b), w(0xf2), w(0x6b), w(0x6f), w(0xc5),\
-    w(0x30), w(0x01), w(0x67), w(0x2b), w(0xfe), w(0xd7), w(0xab), w(0x76),\
-    w(0xca), w(0x82), w(0xc9), w(0x7d), w(0xfa), w(0x59), w(0x47), w(0xf0),\
-    w(0xad), w(0xd4), w(0xa2), w(0xaf), w(0x9c), w(0xa4), w(0x72), w(0xc0),\
-    w(0xb7), w(0xfd), w(0x93), w(0x26), w(0x36), w(0x3f), w(0xf7), w(0xcc),\
-    w(0x34), w(0xa5), w(0xe5), w(0xf1), w(0x71), w(0xd8), w(0x31), w(0x15),\
-    w(0x04), w(0xc7), w(0x23), w(0xc3), w(0x18), w(0x96), w(0x05), w(0x9a),\
-    w(0x07), w(0x12), w(0x80), w(0xe2), w(0xeb), w(0x27), w(0xb2), w(0x75),\
-    w(0x09), w(0x83), w(0x2c), w(0x1a), w(0x1b), w(0x6e), w(0x5a), w(0xa0),\
-    w(0x52), w(0x3b), w(0xd6), w(0xb3), w(0x29), w(0xe3), w(0x2f), w(0x84),\
-    w(0x53), w(0xd1), w(0x00), w(0xed), w(0x20), w(0xfc), w(0xb1), w(0x5b),\
-    w(0x6a), w(0xcb), w(0xbe), w(0x39), w(0x4a), w(0x4c), w(0x58), w(0xcf),\
-    w(0xd0), w(0xef), w(0xaa), w(0xfb), w(0x43), w(0x4d), w(0x33), w(0x85),\
-    w(0x45), w(0xf9), w(0x02), w(0x7f), w(0x50), w(0x3c), w(0x9f), w(0xa8),\
-    w(0x51), w(0xa3), w(0x40), w(0x8f), w(0x92), w(0x9d), w(0x38), w(0xf5),\
-    w(0xbc), w(0xb6), w(0xda), w(0x21), w(0x10), w(0xff), w(0xf3), w(0xd2),\
-    w(0xcd), w(0x0c), w(0x13), w(0xec), w(0x5f), w(0x97), w(0x44), w(0x17),\
-    w(0xc4), w(0xa7), w(0x7e), w(0x3d), w(0x64), w(0x5d), w(0x19), w(0x73),\
-    w(0x60), w(0x81), w(0x4f), w(0xdc), w(0x22), w(0x2a), w(0x90), w(0x88),\
-    w(0x46), w(0xee), w(0xb8), w(0x14), w(0xde), w(0x5e), w(0x0b), w(0xdb),\
-    w(0xe0), w(0x32), w(0x3a), w(0x0a), w(0x49), w(0x06), w(0x24), w(0x5c),\
-    w(0xc2), w(0xd3), w(0xac), w(0x62), w(0x91), w(0x95), w(0xe4), w(0x79),\
-    w(0xe7), w(0xc8), w(0x37), w(0x6d), w(0x8d), w(0xd5), w(0x4e), w(0xa9),\
-    w(0x6c), w(0x56), w(0xf4), w(0xea), w(0x65), w(0x7a), w(0xae), w(0x08),\
-    w(0xba), w(0x78), w(0x25), w(0x2e), w(0x1c), w(0xa6), w(0xb4), w(0xc6),\
-    w(0xe8), w(0xdd), w(0x74), w(0x1f), w(0x4b), w(0xbd), w(0x8b), w(0x8a),\
-    w(0x70), w(0x3e), w(0xb5), w(0x66), w(0x48), w(0x03), w(0xf6), w(0x0e),\
-    w(0x61), w(0x35), w(0x57), w(0xb9), w(0x86), w(0xc1), w(0x1d), w(0x9e),\
-    w(0xe1), w(0xf8), w(0x98), w(0x11), w(0x69), w(0xd9), w(0x8e), w(0x94),\
-    w(0x9b), w(0x1e), w(0x87), w(0xe9), w(0xce), w(0x55), w(0x28), w(0xdf),\
-    w(0x8c), w(0xa1), w(0x89), w(0x0d), w(0xbf), w(0xe6), w(0x42), w(0x68),\
-    w(0x41), w(0x99), w(0x2d), w(0x0f), w(0xb0), w(0x54), w(0xbb), w(0x16) }
-
-#define rc_data(w) {\
-    w(0x01), w(0x02), w(0x04), w(0x08), w(0x10),w(0x20), w(0x40), w(0x80),\
-    w(0x1b), w(0x36) }
-
-#define bytes2word(b0, b1, b2, b3) (((uint32_t)(b3) << 24) | \
-    ((uint32_t)(b2) << 16) | ((uint32_t)(b1) << 8) | (b0))
-
-#define h0(x)   (x)
-#define w0(p)   bytes2word(p, 0, 0, 0)
-#define w1(p)   bytes2word(0, p, 0, 0)
-#define w2(p)   bytes2word(0, 0, p, 0)
-#define w3(p)   bytes2word(0, 0, 0, p)
-
-#define u0(p)   bytes2word(f2(p), p, p, f3(p))
-#define u1(p)   bytes2word(f3(p), f2(p), p, p)
-#define u2(p)   bytes2word(p, f3(p), f2(p), p)
-#define u3(p)   bytes2word(p, p, f3(p), f2(p))
-
-#define v0(p)   bytes2word(fe(p), f9(p), fd(p), fb(p))
-#define v1(p)   bytes2word(fb(p), fe(p), f9(p), fd(p))
-#define v2(p)   bytes2word(fd(p), fb(p), fe(p), f9(p))
-#define v3(p)   bytes2word(f9(p), fd(p), fb(p), fe(p))
-
-#define f2(x)   ((x<<1) ^ (((x>>7) & 1) * WPOLY))
-#define f4(x)   ((x<<2) ^ (((x>>6) & 1) * WPOLY) ^ (((x>>6) & 2) * WPOLY))
-#define f8(x)   ((x<<3) ^ (((x>>5) & 1) * WPOLY) ^ (((x>>5) & 2) * WPOLY) ^ (((x>>5) & 4) * WPOLY))
-#define f3(x)   (f2(x) ^ x)
-#define f9(x)   (f8(x) ^ x)
-#define fb(x)   (f8(x) ^ f2(x) ^ x)
-#define fd(x)   (f8(x) ^ f4(x) ^ x)
-#define fe(x)   (f8(x) ^ f4(x) ^ f2(x))
-
-#define t_dec(m,n) t_##m##n
-#define t_set(m,n) t_##m##n
-#define t_use(m,n) t_##m##n
-
-#define d_4(t,n,b,e,f,g,h) ALIGN const t n[4][256] = { b(e), b(f), b(g), b(h) }
-
-#define four_tables(x,tab,vf,rf,c) \
-    (tab[0][bval(vf(x,0,c),rf(0,c))] \
-    ^ tab[1][bval(vf(x,1,c),rf(1,c))] \
-    ^ tab[2][bval(vf(x,2,c),rf(2,c))] \
-    ^ tab[3][bval(vf(x,3,c),rf(3,c))])
-
-d_4(uint32_t, t_dec(f,n), sb_data, u0, u1, u2, u3);
-
-void aesb_single_round(const uint8_t *in, uint8_t *out, uint8_t *expandedKey)
-{
-    round(((uint32_t*) out), ((uint32_t*) in), ((uint32_t*) expandedKey));
-}
-
-void aesb_pseudo_round_mut(uint8_t *val, uint8_t *expandedKey)
-{
-    uint32_t b1[4];
-    round(b1, ((uint32_t*) val), ((const uint32_t *) expandedKey));
-    round(((uint32_t*) val), b1, ((const uint32_t *) expandedKey) + 1 * N_COLS);
-    round(b1, ((uint32_t*) val), ((const uint32_t *) expandedKey) + 2 * N_COLS);
-    round(((uint32_t*) val), b1, ((const uint32_t *) expandedKey) + 3 * N_COLS);
-    round(b1, ((uint32_t*) val), ((const uint32_t *) expandedKey) + 4 * N_COLS);
-    round(((uint32_t*) val), b1, ((const uint32_t *) expandedKey) + 5 * N_COLS);
-    round(b1, ((uint32_t*) val), ((const uint32_t *) expandedKey) + 6 * N_COLS);
-    round(((uint32_t*) val), b1, ((const uint32_t *) expandedKey) + 7 * N_COLS);
-    round(b1, ((uint32_t*) val), ((const uint32_t *) expandedKey) + 8 * N_COLS);
-    round(((uint32_t*) val), b1, ((const uint32_t *) expandedKey) + 9 * N_COLS);
-}
-
-
-#if defined(__cplusplus)
-}
-#endif
--- a/crypto/c_blake256.c
+++ b/crypto/c_blake256.c
@@ -1,326 +0,0 @@
-/*
- * The blake256_* and blake224_* functions are largely copied from
- * blake256_light.c and blake224_light.c from the BLAKE website:
- *
- *     http://131002.net/blake/
- *
- * The hmac_* functions implement HMAC-BLAKE-256 and HMAC-BLAKE-224.
- * HMAC is specified by RFC 2104.
- */
-
-#include <string.h>
-#include <stdio.h>
-#include <stdint.h>
-#include "c_blake256.h"
-
-#define U8TO32(p) \
-    (((uint32_t)((p)[0]) << 24) | ((uint32_t)((p)[1]) << 16) |    \
-     ((uint32_t)((p)[2]) <<  8) | ((uint32_t)((p)[3])      ))
-#define U32TO8(p, v) \
-    (p)[0] = (uint8_t)((v) >> 24); (p)[1] = (uint8_t)((v) >> 16); \
-    (p)[2] = (uint8_t)((v) >>  8); (p)[3] = (uint8_t)((v)      );
-
-const uint8_t sigma[][16] = {
-    { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15},
-    {14,10, 4, 8, 9,15,13, 6, 1,12, 0, 2,11, 7, 5, 3},
-    {11, 8,12, 0, 5, 2,15,13,10,14, 3, 6, 7, 1, 9, 4},
-    { 7, 9, 3, 1,13,12,11,14, 2, 6, 5,10, 4, 0,15, 8},
-    { 9, 0, 5, 7, 2, 4,10,15,14, 1,11,12, 6, 8, 3,13},
-    { 2,12, 6,10, 0,11, 8, 3, 4,13, 7, 5,15,14, 1, 9},
-    {12, 5, 1,15,14,13, 4,10, 0, 7, 6, 3, 9, 2, 8,11},
-    {13,11, 7,14,12, 1, 3, 9, 5, 0,15, 4, 8, 6, 2,10},
-    { 6,15,14, 9,11, 3, 0, 8,12, 2,13, 7, 1, 4,10, 5},
-    {10, 2, 8, 4, 7, 6, 1, 5,15,11, 9,14, 3,12,13, 0},
-    { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15},
-    {14,10, 4, 8, 9,15,13, 6, 1,12, 0, 2,11, 7, 5, 3},
-    {11, 8,12, 0, 5, 2,15,13,10,14, 3, 6, 7, 1, 9, 4},
-    { 7, 9, 3, 1,13,12,11,14, 2, 6, 5,10, 4, 0,15, 8}
-};
-
-const uint32_t cst[16] = {
-    0x243F6A88, 0x85A308D3, 0x13198A2E, 0x03707344,
-    0xA4093822, 0x299F31D0, 0x082EFA98, 0xEC4E6C89,
-    0x452821E6, 0x38D01377, 0xBE5466CF, 0x34E90C6C,
-    0xC0AC29B7, 0xC97C50DD, 0x3F84D5B5, 0xB5470917
-};
-
-static const uint8_t padding[] = {
-    0x80,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
-};
-
-
-void blake256_compress(state *S, const uint8_t *block) {
-    uint32_t v[16], m[16], i;
-
-#define ROT(x,n) (((x)<<(32-n))|((x)>>(n)))
-#define G(a,b,c,d,e)                                      \
-    v[a] += (m[sigma[i][e]] ^ cst[sigma[i][e+1]]) + v[b]; \
-    v[d] = ROT(v[d] ^ v[a],16);                           \
-    v[c] += v[d];                                         \
-    v[b] = ROT(v[b] ^ v[c],12);                           \
-    v[a] += (m[sigma[i][e+1]] ^ cst[sigma[i][e]])+v[b];   \
-    v[d] = ROT(v[d] ^ v[a], 8);                           \
-    v[c] += v[d];                                         \
-    v[b] = ROT(v[b] ^ v[c], 7);
-
-    for (i = 0; i < 16; ++i) m[i] = U8TO32(block + i * 4);
-    for (i = 0; i < 8;  ++i) v[i] = S->h[i];
-    v[ 8] = S->s[0] ^ 0x243F6A88;
-    v[ 9] = S->s[1] ^ 0x85A308D3;
-    v[10] = S->s[2] ^ 0x13198A2E;
-    v[11] = S->s[3] ^ 0x03707344;
-    v[12] = 0xA4093822;
-    v[13] = 0x299F31D0;
-    v[14] = 0x082EFA98;
-    v[15] = 0xEC4E6C89;
-
-    if (S->nullt == 0) {
-        v[12] ^= S->t[0];
-        v[13] ^= S->t[0];
-        v[14] ^= S->t[1];
-        v[15] ^= S->t[1];
-    }
-
-    for (i = 0; i < 14; ++i) {
-        G(0, 4,  8, 12,  0);
-        G(1, 5,  9, 13,  2);
-        G(2, 6, 10, 14,  4);
-        G(3, 7, 11, 15,  6);
-        G(3, 4,  9, 14, 14);
-        G(2, 7,  8, 13, 12);
-        G(0, 5, 10, 15,  8);
-        G(1, 6, 11, 12, 10);
-    }
-
-    for (i = 0; i < 16; ++i) S->h[i % 8] ^= v[i];
-    for (i = 0; i < 8;  ++i) S->h[i] ^= S->s[i % 4];
-}
-
-void blake256_init(state *S) {
-    S->h[0] = 0x6A09E667;
-    S->h[1] = 0xBB67AE85;
-    S->h[2] = 0x3C6EF372;
-    S->h[3] = 0xA54FF53A;
-    S->h[4] = 0x510E527F;
-    S->h[5] = 0x9B05688C;
-    S->h[6] = 0x1F83D9AB;
-    S->h[7] = 0x5BE0CD19;
-    S->t[0] = S->t[1] = S->buflen = S->nullt = 0;
-    S->s[0] = S->s[1] = S->s[2] = S->s[3] = 0;
-}
-
-void blake224_init(state *S) {
-    S->h[0] = 0xC1059ED8;
-    S->h[1] = 0x367CD507;
-    S->h[2] = 0x3070DD17;
-    S->h[3] = 0xF70E5939;
-    S->h[4] = 0xFFC00B31;
-    S->h[5] = 0x68581511;
-    S->h[6] = 0x64F98FA7;
-    S->h[7] = 0xBEFA4FA4;
-    S->t[0] = S->t[1] = S->buflen = S->nullt = 0;
-    S->s[0] = S->s[1] = S->s[2] = S->s[3] = 0;
-}
-
-// datalen = number of bits
-void blake256_update(state *S, const uint8_t *data, uint64_t datalen) {
-    int left = S->buflen >> 3;
-    int fill = 64 - left;
-
-    if (left && (((datalen >> 3) & 0x3F) >= (unsigned) fill)) {
-        memcpy((void *) (S->buf + left), (void *) data, fill);
-        S->t[0] += 512;
-        if (S->t[0] == 0) S->t[1]++;
-        blake256_compress(S, S->buf);
-        data += fill;
-        datalen -= (fill << 3);
-        left = 0;
-    }
-
-    while (datalen >= 512) {
-        S->t[0] += 512;
-        if (S->t[0] == 0) S->t[1]++;
-        blake256_compress(S, data);
-        data += 64;
-        datalen -= 512;
-    }
-
-    if (datalen > 0) {
-        memcpy((void *) (S->buf + left), (void *) data, (size_t) (datalen >> 3));
-        S->buflen = (left << 3) + (int) datalen;
-    } else {
-        S->buflen = 0;
-    }
-}
-
-// datalen = number of bits
-void blake224_update(state *S, const uint8_t *data, uint64_t datalen) {
-    blake256_update(S, data, datalen);
-}
-
-void blake256_final_h(state *S, uint8_t *digest, uint8_t pa, uint8_t pb) {
-    uint8_t msglen[8];
-    uint32_t lo = S->t[0] + S->buflen, hi = S->t[1];
-    if (lo < (unsigned) S->buflen) hi++;
-    U32TO8(msglen + 0, hi);
-    U32TO8(msglen + 4, lo);
-
-    if (S->buflen == 440) { /* one padding byte */
-        S->t[0] -= 8;
-        blake256_update(S, &pa, 8);
-    } else {
-        if (S->buflen < 440) { /* enough space to fill the block  */
-            if (S->buflen == 0) S->nullt = 1;
-            S->t[0] -= 440 - S->buflen;
-            blake256_update(S, padding, 440 - S->buflen);
-        } else { /* need 2 compressions */
-            S->t[0] -= 512 - S->buflen;
-            blake256_update(S, padding, 512 - S->buflen);
-            S->t[0] -= 440;
-            blake256_update(S, padding + 1, 440);
-            S->nullt = 1;
-        }
-        blake256_update(S, &pb, 8);
-        S->t[0] -= 8;
-    }
-    S->t[0] -= 64;
-    blake256_update(S, msglen, 64);
-
-    U32TO8(digest +  0, S->h[0]);
-    U32TO8(digest +  4, S->h[1]);
-    U32TO8(digest +  8, S->h[2]);
-    U32TO8(digest + 12, S->h[3]);
-    U32TO8(digest + 16, S->h[4]);
-    U32TO8(digest + 20, S->h[5]);
-    U32TO8(digest + 24, S->h[6]);
-    U32TO8(digest + 28, S->h[7]);
-}
-
-void blake256_final(state *S, uint8_t *digest) {
-    blake256_final_h(S, digest, 0x81, 0x01);
-}
-
-void blake224_final(state *S, uint8_t *digest) {
-    blake256_final_h(S, digest, 0x80, 0x00);
-}
-
-// inlen = number of bytes
-void blake256_hash(uint8_t *out, const uint8_t *in, uint64_t inlen) {
-    state S;
-    blake256_init(&S);
-    blake256_update(&S, in, inlen * 8);
-    blake256_final(&S, out);
-}
-
-// inlen = number of bytes
-void blake224_hash(uint8_t *out, const uint8_t *in, uint64_t inlen) {
-    state S;
-    blake224_init(&S);
-    blake224_update(&S, in, inlen * 8);
-    blake224_final(&S, out);
-}
-
-// keylen = number of bytes
-void hmac_blake256_init(hmac_state *S, const uint8_t *_key, uint64_t keylen) {
-    const uint8_t *key = _key;
-    uint8_t keyhash[32];
-    uint8_t pad[64];
-    uint64_t i;
-
-    if (keylen > 64) {
-        blake256_hash(keyhash, key, keylen);
-        key = keyhash;
-        keylen = 32;
-    }
-
-    blake256_init(&S->inner);
-    memset(pad, 0x36, 64);
-    for (i = 0; i < keylen; ++i) {
-        pad[i] ^= key[i];
-    }
-    blake256_update(&S->inner, pad, 512);
-
-    blake256_init(&S->outer);
-    memset(pad, 0x5c, 64);
-    for (i = 0; i < keylen; ++i) {
-        pad[i] ^= key[i];
-    }
-    blake256_update(&S->outer, pad, 512);
-
-    memset(keyhash, 0, 32);
-}
-
-// keylen = number of bytes
-void hmac_blake224_init(hmac_state *S, const uint8_t *_key, uint64_t keylen) {
-    const uint8_t *key = _key;
-    uint8_t keyhash[32];
-    uint8_t pad[64];
-    uint64_t i;
-
-    if (keylen > 64) {
-        blake256_hash(keyhash, key, keylen);
-        key = keyhash;
-        keylen = 28;
-    }
-
-    blake224_init(&S->inner);
-    memset(pad, 0x36, 64);
-    for (i = 0; i < keylen; ++i) {
-        pad[i] ^= key[i];
-    }
-    blake224_update(&S->inner, pad, 512);
-
-    blake224_init(&S->outer);
-    memset(pad, 0x5c, 64);
-    for (i = 0; i < keylen; ++i) {
-        pad[i] ^= key[i];
-    }
-    blake224_update(&S->outer, pad, 512);
-
-    memset(keyhash, 0, 32);
-}
-
-// datalen = number of bits
-void hmac_blake256_update(hmac_state *S, const uint8_t *data, uint64_t datalen) {
-  // update the inner state
-  blake256_update(&S->inner, data, datalen);
-}
-
-// datalen = number of bits
-void hmac_blake224_update(hmac_state *S, const uint8_t *data, uint64_t datalen) {
-  // update the inner state
-  blake224_update(&S->inner, data, datalen);
-}
-
-void hmac_blake256_final(hmac_state *S, uint8_t *digest) {
-    uint8_t ihash[32];
-    blake256_final(&S->inner, ihash);
-    blake256_update(&S->outer, ihash, 256);
-    blake256_final(&S->outer, digest);
-    memset(ihash, 0, 32);
-}
-
-void hmac_blake224_final(hmac_state *S, uint8_t *digest) {
-    uint8_t ihash[32];
-    blake224_final(&S->inner, ihash);
-    blake224_update(&S->outer, ihash, 224);
-    blake224_final(&S->outer, digest);
-    memset(ihash, 0, 32);
-}
-
-// keylen = number of bytes; inlen = number of bytes
-void hmac_blake256_hash(uint8_t *out, const uint8_t *key, uint64_t keylen, const uint8_t *in, uint64_t inlen) {
-    hmac_state S;
-    hmac_blake256_init(&S, key, keylen);
-    hmac_blake256_update(&S, in, inlen * 8);
-    hmac_blake256_final(&S, out);
-}
-
-// keylen = number of bytes; inlen = number of bytes
-void hmac_blake224_hash(uint8_t *out, const uint8_t *key, uint64_t keylen, const uint8_t *in, uint64_t inlen) {
-    hmac_state S;
-    hmac_blake224_init(&S, key, keylen);
-    hmac_blake224_update(&S, in, inlen * 8);
-    hmac_blake224_final(&S, out);
-}
--- a/crypto/c_blake256.h
+++ b/crypto/c_blake256.h
@@ -1,43 +0,0 @@
-#ifndef _BLAKE256_H_
-#define _BLAKE256_H_
-
-#include <stdint.h>
-
-typedef struct {
-  uint32_t h[8], s[4], t[2];
-  int buflen, nullt;
-  uint8_t buf[64];
-} state;
-
-typedef struct {
-  state inner;
-  state outer;
-} hmac_state;
-
-void blake256_init(state *);
-void blake224_init(state *);
-
-void blake256_update(state *, const uint8_t *, uint64_t);
-void blake224_update(state *, const uint8_t *, uint64_t);
-
-void blake256_final(state *, uint8_t *);
-void blake224_final(state *, uint8_t *);
-
-void blake256_hash(uint8_t *, const uint8_t *, uint64_t);
-void blake224_hash(uint8_t *, const uint8_t *, uint64_t);
-
-/* HMAC functions: */
-
-void hmac_blake256_init(hmac_state *, const uint8_t *, uint64_t);
-void hmac_blake224_init(hmac_state *, const uint8_t *, uint64_t);
-
-void hmac_blake256_update(hmac_state *, const uint8_t *, uint64_t);
-void hmac_blake224_update(hmac_state *, const uint8_t *, uint64_t);
-
-void hmac_blake256_final(hmac_state *, uint8_t *);
-void hmac_blake224_final(hmac_state *, uint8_t *);
-
-void hmac_blake256_hash(uint8_t *, const uint8_t *, uint64_t, const uint8_t *, uint64_t);
-void hmac_blake224_hash(uint8_t *, const uint8_t *, uint64_t, const uint8_t *, uint64_t);
-
-#endif /* _BLAKE256_H_ */
--- a/crypto/c_groestl.c
+++ b/crypto/c_groestl.c
@@ -1,360 +0,0 @@
-/* hash.c     April 2012
- * Groestl ANSI C code optimised for 32-bit machines
- * Author: Thomas Krinninger
- *
- *  This work is based on the implementation of
- *          Soeren S. Thomsen and Krystian Matusiewicz
- *          
- *
- */
-
-#include "c_groestl.h"
-#include "groestl_tables.h"
-
-#define P_TYPE 0
-#define Q_TYPE 1
-
-const uint8_t shift_Values[2][8] = {{0,1,2,3,4,5,6,7},{1,3,5,7,0,2,4,6}};
-
-const uint8_t indices_cyclic[15] = {0,1,2,3,4,5,6,7,0,1,2,3,4,5,6};
-
-
-#define ROTATE_COLUMN_DOWN(v1, v2, amount_bytes, temp_var) {temp_var = (v1<<(8*amount_bytes))|(v2>>(8*(4-amount_bytes))); \
-															v2 = (v2<<(8*amount_bytes))|(v1>>(8*(4-amount_bytes))); \
-															v1 = temp_var;}
-  
-
-#define COLUMN(x,y,i,c0,c1,c2,c3,c4,c5,c6,c7,tv1,tv2,tu,tl,t)				\
-   tu = T[2*(uint32_t)x[4*c0+0]];			    \
-   tl = T[2*(uint32_t)x[4*c0+0]+1];		    \
-   tv1 = T[2*(uint32_t)x[4*c1+1]];			\
-   tv2 = T[2*(uint32_t)x[4*c1+1]+1];			\
-   ROTATE_COLUMN_DOWN(tv1,tv2,1,t)	\
-   tu ^= tv1;						\
-   tl ^= tv2;						\
-   tv1 = T[2*(uint32_t)x[4*c2+2]];			\
-   tv2 = T[2*(uint32_t)x[4*c2+2]+1];			\
-   ROTATE_COLUMN_DOWN(tv1,tv2,2,t)	\
-   tu ^= tv1;						\
-   tl ^= tv2;   					\
-   tv1 = T[2*(uint32_t)x[4*c3+3]];			\
-   tv2 = T[2*(uint32_t)x[4*c3+3]+1];			\
-   ROTATE_COLUMN_DOWN(tv1,tv2,3,t)	\
-   tu ^= tv1;						\
-   tl ^= tv2;						\
-   tl ^= T[2*(uint32_t)x[4*c4+0]];			\
-   tu ^= T[2*(uint32_t)x[4*c4+0]+1];			\
-   tv1 = T[2*(uint32_t)x[4*c5+1]];			\
-   tv2 = T[2*(uint32_t)x[4*c5+1]+1];			\
-   ROTATE_COLUMN_DOWN(tv1,tv2,1,t)	\
-   tl ^= tv1;						\
-   tu ^= tv2;						\
-   tv1 = T[2*(uint32_t)x[4*c6+2]];			\
-   tv2 = T[2*(uint32_t)x[4*c6+2]+1];			\
-   ROTATE_COLUMN_DOWN(tv1,tv2,2,t)	\
-   tl ^= tv1;						\
-   tu ^= tv2;   					\
-   tv1 = T[2*(uint32_t)x[4*c7+3]];			\
-   tv2 = T[2*(uint32_t)x[4*c7+3]+1];			\
-   ROTATE_COLUMN_DOWN(tv1,tv2,3,t)	\
-   tl ^= tv1;						\
-   tu ^= tv2;						\
-   y[i] = tu;						\
-   y[i+1] = tl;
-
-
-/* compute one round of P (short variants) */
-static void RND512P(uint8_t *x, uint32_t *y, uint32_t r) {
-  uint32_t temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp;
-  uint32_t* x32 = (uint32_t*)x;
-  x32[ 0] ^= 0x00000000^r;
-  x32[ 2] ^= 0x00000010^r;
-  x32[ 4] ^= 0x00000020^r;
-  x32[ 6] ^= 0x00000030^r;
-  x32[ 8] ^= 0x00000040^r;
-  x32[10] ^= 0x00000050^r;
-  x32[12] ^= 0x00000060^r;
-  x32[14] ^= 0x00000070^r;
-  COLUMN(x,y, 0,  0,  2,  4,  6,  9, 11, 13, 15, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
-  COLUMN(x,y, 2,  2,  4,  6,  8, 11, 13, 15,  1, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
-  COLUMN(x,y, 4,  4,  6,  8, 10, 13, 15,  1,  3, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
-  COLUMN(x,y, 6,  6,  8, 10, 12, 15,  1,  3,  5, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
-  COLUMN(x,y, 8,  8, 10, 12, 14,  1,  3,  5,  7, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
-  COLUMN(x,y,10, 10, 12, 14,  0,  3,  5,  7,  9, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
-  COLUMN(x,y,12, 12, 14,  0,  2,  5,  7,  9, 11, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
-  COLUMN(x,y,14, 14,  0,  2,  4,  7,  9, 11, 13, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
-}
-
-/* compute one round of Q (short variants) */
-static void RND512Q(uint8_t *x, uint32_t *y, uint32_t r) {
-  uint32_t temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp;
-  uint32_t* x32 = (uint32_t*)x;
-  x32[ 0] = ~x32[ 0];
-  x32[ 1] ^= 0xffffffff^r;
-  x32[ 2] = ~x32[ 2];
-  x32[ 3] ^= 0xefffffff^r;
-  x32[ 4] = ~x32[ 4];
-  x32[ 5] ^= 0xdfffffff^r;
-  x32[ 6] = ~x32[ 6];
-  x32[ 7] ^= 0xcfffffff^r;
-  x32[ 8] = ~x32[ 8];
-  x32[ 9] ^= 0xbfffffff^r;
-  x32[10] = ~x32[10];
-  x32[11] ^= 0xafffffff^r;
-  x32[12] = ~x32[12];
-  x32[13] ^= 0x9fffffff^r;
-  x32[14] = ~x32[14];
-  x32[15] ^= 0x8fffffff^r;
-  COLUMN(x,y, 0,  2,  6, 10, 14,  1,  5,  9, 13, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
-  COLUMN(x,y, 2,  4,  8, 12,  0,  3,  7, 11, 15, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
-  COLUMN(x,y, 4,  6, 10, 14,  2,  5,  9, 13,  1, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
-  COLUMN(x,y, 6,  8, 12,  0,  4,  7, 11, 15,  3, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
-  COLUMN(x,y, 8, 10, 14,  2,  6,  9, 13,  1,  5, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
-  COLUMN(x,y,10, 12,  0,  4,  8, 11, 15,  3,  7, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
-  COLUMN(x,y,12, 14,  2,  6, 10, 13,  1,  5,  9, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
-  COLUMN(x,y,14,  0,  4,  8, 12, 15,  3,  7, 11, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
-}
-
-/* compute compression function (short variants) */
-static void F512(uint32_t *h, const uint32_t *m) {
-  int i;
-  uint32_t Ptmp[2*COLS512];
-  uint32_t Qtmp[2*COLS512];
-  uint32_t y[2*COLS512];
-  uint32_t z[2*COLS512];
-
-  for (i = 0; i < 2*COLS512; i++) {
-    z[i] = m[i];
-    Ptmp[i] = h[i]^m[i];
-  }
-
-  /* compute Q(m) */
-  RND512Q((uint8_t*)z, y, 0x00000000);
-  RND512Q((uint8_t*)y, z, 0x01000000);
-  RND512Q((uint8_t*)z, y, 0x02000000);
-  RND512Q((uint8_t*)y, z, 0x03000000);
-  RND512Q((uint8_t*)z, y, 0x04000000);
-  RND512Q((uint8_t*)y, z, 0x05000000);
-  RND512Q((uint8_t*)z, y, 0x06000000);
-  RND512Q((uint8_t*)y, z, 0x07000000);
-  RND512Q((uint8_t*)z, y, 0x08000000);
-  RND512Q((uint8_t*)y, Qtmp, 0x09000000);
-
-  /* compute P(h+m) */
-  RND512P((uint8_t*)Ptmp, y, 0x00000000);
-  RND512P((uint8_t*)y, z, 0x00000001);
-  RND512P((uint8_t*)z, y, 0x00000002);
-  RND512P((uint8_t*)y, z, 0x00000003);
-  RND512P((uint8_t*)z, y, 0x00000004);
-  RND512P((uint8_t*)y, z, 0x00000005);
-  RND512P((uint8_t*)z, y, 0x00000006);
-  RND512P((uint8_t*)y, z, 0x00000007);
-  RND512P((uint8_t*)z, y, 0x00000008);
-  RND512P((uint8_t*)y, Ptmp, 0x00000009);
-
-  /* compute P(h+m) + Q(m) + h */
-  for (i = 0; i < 2*COLS512; i++) {
-    h[i] ^= Ptmp[i]^Qtmp[i];
-  }
-}
-
-
-/* digest up to msglen bytes of input (full blocks only) */
-static void Transform(groestlHashState *ctx,
-	       const uint8_t *input, 
-	       int msglen) {
-
-  /* digest message, one block at a time */
-  for (; msglen >= SIZE512; 
-       msglen -= SIZE512, input += SIZE512) {
-    F512(ctx->chaining,(uint32_t*)input);
-
-    /* increment block counter */
-    ctx->block_counter1++;
-    if (ctx->block_counter1 == 0) ctx->block_counter2++;
-  }
-}
-
-/* given state h, do h <- P(h)+h */
-static void OutputTransformation(groestlHashState *ctx) {
-  int j;
-  uint32_t temp[2*COLS512];
-  uint32_t y[2*COLS512];
-  uint32_t z[2*COLS512];
-
-
-
-	for (j = 0; j < 2*COLS512; j++) {
-	  temp[j] = ctx->chaining[j];
-	}
-	RND512P((uint8_t*)temp, y, 0x00000000);
-	RND512P((uint8_t*)y, z, 0x00000001);
-	RND512P((uint8_t*)z, y, 0x00000002);
-	RND512P((uint8_t*)y, z, 0x00000003);
-	RND512P((uint8_t*)z, y, 0x00000004);
-	RND512P((uint8_t*)y, z, 0x00000005);
-	RND512P((uint8_t*)z, y, 0x00000006);
-	RND512P((uint8_t*)y, z, 0x00000007);
-	RND512P((uint8_t*)z, y, 0x00000008);
-	RND512P((uint8_t*)y, temp, 0x00000009);
-	for (j = 0; j < 2*COLS512; j++) {
-	  ctx->chaining[j] ^= temp[j];
-	}									  
-}
-
-/* initialise context */
-static void Init(groestlHashState* ctx) {
-  int i = 0;
-  /* allocate memory for state and data buffer */
-
-  for(;i<(SIZE512/sizeof(uint32_t));i++)
-  {
-	ctx->chaining[i] = 0;
-  }
-
-  /* set initial value */
-  ctx->chaining[2*COLS512-1] = u32BIG((uint32_t)HASH_BIT_LEN);
-
-  /* set other variables */
-  ctx->buf_ptr = 0;
-  ctx->block_counter1 = 0;
-  ctx->block_counter2 = 0;
-  ctx->bits_in_last_byte = 0;
-}
-
-/* update state with databitlen bits of input */
-static void Update(groestlHashState* ctx,
-		  const BitSequence* input,
-		  DataLength databitlen) {
-  int index = 0;
-  int msglen = (int)(databitlen/8);
-  int rem = (int)(databitlen%8);
-
-  /* if the buffer contains data that has not yet been digested, first
-     add data to buffer until full */
-  if (ctx->buf_ptr) {
-    while (ctx->buf_ptr < SIZE512 && index < msglen) {
-      ctx->buffer[(int)ctx->buf_ptr++] = input[index++];
-    }
-    if (ctx->buf_ptr < SIZE512) {
-      /* buffer still not full, return */
-      if (rem) {
-	ctx->bits_in_last_byte = rem;
-	ctx->buffer[(int)ctx->buf_ptr++] = input[index];
-      }
-      return;
-    }
-
-    /* digest buffer */
-    ctx->buf_ptr = 0;
-    Transform(ctx, ctx->buffer, SIZE512);
-  }
-
-  /* digest bulk of message */
-  Transform(ctx, input+index, msglen-index);
-  index += ((msglen-index)/SIZE512)*SIZE512;
-
-  /* store remaining data in buffer */
-  while (index < msglen) {
-    ctx->buffer[(int)ctx->buf_ptr++] = input[index++];
-  }
-
-  /* if non-integral number of bytes have been supplied, store
-     remaining bits in last byte, together with information about
-     number of bits */
-  if (rem) {
-    ctx->bits_in_last_byte = rem;
-    ctx->buffer[(int)ctx->buf_ptr++] = input[index];
-  }
-}
-
-#define BILB ctx->bits_in_last_byte
-
-/* finalise: process remaining data (including padding), perform
-   output transformation, and write hash result to 'output' */
-static void Final(groestlHashState* ctx,
-		 BitSequence* output) {
-  int i, j = 0, hashbytelen = HASH_BIT_LEN/8;
-  uint8_t *s = (BitSequence*)ctx->chaining;
-
-  /* pad with '1'-bit and first few '0'-bits */
-  if (BILB) {
-    ctx->buffer[(int)ctx->buf_ptr-1] &= ((1<<BILB)-1)<<(8-BILB);
-    ctx->buffer[(int)ctx->buf_ptr-1] ^= 0x1<<(7-BILB);
-    BILB = 0;
-  }
-  else ctx->buffer[(int)ctx->buf_ptr++] = 0x80;
-
-  /* pad with '0'-bits */
-  if (ctx->buf_ptr > SIZE512-LENGTHFIELDLEN) {
-    /* padding requires two blocks */
-    while (ctx->buf_ptr < SIZE512) {
-      ctx->buffer[(int)ctx->buf_ptr++] = 0;
-    }
-    /* digest first padding block */
-    Transform(ctx, ctx->buffer, SIZE512);
-    ctx->buf_ptr = 0;
-  }
-  while (ctx->buf_ptr < SIZE512-LENGTHFIELDLEN) {
-    ctx->buffer[(int)ctx->buf_ptr++] = 0;
-  }
-
-  /* length padding */
-  ctx->block_counter1++;
-  if (ctx->block_counter1 == 0) ctx->block_counter2++;
-  ctx->buf_ptr = SIZE512;
-
-  while (ctx->buf_ptr > SIZE512-(int)sizeof(uint32_t)) {
-    ctx->buffer[(int)--ctx->buf_ptr] = (uint8_t)ctx->block_counter1;
-    ctx->block_counter1 >>= 8;
-  }
-  while (ctx->buf_ptr > SIZE512-LENGTHFIELDLEN) {
-    ctx->buffer[(int)--ctx->buf_ptr] = (uint8_t)ctx->block_counter2;
-    ctx->block_counter2 >>= 8;
-  }
-  /* digest final padding block */
-  Transform(ctx, ctx->buffer, SIZE512); 
-  /* perform output transformation */
-  OutputTransformation(ctx);
-
-  /* store hash result in output */
-  for (i = SIZE512-hashbytelen; i < SIZE512; i++,j++) {
-    output[j] = s[i];
-  }
-
-  /* zeroise relevant variables and deallocate memory */
-  for (i = 0; i < COLS512; i++) {
-    ctx->chaining[i] = 0;
-  }
-  for (i = 0; i < SIZE512; i++) {
-    ctx->buffer[i] = 0;
-  }
-}
-
-/* hash bit sequence */
-void groestl(const BitSequence* data, 
-		DataLength databitlen,
-		BitSequence* hashval) {
-
-  groestlHashState context;
-
-  /* initialise */
-    Init(&context);
-
-
-  /* process message */
-  Update(&context, data, databitlen);
-
-  /* finalise */
-  Final(&context, hashval);
-}
-/*
-static int crypto_hash(unsigned char *out,
-		const unsigned char *in,
-		unsigned long long len)
-{
-  groestl(in, 8*len, out);
-  return 0;
-}
-
-*/
--- a/crypto/c_groestl.h
+++ b/crypto/c_groestl.h
@@ -1,60 +0,0 @@
-#ifndef __hash_h
-#define __hash_h
-/*
-#include "crypto_uint8.h"
-#include "crypto_uint32.h"
-#include "crypto_uint64.h"
-#include "crypto_hash.h" 
-
-typedef crypto_uint8 uint8_t; 
-typedef crypto_uint32 uint32_t; 
-typedef crypto_uint64 uint64_t;
-*/
-#include <stdint.h>
-
-#include "hash.h"
-
-/* some sizes (number of bytes) */
-#define ROWS 8
-#define LENGTHFIELDLEN ROWS
-#define COLS512 8
-
-#define SIZE512 (ROWS*COLS512)
-
-#define ROUNDS512 10
-#define HASH_BIT_LEN 256
-
-#define ROTL32(v, n) ((((v)<<(n))|((v)>>(32-(n))))&li_32(ffffffff))
-
-
-#define li_32(h) 0x##h##u
-#define EXT_BYTE(var,n) ((uint8_t)((uint32_t)(var) >> (8*n)))
-#define u32BIG(a)				\
-  ((ROTL32(a,8) & li_32(00FF00FF)) |		\
-   (ROTL32(a,24) & li_32(FF00FF00)))
-
-
-/* NIST API begin */
-typedef struct {
-  uint32_t chaining[SIZE512/sizeof(uint32_t)];            /* actual state */
-  uint32_t block_counter1,
-  block_counter2;         /* message block counter(s) */
-  BitSequence buffer[SIZE512];      /* data buffer */
-  int buf_ptr;              /* data buffer pointer */
-  int bits_in_last_byte;    /* no. of message bits in last byte of
-			       data buffer */
-} groestlHashState;
-
-/*void Init(hashState*);
-void Update(hashState*, const BitSequence*, DataLength);
-void Final(hashState*, BitSequence*); */
-void groestl(const BitSequence*, DataLength, BitSequence*);
-/* NIST API end   */
-
-/*
-int crypto_hash(unsigned char *out,
-		const unsigned char *in,
-		unsigned long long len);
-*/
-
-#endif /* __hash_h */
--- a/crypto/c_jh.c
+++ b/crypto/c_jh.c
@@ -1,366 +0,0 @@
-/*This program gives the 64-bit optimized bitslice implementation of JH using ANSI C
-
-   --------------------------------
-   Performance
-
-   Microprocessor: Intel CORE 2 processor (Core 2 Duo Mobile T6600 2.2GHz)
-   Operating System: 64-bit Ubuntu 10.04 (Linux kernel 2.6.32-22-generic)
-   Speed for long message:
-   1) 45.8 cycles/byte   compiler: Intel C++ Compiler 11.1   compilation option: icc -O2
-   2) 56.8 cycles/byte   compiler: gcc 4.4.3                 compilation option: gcc -O3
-
-   --------------------------------
-   Last Modified: January 16, 2011
-*/
-
-#include "c_jh.h"
-
-#include <stdint.h>
-#include <string.h>
-
-/*typedef unsigned long long uint64;*/
-typedef uint64_t uint64;
-
-/*define data alignment for different C compilers*/
-#if defined(__GNUC__)
-      #define DATA_ALIGN16(x) x __attribute__ ((aligned(16)))
-#else
-      #define DATA_ALIGN16(x) __declspec(align(16)) x
-#endif
-
-
-typedef struct {
-	int hashbitlen;	   	              /*the message digest size*/
-	unsigned long long databitlen;    /*the message size in bits*/
-	unsigned long long datasize_in_buffer;      /*the size of the message remained in buffer; assumed to be multiple of 8bits except for the last partial block at the end of the message*/
-	DATA_ALIGN16(uint64 x[8][2]);     /*the 1024-bit state, ( x[i][0] || x[i][1] ) is the ith row of the state in the pseudocode*/
-	unsigned char buffer[64];         /*the 512-bit message block to be hashed;*/
-} hashState;
-
-
-/*The initial hash value H(0)*/
-const unsigned char JH224_H0[128]={0x2d,0xfe,0xdd,0x62,0xf9,0x9a,0x98,0xac,0xae,0x7c,0xac,0xd6,0x19,0xd6,0x34,0xe7,0xa4,0x83,0x10,0x5,0xbc,0x30,0x12,0x16,0xb8,0x60,0x38,0xc6,0xc9,0x66,0x14,0x94,0x66,0xd9,0x89,0x9f,0x25,0x80,0x70,0x6f,0xce,0x9e,0xa3,0x1b,0x1d,0x9b,0x1a,0xdc,0x11,0xe8,0x32,0x5f,0x7b,0x36,0x6e,0x10,0xf9,0x94,0x85,0x7f,0x2,0xfa,0x6,0xc1,0x1b,0x4f,0x1b,0x5c,0xd8,0xc8,0x40,0xb3,0x97,0xf6,0xa1,0x7f,0x6e,0x73,0x80,0x99,0xdc,0xdf,0x93,0xa5,0xad,0xea,0xa3,0xd3,0xa4,0x31,0xe8,0xde,0xc9,0x53,0x9a,0x68,0x22,0xb4,0xa9,0x8a,0xec,0x86,0xa1,0xe4,0xd5,0x74,0xac,0x95,0x9c,0xe5,0x6c,0xf0,0x15,0x96,0xd,0xea,0xb5,0xab,0x2b,0xbf,0x96,0x11,0xdc,0xf0,0xdd,0x64,0xea,0x6e};
-const unsigned char JH256_H0[128]={0xeb,0x98,0xa3,0x41,0x2c,0x20,0xd3,0xeb,0x92,0xcd,0xbe,0x7b,0x9c,0xb2,0x45,0xc1,0x1c,0x93,0x51,0x91,0x60,0xd4,0xc7,0xfa,0x26,0x0,0x82,0xd6,0x7e,0x50,0x8a,0x3,0xa4,0x23,0x9e,0x26,0x77,0x26,0xb9,0x45,0xe0,0xfb,0x1a,0x48,0xd4,0x1a,0x94,0x77,0xcd,0xb5,0xab,0x26,0x2,0x6b,0x17,0x7a,0x56,0xf0,0x24,0x42,0xf,0xff,0x2f,0xa8,0x71,0xa3,0x96,0x89,0x7f,0x2e,0x4d,0x75,0x1d,0x14,0x49,0x8,0xf7,0x7d,0xe2,0x62,0x27,0x76,0x95,0xf7,0x76,0x24,0x8f,0x94,0x87,0xd5,0xb6,0x57,0x47,0x80,0x29,0x6c,0x5c,0x5e,0x27,0x2d,0xac,0x8e,0xd,0x6c,0x51,0x84,0x50,0xc6,0x57,0x5,0x7a,0xf,0x7b,0xe4,0xd3,0x67,0x70,0x24,0x12,0xea,0x89,0xe3,0xab,0x13,0xd3,0x1c,0xd7,0x69};
-const unsigned char JH384_H0[128]={0x48,0x1e,0x3b,0xc6,0xd8,0x13,0x39,0x8a,0x6d,0x3b,0x5e,0x89,0x4a,0xde,0x87,0x9b,0x63,0xfa,0xea,0x68,0xd4,0x80,0xad,0x2e,0x33,0x2c,0xcb,0x21,0x48,0xf,0x82,0x67,0x98,0xae,0xc8,0x4d,0x90,0x82,0xb9,0x28,0xd4,0x55,0xea,0x30,0x41,0x11,0x42,0x49,0x36,0xf5,0x55,0xb2,0x92,0x48,0x47,0xec,0xc7,0x25,0xa,0x93,0xba,0xf4,0x3c,0xe1,0x56,0x9b,0x7f,0x8a,0x27,0xdb,0x45,0x4c,0x9e,0xfc,0xbd,0x49,0x63,0x97,0xaf,0xe,0x58,0x9f,0xc2,0x7d,0x26,0xaa,0x80,0xcd,0x80,0xc0,0x8b,0x8c,0x9d,0xeb,0x2e,0xda,0x8a,0x79,0x81,0xe8,0xf8,0xd5,0x37,0x3a,0xf4,0x39,0x67,0xad,0xdd,0xd1,0x7a,0x71,0xa9,0xb4,0xd3,0xbd,0xa4,0x75,0xd3,0x94,0x97,0x6c,0x3f,0xba,0x98,0x42,0x73,0x7f};
-const unsigned char JH512_H0[128]={0x6f,0xd1,0x4b,0x96,0x3e,0x0,0xaa,0x17,0x63,0x6a,0x2e,0x5,0x7a,0x15,0xd5,0x43,0x8a,0x22,0x5e,0x8d,0xc,0x97,0xef,0xb,0xe9,0x34,0x12,0x59,0xf2,0xb3,0xc3,0x61,0x89,0x1d,0xa0,0xc1,0x53,0x6f,0x80,0x1e,0x2a,0xa9,0x5,0x6b,0xea,0x2b,0x6d,0x80,0x58,0x8e,0xcc,0xdb,0x20,0x75,0xba,0xa6,0xa9,0xf,0x3a,0x76,0xba,0xf8,0x3b,0xf7,0x1,0x69,0xe6,0x5,0x41,0xe3,0x4a,0x69,0x46,0xb5,0x8a,0x8e,0x2e,0x6f,0xe6,0x5a,0x10,0x47,0xa7,0xd0,0xc1,0x84,0x3c,0x24,0x3b,0x6e,0x71,0xb1,0x2d,0x5a,0xc1,0x99,0xcf,0x57,0xf6,0xec,0x9d,0xb1,0xf8,0x56,0xa7,0x6,0x88,0x7c,0x57,0x16,0xb1,0x56,0xe3,0xc2,0xfc,0xdf,0xe6,0x85,0x17,0xfb,0x54,0x5a,0x46,0x78,0xcc,0x8c,0xdd,0x4b};
-
-/*42 round constants, each round constant is 32-byte (256-bit)*/
-const unsigned char E8_bitslice_roundconstant[42][32]={
-{0x72,0xd5,0xde,0xa2,0xdf,0x15,0xf8,0x67,0x7b,0x84,0x15,0xa,0xb7,0x23,0x15,0x57,0x81,0xab,0xd6,0x90,0x4d,0x5a,0x87,0xf6,0x4e,0x9f,0x4f,0xc5,0xc3,0xd1,0x2b,0x40},
-{0xea,0x98,0x3a,0xe0,0x5c,0x45,0xfa,0x9c,0x3,0xc5,0xd2,0x99,0x66,0xb2,0x99,0x9a,0x66,0x2,0x96,0xb4,0xf2,0xbb,0x53,0x8a,0xb5,0x56,0x14,0x1a,0x88,0xdb,0xa2,0x31},
-{0x3,0xa3,0x5a,0x5c,0x9a,0x19,0xe,0xdb,0x40,0x3f,0xb2,0xa,0x87,0xc1,0x44,0x10,0x1c,0x5,0x19,0x80,0x84,0x9e,0x95,0x1d,0x6f,0x33,0xeb,0xad,0x5e,0xe7,0xcd,0xdc},
-{0x10,0xba,0x13,0x92,0x2,0xbf,0x6b,0x41,0xdc,0x78,0x65,0x15,0xf7,0xbb,0x27,0xd0,0xa,0x2c,0x81,0x39,0x37,0xaa,0x78,0x50,0x3f,0x1a,0xbf,0xd2,0x41,0x0,0x91,0xd3},
-{0x42,0x2d,0x5a,0xd,0xf6,0xcc,0x7e,0x90,0xdd,0x62,0x9f,0x9c,0x92,0xc0,0x97,0xce,0x18,0x5c,0xa7,0xb,0xc7,0x2b,0x44,0xac,0xd1,0xdf,0x65,0xd6,0x63,0xc6,0xfc,0x23},
-{0x97,0x6e,0x6c,0x3,0x9e,0xe0,0xb8,0x1a,0x21,0x5,0x45,0x7e,0x44,0x6c,0xec,0xa8,0xee,0xf1,0x3,0xbb,0x5d,0x8e,0x61,0xfa,0xfd,0x96,0x97,0xb2,0x94,0x83,0x81,0x97},
-{0x4a,0x8e,0x85,0x37,0xdb,0x3,0x30,0x2f,0x2a,0x67,0x8d,0x2d,0xfb,0x9f,0x6a,0x95,0x8a,0xfe,0x73,0x81,0xf8,0xb8,0x69,0x6c,0x8a,0xc7,0x72,0x46,0xc0,0x7f,0x42,0x14},
-{0xc5,0xf4,0x15,0x8f,0xbd,0xc7,0x5e,0xc4,0x75,0x44,0x6f,0xa7,0x8f,0x11,0xbb,0x80,0x52,0xde,0x75,0xb7,0xae,0xe4,0x88,0xbc,0x82,0xb8,0x0,0x1e,0x98,0xa6,0xa3,0xf4},
-{0x8e,0xf4,0x8f,0x33,0xa9,0xa3,0x63,0x15,0xaa,0x5f,0x56,0x24,0xd5,0xb7,0xf9,0x89,0xb6,0xf1,0xed,0x20,0x7c,0x5a,0xe0,0xfd,0x36,0xca,0xe9,0x5a,0x6,0x42,0x2c,0x36},
-{0xce,0x29,0x35,0x43,0x4e,0xfe,0x98,0x3d,0x53,0x3a,0xf9,0x74,0x73,0x9a,0x4b,0xa7,0xd0,0xf5,0x1f,0x59,0x6f,0x4e,0x81,0x86,0xe,0x9d,0xad,0x81,0xaf,0xd8,0x5a,0x9f},
-{0xa7,0x5,0x6,0x67,0xee,0x34,0x62,0x6a,0x8b,0xb,0x28,0xbe,0x6e,0xb9,0x17,0x27,0x47,0x74,0x7,0x26,0xc6,0x80,0x10,0x3f,0xe0,0xa0,0x7e,0x6f,0xc6,0x7e,0x48,0x7b},
-{0xd,0x55,0xa,0xa5,0x4a,0xf8,0xa4,0xc0,0x91,0xe3,0xe7,0x9f,0x97,0x8e,0xf1,0x9e,0x86,0x76,0x72,0x81,0x50,0x60,0x8d,0xd4,0x7e,0x9e,0x5a,0x41,0xf3,0xe5,0xb0,0x62},
-{0xfc,0x9f,0x1f,0xec,0x40,0x54,0x20,0x7a,0xe3,0xe4,0x1a,0x0,0xce,0xf4,0xc9,0x84,0x4f,0xd7,0x94,0xf5,0x9d,0xfa,0x95,0xd8,0x55,0x2e,0x7e,0x11,0x24,0xc3,0x54,0xa5},
-{0x5b,0xdf,0x72,0x28,0xbd,0xfe,0x6e,0x28,0x78,0xf5,0x7f,0xe2,0xf,0xa5,0xc4,0xb2,0x5,0x89,0x7c,0xef,0xee,0x49,0xd3,0x2e,0x44,0x7e,0x93,0x85,0xeb,0x28,0x59,0x7f},
-{0x70,0x5f,0x69,0x37,0xb3,0x24,0x31,0x4a,0x5e,0x86,0x28,0xf1,0x1d,0xd6,0xe4,0x65,0xc7,0x1b,0x77,0x4,0x51,0xb9,0x20,0xe7,0x74,0xfe,0x43,0xe8,0x23,0xd4,0x87,0x8a},
-{0x7d,0x29,0xe8,0xa3,0x92,0x76,0x94,0xf2,0xdd,0xcb,0x7a,0x9,0x9b,0x30,0xd9,0xc1,0x1d,0x1b,0x30,0xfb,0x5b,0xdc,0x1b,0xe0,0xda,0x24,0x49,0x4f,0xf2,0x9c,0x82,0xbf},
-{0xa4,0xe7,0xba,0x31,0xb4,0x70,0xbf,0xff,0xd,0x32,0x44,0x5,0xde,0xf8,0xbc,0x48,0x3b,0xae,0xfc,0x32,0x53,0xbb,0xd3,0x39,0x45,0x9f,0xc3,0xc1,0xe0,0x29,0x8b,0xa0},
-{0xe5,0xc9,0x5,0xfd,0xf7,0xae,0x9,0xf,0x94,0x70,0x34,0x12,0x42,0x90,0xf1,0x34,0xa2,0x71,0xb7,0x1,0xe3,0x44,0xed,0x95,0xe9,0x3b,0x8e,0x36,0x4f,0x2f,0x98,0x4a},
-{0x88,0x40,0x1d,0x63,0xa0,0x6c,0xf6,0x15,0x47,0xc1,0x44,0x4b,0x87,0x52,0xaf,0xff,0x7e,0xbb,0x4a,0xf1,0xe2,0xa,0xc6,0x30,0x46,0x70,0xb6,0xc5,0xcc,0x6e,0x8c,0xe6},
-{0xa4,0xd5,0xa4,0x56,0xbd,0x4f,0xca,0x0,0xda,0x9d,0x84,0x4b,0xc8,0x3e,0x18,0xae,0x73,0x57,0xce,0x45,0x30,0x64,0xd1,0xad,0xe8,0xa6,0xce,0x68,0x14,0x5c,0x25,0x67},
-{0xa3,0xda,0x8c,0xf2,0xcb,0xe,0xe1,0x16,0x33,0xe9,0x6,0x58,0x9a,0x94,0x99,0x9a,0x1f,0x60,0xb2,0x20,0xc2,0x6f,0x84,0x7b,0xd1,0xce,0xac,0x7f,0xa0,0xd1,0x85,0x18},
-{0x32,0x59,0x5b,0xa1,0x8d,0xdd,0x19,0xd3,0x50,0x9a,0x1c,0xc0,0xaa,0xa5,0xb4,0x46,0x9f,0x3d,0x63,0x67,0xe4,0x4,0x6b,0xba,0xf6,0xca,0x19,0xab,0xb,0x56,0xee,0x7e},
-{0x1f,0xb1,0x79,0xea,0xa9,0x28,0x21,0x74,0xe9,0xbd,0xf7,0x35,0x3b,0x36,0x51,0xee,0x1d,0x57,0xac,0x5a,0x75,0x50,0xd3,0x76,0x3a,0x46,0xc2,0xfe,0xa3,0x7d,0x70,0x1},
-{0xf7,0x35,0xc1,0xaf,0x98,0xa4,0xd8,0x42,0x78,0xed,0xec,0x20,0x9e,0x6b,0x67,0x79,0x41,0x83,0x63,0x15,0xea,0x3a,0xdb,0xa8,0xfa,0xc3,0x3b,0x4d,0x32,0x83,0x2c,0x83},
-{0xa7,0x40,0x3b,0x1f,0x1c,0x27,0x47,0xf3,0x59,0x40,0xf0,0x34,0xb7,0x2d,0x76,0x9a,0xe7,0x3e,0x4e,0x6c,0xd2,0x21,0x4f,0xfd,0xb8,0xfd,0x8d,0x39,0xdc,0x57,0x59,0xef},
-{0x8d,0x9b,0xc,0x49,0x2b,0x49,0xeb,0xda,0x5b,0xa2,0xd7,0x49,0x68,0xf3,0x70,0xd,0x7d,0x3b,0xae,0xd0,0x7a,0x8d,0x55,0x84,0xf5,0xa5,0xe9,0xf0,0xe4,0xf8,0x8e,0x65},
-{0xa0,0xb8,0xa2,0xf4,0x36,0x10,0x3b,0x53,0xc,0xa8,0x7,0x9e,0x75,0x3e,0xec,0x5a,0x91,0x68,0x94,0x92,0x56,0xe8,0x88,0x4f,0x5b,0xb0,0x5c,0x55,0xf8,0xba,0xbc,0x4c},
-{0xe3,0xbb,0x3b,0x99,0xf3,0x87,0x94,0x7b,0x75,0xda,0xf4,0xd6,0x72,0x6b,0x1c,0x5d,0x64,0xae,0xac,0x28,0xdc,0x34,0xb3,0x6d,0x6c,0x34,0xa5,0x50,0xb8,0x28,0xdb,0x71},
-{0xf8,0x61,0xe2,0xf2,0x10,0x8d,0x51,0x2a,0xe3,0xdb,0x64,0x33,0x59,0xdd,0x75,0xfc,0x1c,0xac,0xbc,0xf1,0x43,0xce,0x3f,0xa2,0x67,0xbb,0xd1,0x3c,0x2,0xe8,0x43,0xb0},
-{0x33,0xa,0x5b,0xca,0x88,0x29,0xa1,0x75,0x7f,0x34,0x19,0x4d,0xb4,0x16,0x53,0x5c,0x92,0x3b,0x94,0xc3,0xe,0x79,0x4d,0x1e,0x79,0x74,0x75,0xd7,0xb6,0xee,0xaf,0x3f},
-{0xea,0xa8,0xd4,0xf7,0xbe,0x1a,0x39,0x21,0x5c,0xf4,0x7e,0x9,0x4c,0x23,0x27,0x51,0x26,0xa3,0x24,0x53,0xba,0x32,0x3c,0xd2,0x44,0xa3,0x17,0x4a,0x6d,0xa6,0xd5,0xad},
-{0xb5,0x1d,0x3e,0xa6,0xaf,0xf2,0xc9,0x8,0x83,0x59,0x3d,0x98,0x91,0x6b,0x3c,0x56,0x4c,0xf8,0x7c,0xa1,0x72,0x86,0x60,0x4d,0x46,0xe2,0x3e,0xcc,0x8,0x6e,0xc7,0xf6},
-{0x2f,0x98,0x33,0xb3,0xb1,0xbc,0x76,0x5e,0x2b,0xd6,0x66,0xa5,0xef,0xc4,0xe6,0x2a,0x6,0xf4,0xb6,0xe8,0xbe,0xc1,0xd4,0x36,0x74,0xee,0x82,0x15,0xbc,0xef,0x21,0x63},
-{0xfd,0xc1,0x4e,0xd,0xf4,0x53,0xc9,0x69,0xa7,0x7d,0x5a,0xc4,0x6,0x58,0x58,0x26,0x7e,0xc1,0x14,0x16,0x6,0xe0,0xfa,0x16,0x7e,0x90,0xaf,0x3d,0x28,0x63,0x9d,0x3f},
-{0xd2,0xc9,0xf2,0xe3,0x0,0x9b,0xd2,0xc,0x5f,0xaa,0xce,0x30,0xb7,0xd4,0xc,0x30,0x74,0x2a,0x51,0x16,0xf2,0xe0,0x32,0x98,0xd,0xeb,0x30,0xd8,0xe3,0xce,0xf8,0x9a},
-{0x4b,0xc5,0x9e,0x7b,0xb5,0xf1,0x79,0x92,0xff,0x51,0xe6,0x6e,0x4,0x86,0x68,0xd3,0x9b,0x23,0x4d,0x57,0xe6,0x96,0x67,0x31,0xcc,0xe6,0xa6,0xf3,0x17,0xa,0x75,0x5},
-{0xb1,0x76,0x81,0xd9,0x13,0x32,0x6c,0xce,0x3c,0x17,0x52,0x84,0xf8,0x5,0xa2,0x62,0xf4,0x2b,0xcb,0xb3,0x78,0x47,0x15,0x47,0xff,0x46,0x54,0x82,0x23,0x93,0x6a,0x48},
-{0x38,0xdf,0x58,0x7,0x4e,0x5e,0x65,0x65,0xf2,0xfc,0x7c,0x89,0xfc,0x86,0x50,0x8e,0x31,0x70,0x2e,0x44,0xd0,0xb,0xca,0x86,0xf0,0x40,0x9,0xa2,0x30,0x78,0x47,0x4e},
-{0x65,0xa0,0xee,0x39,0xd1,0xf7,0x38,0x83,0xf7,0x5e,0xe9,0x37,0xe4,0x2c,0x3a,0xbd,0x21,0x97,0xb2,0x26,0x1,0x13,0xf8,0x6f,0xa3,0x44,0xed,0xd1,0xef,0x9f,0xde,0xe7},
-{0x8b,0xa0,0xdf,0x15,0x76,0x25,0x92,0xd9,0x3c,0x85,0xf7,0xf6,0x12,0xdc,0x42,0xbe,0xd8,0xa7,0xec,0x7c,0xab,0x27,0xb0,0x7e,0x53,0x8d,0x7d,0xda,0xaa,0x3e,0xa8,0xde},
-{0xaa,0x25,0xce,0x93,0xbd,0x2,0x69,0xd8,0x5a,0xf6,0x43,0xfd,0x1a,0x73,0x8,0xf9,0xc0,0x5f,0xef,0xda,0x17,0x4a,0x19,0xa5,0x97,0x4d,0x66,0x33,0x4c,0xfd,0x21,0x6a},
-{0x35,0xb4,0x98,0x31,0xdb,0x41,0x15,0x70,0xea,0x1e,0xf,0xbb,0xed,0xcd,0x54,0x9b,0x9a,0xd0,0x63,0xa1,0x51,0x97,0x40,0x72,0xf6,0x75,0x9d,0xbf,0x91,0x47,0x6f,0xe2}};
-
-
-static void E8(hashState *state);  /*The bijective function E8, in bitslice form*/
-static void F8(hashState *state);  /*The compression function F8 */
-
-/*The API functions*/
-static HashReturn Init(hashState *state, int hashbitlen);
-static HashReturn Update(hashState *state, const BitSequence *data, DataLength databitlen);
-static HashReturn Final(hashState *state, BitSequence *hashval);
-HashReturn jh_hash(int hashbitlen, const BitSequence *data,DataLength databitlen, BitSequence *hashval);
-
-/*swapping bit 2i with bit 2i+1 of 64-bit x*/
-#define SWAP1(x)   (x) = ((((x) & 0x5555555555555555ULL) << 1) | (((x) & 0xaaaaaaaaaaaaaaaaULL) >> 1));
-/*swapping bits 4i||4i+1 with bits 4i+2||4i+3 of 64-bit x*/
-#define SWAP2(x)   (x) = ((((x) & 0x3333333333333333ULL) << 2) | (((x) & 0xccccccccccccccccULL) >> 2));
-/*swapping bits 8i||8i+1||8i+2||8i+3 with bits 8i+4||8i+5||8i+6||8i+7 of 64-bit x*/
-#define SWAP4(x)   (x) = ((((x) & 0x0f0f0f0f0f0f0f0fULL) << 4) | (((x) & 0xf0f0f0f0f0f0f0f0ULL) >> 4));
-/*swapping bits 16i||16i+1||......||16i+7  with bits 16i+8||16i+9||......||16i+15 of 64-bit x*/
-#define SWAP8(x)   (x) = ((((x) & 0x00ff00ff00ff00ffULL) << 8) | (((x) & 0xff00ff00ff00ff00ULL) >> 8));
-/*swapping bits 32i||32i+1||......||32i+15 with bits 32i+16||32i+17||......||32i+31 of 64-bit x*/
-#define SWAP16(x)  (x) = ((((x) & 0x0000ffff0000ffffULL) << 16) | (((x) & 0xffff0000ffff0000ULL) >> 16));
-/*swapping bits 64i||64i+1||......||64i+31 with bits 64i+32||64i+33||......||64i+63 of 64-bit x*/
-#define SWAP32(x)  (x) = (((x) << 32) | ((x) >> 32));
-
-/*The MDS transform*/
-#define L(m0,m1,m2,m3,m4,m5,m6,m7) \
-      (m4) ^= (m1);                \
-      (m5) ^= (m2);                \
-      (m6) ^= (m0) ^ (m3);         \
-      (m7) ^= (m0);                \
-      (m0) ^= (m5);                \
-      (m1) ^= (m6);                \
-      (m2) ^= (m4) ^ (m7);         \
-      (m3) ^= (m4);
-
-/*Two Sboxes are computed in parallel, each Sbox implements S0 and S1, selected by a constant bit*/
-/*The reason to compute two Sboxes in parallel is to try to fully utilize the parallel processing power*/
-#define SS(m0,m1,m2,m3,m4,m5,m6,m7,cc0,cc1)   \
-      m3  = ~(m3);                  \
-      m7  = ~(m7);                  \
-      m0 ^= ((~(m2)) & (cc0));      \
-      m4 ^= ((~(m6)) & (cc1));      \
-      temp0 = (cc0) ^ ((m0) & (m1));\
-      temp1 = (cc1) ^ ((m4) & (m5));\
-      m0 ^= ((m2) & (m3));          \
-      m4 ^= ((m6) & (m7));          \
-      m3 ^= ((~(m1)) & (m2));       \
-      m7 ^= ((~(m5)) & (m6));       \
-      m1 ^= ((m0) & (m2));          \
-      m5 ^= ((m4) & (m6));          \
-      m2 ^= ((m0) & (~(m3)));       \
-      m6 ^= ((m4) & (~(m7)));       \
-      m0 ^= ((m1) | (m3));          \
-      m4 ^= ((m5) | (m7));          \
-      m3 ^= ((m1) & (m2));          \
-      m7 ^= ((m5) & (m6));          \
-      m1 ^= (temp0 & (m0));         \
-      m5 ^= (temp1 & (m4));         \
-      m2 ^= temp0;                  \
-      m6 ^= temp1;
-
-/*The bijective function E8, in bitslice form*/
-static void E8(hashState *state)
-{
-      uint64 i,roundnumber,temp0,temp1;
-
-      for (roundnumber = 0; roundnumber < 42; roundnumber = roundnumber+7) {
-            /*round 7*roundnumber+0: Sbox, MDS and Swapping layers*/
-            for (i = 0; i < 2; i++) {
-                  SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64*)E8_bitslice_roundconstant[roundnumber+0])[i],((uint64*)E8_bitslice_roundconstant[roundnumber+0])[i+2] );
-                  L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]);
-                  SWAP1(state->x[1][i]); SWAP1(state->x[3][i]); SWAP1(state->x[5][i]); SWAP1(state->x[7][i]);
-            }
-
-            /*round 7*roundnumber+1: Sbox, MDS and Swapping layers*/
-            for (i = 0; i < 2; i++) {
-                  SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64*)E8_bitslice_roundconstant[roundnumber+1])[i],((uint64*)E8_bitslice_roundconstant[roundnumber+1])[i+2] );
-                  L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]);
-                  SWAP2(state->x[1][i]); SWAP2(state->x[3][i]); SWAP2(state->x[5][i]); SWAP2(state->x[7][i]);
-            }
-
-            /*round 7*roundnumber+2: Sbox, MDS and Swapping layers*/
-            for (i = 0; i < 2; i++) {
-                  SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64*)E8_bitslice_roundconstant[roundnumber+2])[i],((uint64*)E8_bitslice_roundconstant[roundnumber+2])[i+2] );
-                  L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]);
-                  SWAP4(state->x[1][i]); SWAP4(state->x[3][i]); SWAP4(state->x[5][i]); SWAP4(state->x[7][i]);
-            }
-
-            /*round 7*roundnumber+3: Sbox, MDS and Swapping layers*/
-            for (i = 0; i < 2; i++) {
-                  SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64*)E8_bitslice_roundconstant[roundnumber+3])[i],((uint64*)E8_bitslice_roundconstant[roundnumber+3])[i+2] );
-                  L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]);
-                  SWAP8(state->x[1][i]); SWAP8(state->x[3][i]); SWAP8(state->x[5][i]); SWAP8(state->x[7][i]);
-            }
-
-            /*round 7*roundnumber+4: Sbox, MDS and Swapping layers*/
-            for (i = 0; i < 2; i++) {
-                  SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64*)E8_bitslice_roundconstant[roundnumber+4])[i],((uint64*)E8_bitslice_roundconstant[roundnumber+4])[i+2] );
-                  L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]);
-                  SWAP16(state->x[1][i]); SWAP16(state->x[3][i]); SWAP16(state->x[5][i]); SWAP16(state->x[7][i]);
-            }
-
-            /*round 7*roundnumber+5: Sbox, MDS and Swapping layers*/
-            for (i = 0; i < 2; i++) {
-                  SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64*)E8_bitslice_roundconstant[roundnumber+5])[i],((uint64*)E8_bitslice_roundconstant[roundnumber+5])[i+2] );
-                  L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]);
-                  SWAP32(state->x[1][i]); SWAP32(state->x[3][i]); SWAP32(state->x[5][i]); SWAP32(state->x[7][i]);
-            }
-
-            /*round 7*roundnumber+6: Sbox and MDS layers*/
-            for (i = 0; i < 2; i++) {
-                  SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64*)E8_bitslice_roundconstant[roundnumber+6])[i],((uint64*)E8_bitslice_roundconstant[roundnumber+6])[i+2] );
-                  L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]);
-            }
-            /*round 7*roundnumber+6: swapping layer*/
-            for (i = 1; i < 8; i = i+2) {
-                  temp0 = state->x[i][0]; state->x[i][0] = state->x[i][1]; state->x[i][1] = temp0;
-            }
-      }
-
-}
-
-/*The compression function F8 */
-static void F8(hashState *state)
-{
-      uint64  i;
-
-      /*xor the 512-bit message with the fist half of the 1024-bit hash state*/
-      for (i = 0; i < 8; i++)  state->x[i >> 1][i & 1] ^= ((uint64*)state->buffer)[i];
-
-      /*the bijective function E8 */
-      E8(state);
-
-      /*xor the 512-bit message with the second half of the 1024-bit hash state*/
-      for (i = 0; i < 8; i++)  state->x[(8+i) >> 1][(8+i) & 1] ^= ((uint64*)state->buffer)[i];
-}
-
-/*before hashing a message, initialize the hash state as H0 */
-static HashReturn Init(hashState *state, int hashbitlen)
-{
-	  state->databitlen = 0;
-	  state->datasize_in_buffer = 0;
-
-      /*initialize the initial hash value of JH*/
-      state->hashbitlen = hashbitlen;
-
-      /*load the intital hash value into state*/
-      switch (hashbitlen)
-      {
-            case 224: memcpy(state->x,JH224_H0,128); break;
-            case 256: memcpy(state->x,JH256_H0,128); break;
-            case 384: memcpy(state->x,JH384_H0,128); break;
-            case 512: memcpy(state->x,JH512_H0,128); break;
-      }
-
-      return(SUCCESS);
-}
-
-
-/*hash each 512-bit message block, except the last partial block*/
-static HashReturn Update(hashState *state, const BitSequence *data, DataLength databitlen)
-{
-      DataLength index; /*the starting address of the data to be compressed*/
-
-      state->databitlen += databitlen;
-      index = 0;
-
-      /*if there is remaining data in the buffer, fill it to a full message block first*/
-      /*we assume that the size of the data in the buffer is the multiple of 8 bits if it is not at the end of a message*/
-
-      /*There is data in the buffer, but the incoming data is insufficient for a full block*/
-      if ( (state->datasize_in_buffer > 0 ) && (( state->datasize_in_buffer + databitlen) < 512)  ) {
-            if ( (databitlen & 7) == 0 )
-                 memcpy(state->buffer + (state->datasize_in_buffer >> 3), data, (size_t) (64-(state->datasize_in_buffer >> 3)) );
-            else memcpy(state->buffer + (state->datasize_in_buffer >> 3), data, (size_t) (64 - (state->datasize_in_buffer >> 3) + 1) );
-            state->datasize_in_buffer += databitlen;
-            databitlen = 0;
-      }
-
-      /*There is data in the buffer, and the incoming data is sufficient for a full block*/
-      if ( (state->datasize_in_buffer > 0 ) && (( state->datasize_in_buffer + databitlen) >= 512)  ) {
-	        memcpy(state->buffer + (state->datasize_in_buffer >> 3), data, (size_t) (64-(state->datasize_in_buffer >> 3)) );
-	        index = 64-(state->datasize_in_buffer >> 3);
-	        databitlen = databitlen - (512 - state->datasize_in_buffer);
-	        F8(state);
-	        state->datasize_in_buffer = 0;
-      }
-
-      /*hash the remaining full message blocks*/
-      for ( ; databitlen >= 512; index = index+64, databitlen = databitlen - 512) {
-            memcpy(state->buffer, data+index, 64);
-            F8(state);
-      }
-
-      /*store the partial block into buffer, assume that -- if part of the last byte is not part of the message, then that part consists of 0 bits*/
-      if ( databitlen > 0) {
-            if ((databitlen & 7) == 0)
-                  memcpy(state->buffer, data+index, (databitlen & 0x1ff) >> 3);
-            else
-                  memcpy(state->buffer, data+index, ((databitlen & 0x1ff) >> 3)+1);
-            state->datasize_in_buffer = databitlen;
-      }
-
-      return(SUCCESS);
-}
-
-/*pad the message, process the padded block(s), truncate the hash value H to obtain the message digest*/
-static HashReturn Final(hashState *state, BitSequence *hashval)
-{
-      unsigned int i;
-
-      if ( (state->databitlen & 0x1ff) == 0 ) {
-            /*pad the message when databitlen is multiple of 512 bits, then process the padded block*/
-            memset(state->buffer, 0, 64);
-            state->buffer[0]  = 0x80;
-            state->buffer[63] = state->databitlen & 0xff;
-            state->buffer[62] = (state->databitlen >> 8)  & 0xff;
-            state->buffer[61] = (state->databitlen >> 16) & 0xff;
-            state->buffer[60] = (state->databitlen >> 24) & 0xff;
-            state->buffer[59] = (state->databitlen >> 32) & 0xff;
-            state->buffer[58] = (state->databitlen >> 40) & 0xff;
-            state->buffer[57] = (state->databitlen >> 48) & 0xff;
-            state->buffer[56] = (state->databitlen >> 56) & 0xff;
-            F8(state);
-      }
-      else {
-		    /*set the rest of the bytes in the buffer to 0*/
-            if ( (state->datasize_in_buffer & 7) == 0)
-                  for (i = (state->databitlen & 0x1ff) >> 3; i < 64; i++)  state->buffer[i] = 0;
-            else
-                  for (i = ((state->databitlen & 0x1ff) >> 3)+1; i < 64; i++)  state->buffer[i] = 0;
-
-            /*pad and process the partial block when databitlen is not multiple of 512 bits, then hash the padded blocks*/
-            state->buffer[((state->databitlen & 0x1ff) >> 3)] |= 1 << (7- (state->databitlen & 7));
-
-            F8(state);
-            memset(state->buffer, 0, 64);
-            state->buffer[63] = state->databitlen & 0xff;
-            state->buffer[62] = (state->databitlen >> 8) & 0xff;
-            state->buffer[61] = (state->databitlen >> 16) & 0xff;
-            state->buffer[60] = (state->databitlen >> 24) & 0xff;
-            state->buffer[59] = (state->databitlen >> 32) & 0xff;
-            state->buffer[58] = (state->databitlen >> 40) & 0xff;
-            state->buffer[57] = (state->databitlen >> 48) & 0xff;
-            state->buffer[56] = (state->databitlen >> 56) & 0xff;
-            F8(state);
-      }
-
-      /*truncating the final hash value to generate the message digest*/
-      switch(state->hashbitlen) {
-            case 224: memcpy(hashval,(unsigned char*)state->x+64+36,28);  break;
-            case 256: memcpy(hashval,(unsigned char*)state->x+64+32,32);  break;
-            case 384: memcpy(hashval,(unsigned char*)state->x+64+16,48);  break;
-            case 512: memcpy(hashval,(unsigned char*)state->x+64,64);     break;
-      }
-
-      return(SUCCESS);
-}
-
-/* hash a message,
-   three inputs: message digest size in bits (hashbitlen); message (data); message length in bits (databitlen)
-   one output:   message digest (hashval)
-*/
-HashReturn jh_hash(int hashbitlen, const BitSequence *data,DataLength databitlen, BitSequence *hashval)
-{
-      hashState state;
-
-      if ( hashbitlen == 224 || hashbitlen == 256 || hashbitlen == 384 || hashbitlen == 512 ) {
-            Init(&state, hashbitlen);
-            Update(&state, data, databitlen);
-            Final(&state, hashval);
-            return SUCCESS;
-      }
-      else
-            return(BAD_HASHLEN);
-}
--- a/crypto/c_jh.h
+++ b/crypto/c_jh.h
@@ -1,19 +0,0 @@
-/*This program gives the 64-bit optimized bitslice implementation of JH using ANSI C
-
-   --------------------------------
-   Performance
-
-   Microprocessor: Intel CORE 2 processor (Core 2 Duo Mobile T6600 2.2GHz)
-   Operating System: 64-bit Ubuntu 10.04 (Linux kernel 2.6.32-22-generic)
-   Speed for long message:
-   1) 45.8 cycles/byte   compiler: Intel C++ Compiler 11.1   compilation option: icc -O2
-   2) 56.8 cycles/byte   compiler: gcc 4.4.3                 compilation option: gcc -O3
-
-   --------------------------------
-   Last Modified: January 16, 2011
-*/
-#pragma once
-
-#include "hash.h"
-
-HashReturn jh_hash(int hashbitlen, const BitSequence *data, DataLength databitlen, BitSequence *hashval);
--- a/crypto/c_keccak.c
+++ b/crypto/c_keccak.c
@@ -1,123 +0,0 @@
-// keccak.c
-// 19-Nov-11  Markku-Juhani O. Saarinen <mjos@iki.fi>
-// A baseline Keccak (3rd round) implementation.
-
-#include "hash-ops.h"
-#include "c_keccak.h"
-
-const uint64_t keccakf_rndc[24] = 
-{
-    0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
-    0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
-    0x8000000080008081, 0x8000000000008009, 0x000000000000008a,
-    0x0000000000000088, 0x0000000080008009, 0x000000008000000a,
-    0x000000008000808b, 0x800000000000008b, 0x8000000000008089,
-    0x8000000000008003, 0x8000000000008002, 0x8000000000000080, 
-    0x000000000000800a, 0x800000008000000a, 0x8000000080008081,
-    0x8000000000008080, 0x0000000080000001, 0x8000000080008008
-};
-
-const int keccakf_rotc[24] = 
-{
-    1,  3,  6,  10, 15, 21, 28, 36, 45, 55, 2,  14, 
-    27, 41, 56, 8,  25, 43, 62, 18, 39, 61, 20, 44
-};
-
-const int keccakf_piln[24] = 
-{
-    10, 7,  11, 17, 18, 3, 5,  16, 8,  21, 24, 4, 
-    15, 23, 19, 13, 12, 2, 20, 14, 22, 9,  6,  1 
-};
-
-// update the state with given number of rounds
-
-void keccakf(uint64_t st[25], int rounds)
-{
-    int i, j, round;
-    uint64_t t, bc[5];
-
-    for (round = 0; round < rounds; ++round) {
-
-        // Theta
-        bc[0] = st[0] ^ st[5] ^ st[10] ^ st[15] ^ st[20];
-        bc[1] = st[1] ^ st[6] ^ st[11] ^ st[16] ^ st[21];
-        bc[2] = st[2] ^ st[7] ^ st[12] ^ st[17] ^ st[22];
-        bc[3] = st[3] ^ st[8] ^ st[13] ^ st[18] ^ st[23];
-        bc[4] = st[4] ^ st[9] ^ st[14] ^ st[19] ^ st[24];
-
-        for (i = 0; i < 5; ++i) {
-            t = bc[(i + 4) % 5] ^ ROTL64(bc[(i + 1) % 5], 1);
-            st[i     ] ^= t;
-            st[i +  5] ^= t;
-            st[i + 10] ^= t;
-            st[i + 15] ^= t;
-            st[i + 20] ^= t;
-        }
-
-        // Rho Pi
-        t = st[1];
-        for (i = 0; i < 24; ++i) {
-            bc[0] = st[keccakf_piln[i]];
-            st[keccakf_piln[i]] = ROTL64(t, keccakf_rotc[i]);
-            t = bc[0];
-        }
-
-        //  Chi
-        for (j = 0; j < 25; j += 5) {
-            bc[0] = st[j    ];
-            bc[1] = st[j + 1];
-            bc[2] = st[j + 2];
-            bc[3] = st[j + 3];
-            bc[4] = st[j + 4];
-            st[j    ] ^= (~bc[1]) & bc[2];
-            st[j + 1] ^= (~bc[2]) & bc[3];
-            st[j + 2] ^= (~bc[3]) & bc[4];
-            st[j + 3] ^= (~bc[4]) & bc[0];
-            st[j + 4] ^= (~bc[0]) & bc[1];
-        }
-
-        //  Iota
-        st[0] ^= keccakf_rndc[round];
-    }
-}
-
-// compute a keccak hash (md) of given byte length from "in"
-typedef uint64_t state_t[25];
-
-int keccak(const uint8_t *in, int inlen, uint8_t *md, int mdlen)
-{
-    state_t st;
-    uint8_t temp[144];
-    int i, rsiz, rsizw;
-
-    rsiz = sizeof(state_t) == mdlen ? HASH_DATA_AREA : 200 - 2 * mdlen;
-    rsizw = rsiz / 8;
-    
-    memset(st, 0, sizeof(st));
-
-    for ( ; inlen >= rsiz; inlen -= rsiz, in += rsiz) {
-        for (i = 0; i < rsizw; i++)
-            st[i] ^= ((uint64_t *) in)[i];
-        keccakf(st, KECCAK_ROUNDS);
-    }
-    
-    // last block and padding
-    memcpy(temp, in, inlen);
-    temp[inlen++] = 1;
-    memset(temp + inlen, 0, rsiz - inlen);
-    temp[rsiz - 1] |= 0x80;
-
-    for (i = 0; i < rsizw; i++)
-        st[i] ^= ((uint64_t *) temp)[i];
-
-    keccakf(st, KECCAK_ROUNDS);
-
-    memcpy(md, st, mdlen);
-
-    return 0;
-}
-
-void keccak1600(const uint8_t *in, int inlen, uint8_t *md)
-{
-    keccak(in, inlen, md, sizeof(state_t));
-}
--- a/crypto/c_keccak.h
+++ b/crypto/c_keccak.h
@@ -1,26 +0,0 @@
-// keccak.h
-// 19-Nov-11  Markku-Juhani O. Saarinen <mjos@iki.fi>
-
-#ifndef KECCAK_H
-#define KECCAK_H
-
-#include <stdint.h>
-#include <string.h>
-
-#ifndef KECCAK_ROUNDS
-#define KECCAK_ROUNDS 24
-#endif
-
-#ifndef ROTL64
-#define ROTL64(x, y) (((x) << (y)) | ((x) >> (64 - (y))))
-#endif
-
-// compute a keccak hash (md) of given byte length from "in"
-int keccak(const uint8_t *in, int inlen, uint8_t *md, int mdlen);
-
-// update the state
-void keccakf(uint64_t st[25], int norounds);
-
-void keccak1600(const uint8_t *in, int inlen, uint8_t *md);
-
-#endif
--- a/crypto/c_skein.c
+++ b/crypto/c_skein.c
--- a/crypto/c_skein.h
+++ b/crypto/c_skein.h
@@ -1,47 +0,0 @@
-#ifndef _SKEIN_H_
-#define _SKEIN_H_     1
-/**************************************************************************
-**
-** Interface declarations and internal definitions for Skein hashing.
-**
-** Source code author: Doug Whiting, 2008.
-**
-** This algorithm and source code is released to the public domain.
-**
-***************************************************************************
-** 
-** The following compile-time switches may be defined to control some
-** tradeoffs between speed, code size, error checking, and security.
-**
-** The "default" note explains what happens when the switch is not defined.
-**
-**  SKEIN_DEBUG            -- make callouts from inside Skein code
-**                            to examine/display intermediate values.
-**                            [default: no callouts (no overhead)]
-**
-**  SKEIN_ERR_CHECK        -- how error checking is handled inside Skein
-**                            code. If not defined, most error checking 
-**                            is disabled (for performance). Otherwise, 
-**                            the switch value is interpreted as:
-**                                0: use assert()      to flag errors
-**                                1: return SKEIN_FAIL to flag errors
-**
-***************************************************************************/
-#include "skein_port.h"                      /* get platform-specific definitions */
-
-typedef enum
-{
-  SKEIN_SUCCESS         =      0,          /* return codes from Skein calls */
-  SKEIN_FAIL            =      1,
-  SKEIN_BAD_HASHLEN     =      2
-}
-SkeinHashReturn;
-
-typedef size_t   SkeinDataLength;                /* bit count  type */
-typedef u08b_t   SkeinBitSequence;               /* bit stream type */
-
-/* "all-in-one" call */
-SkeinHashReturn skein_hash(int hashbitlen,   const SkeinBitSequence *data,
-        SkeinDataLength databitlen, SkeinBitSequence *hashval);
-
-#endif  /* ifndef _SKEIN_H_ */
--- a/crypto/groestl_tables.h
+++ b/crypto/groestl_tables.h
@@ -1,38 +0,0 @@
-#ifndef __tables_h
-#define __tables_h
-
-
-const uint32_t T[512] = {0xa5f432c6, 0xc6a597f4, 0x84976ff8, 0xf884eb97, 0x99b05eee, 0xee99c7b0, 0x8d8c7af6, 0xf68df78c, 0xd17e8ff, 0xff0de517, 0xbddc0ad6, 0xd6bdb7dc, 0xb1c816de, 0xdeb1a7c8, 0x54fc6d91, 0x915439fc
-, 0x50f09060, 0x6050c0f0, 0x3050702, 0x2030405, 0xa9e02ece, 0xcea987e0, 0x7d87d156, 0x567dac87, 0x192bcce7, 0xe719d52b, 0x62a613b5, 0xb56271a6, 0xe6317c4d, 0x4de69a31, 0x9ab559ec, 0xec9ac3b5
-, 0x45cf408f, 0x8f4505cf, 0x9dbca31f, 0x1f9d3ebc, 0x40c04989, 0x894009c0, 0x879268fa, 0xfa87ef92, 0x153fd0ef, 0xef15c53f, 0xeb2694b2, 0xb2eb7f26, 0xc940ce8e, 0x8ec90740, 0xb1de6fb, 0xfb0bed1d
-, 0xec2f6e41, 0x41ec822f, 0x67a91ab3, 0xb3677da9, 0xfd1c435f, 0x5ffdbe1c, 0xea256045, 0x45ea8a25, 0xbfdaf923, 0x23bf46da, 0xf7025153, 0x53f7a602, 0x96a145e4, 0xe496d3a1, 0x5bed769b, 0x9b5b2ded
-, 0xc25d2875, 0x75c2ea5d, 0x1c24c5e1, 0xe11cd924, 0xaee9d43d, 0x3dae7ae9, 0x6abef24c, 0x4c6a98be, 0x5aee826c, 0x6c5ad8ee, 0x41c3bd7e, 0x7e41fcc3, 0x206f3f5, 0xf502f106, 0x4fd15283, 0x834f1dd1
-, 0x5ce48c68, 0x685cd0e4, 0xf4075651, 0x51f4a207, 0x345c8dd1, 0xd134b95c, 0x818e1f9, 0xf908e918, 0x93ae4ce2, 0xe293dfae, 0x73953eab, 0xab734d95, 0x53f59762, 0x6253c4f5, 0x3f416b2a, 0x2a3f5441
-, 0xc141c08, 0x80c1014, 0x52f66395, 0x955231f6, 0x65afe946, 0x46658caf, 0x5ee27f9d, 0x9d5e21e2, 0x28784830, 0x30286078, 0xa1f8cf37, 0x37a16ef8, 0xf111b0a, 0xa0f1411, 0xb5c4eb2f, 0x2fb55ec4
-, 0x91b150e, 0xe091c1b, 0x365a7e24, 0x2436485a, 0x9bb6ad1b, 0x1b9b36b6, 0x3d4798df, 0xdf3da547, 0x266aa7cd, 0xcd26816a, 0x69bbf54e, 0x4e699cbb, 0xcd4c337f, 0x7fcdfe4c, 0x9fba50ea, 0xea9fcfba
-, 0x1b2d3f12, 0x121b242d, 0x9eb9a41d, 0x1d9e3ab9, 0x749cc458, 0x5874b09c, 0x2e724634, 0x342e6872, 0x2d774136, 0x362d6c77, 0xb2cd11dc, 0xdcb2a3cd, 0xee299db4, 0xb4ee7329, 0xfb164d5b, 0x5bfbb616
-, 0xf601a5a4, 0xa4f65301, 0x4dd7a176, 0x764decd7, 0x61a314b7, 0xb76175a3, 0xce49347d, 0x7dcefa49, 0x7b8ddf52, 0x527ba48d, 0x3e429fdd, 0xdd3ea142, 0x7193cd5e, 0x5e71bc93, 0x97a2b113, 0x139726a2
-, 0xf504a2a6, 0xa6f55704, 0x68b801b9, 0xb96869b8, 0x0, 0x0, 0x2c74b5c1, 0xc12c9974, 0x60a0e040, 0x406080a0, 0x1f21c2e3, 0xe31fdd21, 0xc8433a79, 0x79c8f243, 0xed2c9ab6, 0xb6ed772c
-, 0xbed90dd4, 0xd4beb3d9, 0x46ca478d, 0x8d4601ca, 0xd9701767, 0x67d9ce70, 0x4bddaf72, 0x724be4dd, 0xde79ed94, 0x94de3379, 0xd467ff98, 0x98d42b67, 0xe82393b0, 0xb0e87b23, 0x4ade5b85, 0x854a11de
-, 0x6bbd06bb, 0xbb6b6dbd, 0x2a7ebbc5, 0xc52a917e, 0xe5347b4f, 0x4fe59e34, 0x163ad7ed, 0xed16c13a, 0xc554d286, 0x86c51754, 0xd762f89a, 0x9ad72f62, 0x55ff9966, 0x6655ccff, 0x94a7b611, 0x119422a7
-, 0xcf4ac08a, 0x8acf0f4a, 0x1030d9e9, 0xe910c930, 0x60a0e04, 0x406080a, 0x819866fe, 0xfe81e798, 0xf00baba0, 0xa0f05b0b, 0x44ccb478, 0x7844f0cc, 0xbad5f025, 0x25ba4ad5, 0xe33e754b, 0x4be3963e
-, 0xf30eaca2, 0xa2f35f0e, 0xfe19445d, 0x5dfeba19, 0xc05bdb80, 0x80c01b5b, 0x8a858005, 0x58a0a85, 0xadecd33f, 0x3fad7eec, 0xbcdffe21, 0x21bc42df, 0x48d8a870, 0x7048e0d8, 0x40cfdf1, 0xf104f90c
-, 0xdf7a1963, 0x63dfc67a, 0xc1582f77, 0x77c1ee58, 0x759f30af, 0xaf75459f, 0x63a5e742, 0x426384a5, 0x30507020, 0x20304050, 0x1a2ecbe5, 0xe51ad12e, 0xe12effd, 0xfd0ee112, 0x6db708bf, 0xbf6d65b7
-, 0x4cd45581, 0x814c19d4, 0x143c2418, 0x1814303c, 0x355f7926, 0x26354c5f, 0x2f71b2c3, 0xc32f9d71, 0xe13886be, 0xbee16738, 0xa2fdc835, 0x35a26afd, 0xcc4fc788, 0x88cc0b4f, 0x394b652e, 0x2e395c4b
-, 0x57f96a93, 0x93573df9, 0xf20d5855, 0x55f2aa0d, 0x829d61fc, 0xfc82e39d, 0x47c9b37a, 0x7a47f4c9, 0xacef27c8, 0xc8ac8bef, 0xe73288ba, 0xbae76f32, 0x2b7d4f32, 0x322b647d, 0x95a442e6, 0xe695d7a4
-, 0xa0fb3bc0, 0xc0a09bfb, 0x98b3aa19, 0x199832b3, 0xd168f69e, 0x9ed12768, 0x7f8122a3, 0xa37f5d81, 0x66aaee44, 0x446688aa, 0x7e82d654, 0x547ea882, 0xabe6dd3b, 0x3bab76e6, 0x839e950b, 0xb83169e
-, 0xca45c98c, 0x8cca0345, 0x297bbcc7, 0xc729957b, 0xd36e056b, 0x6bd3d66e, 0x3c446c28, 0x283c5044, 0x798b2ca7, 0xa779558b, 0xe23d81bc, 0xbce2633d, 0x1d273116, 0x161d2c27, 0x769a37ad, 0xad76419a
-, 0x3b4d96db, 0xdb3bad4d, 0x56fa9e64, 0x6456c8fa, 0x4ed2a674, 0x744ee8d2, 0x1e223614, 0x141e2822, 0xdb76e492, 0x92db3f76, 0xa1e120c, 0xc0a181e, 0x6cb4fc48, 0x486c90b4, 0xe4378fb8, 0xb8e46b37
-, 0x5de7789f, 0x9f5d25e7, 0x6eb20fbd, 0xbd6e61b2, 0xef2a6943, 0x43ef862a, 0xa6f135c4, 0xc4a693f1, 0xa8e3da39, 0x39a872e3, 0xa4f7c631, 0x31a462f7, 0x37598ad3, 0xd337bd59, 0x8b8674f2, 0xf28bff86
-, 0x325683d5, 0xd532b156, 0x43c54e8b, 0x8b430dc5, 0x59eb856e, 0x6e59dceb, 0xb7c218da, 0xdab7afc2, 0x8c8f8e01, 0x18c028f, 0x64ac1db1, 0xb16479ac, 0xd26df19c, 0x9cd2236d, 0xe03b7249, 0x49e0923b
-, 0xb4c71fd8, 0xd8b4abc7, 0xfa15b9ac, 0xacfa4315, 0x709faf3, 0xf307fd09, 0x256fa0cf, 0xcf25856f, 0xafea20ca, 0xcaaf8fea, 0x8e897df4, 0xf48ef389, 0xe9206747, 0x47e98e20, 0x18283810, 0x10182028
-, 0xd5640b6f, 0x6fd5de64, 0x888373f0, 0xf088fb83, 0x6fb1fb4a, 0x4a6f94b1, 0x7296ca5c, 0x5c72b896, 0x246c5438, 0x3824706c, 0xf1085f57, 0x57f1ae08, 0xc7522173, 0x73c7e652, 0x51f36497, 0x975135f3
-, 0x2365aecb, 0xcb238d65, 0x7c8425a1, 0xa17c5984, 0x9cbf57e8, 0xe89ccbbf, 0x21635d3e, 0x3e217c63, 0xdd7cea96, 0x96dd377c, 0xdc7f1e61, 0x61dcc27f, 0x86919c0d, 0xd861a91, 0x85949b0f, 0xf851e94
-, 0x90ab4be0, 0xe090dbab, 0x42c6ba7c, 0x7c42f8c6, 0xc4572671, 0x71c4e257, 0xaae529cc, 0xccaa83e5, 0xd873e390, 0x90d83b73, 0x50f0906, 0x6050c0f, 0x103f4f7, 0xf701f503, 0x12362a1c, 0x1c123836
-, 0xa3fe3cc2, 0xc2a39ffe, 0x5fe18b6a, 0x6a5fd4e1, 0xf910beae, 0xaef94710, 0xd06b0269, 0x69d0d26b, 0x91a8bf17, 0x17912ea8, 0x58e87199, 0x995829e8, 0x2769533a, 0x3a277469, 0xb9d0f727, 0x27b94ed0
-, 0x384891d9, 0xd938a948, 0x1335deeb, 0xeb13cd35, 0xb3cee52b, 0x2bb356ce, 0x33557722, 0x22334455, 0xbbd604d2, 0xd2bbbfd6, 0x709039a9, 0xa9704990, 0x89808707, 0x7890e80, 0xa7f2c133, 0x33a766f2
-, 0xb6c1ec2d, 0x2db65ac1, 0x22665a3c, 0x3c227866, 0x92adb815, 0x15922aad, 0x2060a9c9, 0xc9208960, 0x49db5c87, 0x874915db, 0xff1ab0aa, 0xaaff4f1a, 0x7888d850, 0x5078a088, 0x7a8e2ba5, 0xa57a518e
-, 0x8f8a8903, 0x38f068a, 0xf8134a59, 0x59f8b213, 0x809b9209, 0x980129b, 0x1739231a, 0x1a173439, 0xda751065, 0x65daca75, 0x315384d7, 0xd731b553, 0xc651d584, 0x84c61351, 0xb8d303d0, 0xd0b8bbd3
-, 0xc35edc82, 0x82c31f5e, 0xb0cbe229, 0x29b052cb, 0x7799c35a, 0x5a77b499, 0x11332d1e, 0x1e113c33, 0xcb463d7b, 0x7bcbf646, 0xfc1fb7a8, 0xa8fc4b1f, 0xd6610c6d, 0x6dd6da61, 0x3a4e622c, 0x2c3a584e};
-
-#endif /* __tables_h */
--- a/crypto/hash-ops.h
+++ b/crypto/hash-ops.h
@@ -1,59 +0,0 @@
-// Copyright (c) 2012-2013 The Cryptonote developers
-// Distributed under the MIT/X11 software license, see the accompanying
-// file COPYING or http://www.opensource.org/licenses/mit-license.php.
-
-#pragma once
-
-#if !defined(__cplusplus)
-
-#include <assert.h>
-#include <stdbool.h>
-#include <stddef.h>
-#include <stdint.h>
-
-#include "int-util.h"
-
-#if 0
-static inline void *padd(void *p, size_t i) {
-  return (char *) p + i;
-}
-
-static inline const void *cpadd(const void *p, size_t i) {
-  return (const char *) p + i;
-}
-
-static inline void place_length(uint8_t *buffer, size_t bufsize, size_t length) {
-  if (sizeof(size_t) == 4) {
-    *(uint32_t*) padd(buffer, bufsize - 4) = swap32be(length);
-  } else {
-    *(uint64_t*) padd(buffer, bufsize - 8) = swap64be(length);
-  }
-}
-#endif
-
-#pragma pack(push, 1)
-union hash_state {
-  uint8_t b[200];
-  uint64_t w[25];
-};
-#pragma pack(pop)
-
-void hash_permutation(union hash_state *state);
-void hash_process(union hash_state *state, const uint8_t *buf, int count);
-
-#endif
-
-enum {
-  HASH_SIZE = 32,
-  HASH_DATA_AREA = 136
-};
-
-void cn_fast_hash(const void *data, int len, char *hash);
-void cn_slow_hash(const void *data, size_t length, char *hash);
-
-void hash_extra_blake(const void *data, size_t length, char *hash);
-void hash_extra_groestl(const void *data, size_t length, char *hash);
-void hash_extra_jh(const void *data, size_t length, char *hash);
-void hash_extra_skein(const void *data, size_t length, char *hash);
-
-void tree_hash(const char (*hashes)[HASH_SIZE], size_t count, char *root_hash);
--- a/crypto/hash.c
+++ b/crypto/hash.c
@@ -1,24 +0,0 @@
-// Copyright (c) 2012-2013 The Cryptonote developers
-// Distributed under the MIT/X11 software license, see the accompanying
-// file COPYING or http://www.opensource.org/licenses/mit-license.php.
-
-#include <stddef.h>
-#include <stdint.h>
-#include <string.h>
-
-#include "hash-ops.h"
-#include "c_keccak.h"
-
-void hash_permutation(union hash_state *state) {
-  keccakf((uint64_t*)state, 24);
-}
-
-void hash_process(union hash_state *state, const uint8_t *buf, int count) {
-  keccak1600(buf, count, (uint8_t*)state);
-}
-
-void cn_fast_hash(const void *data, int len, char *hash) {
-  union hash_state state;
-  hash_process(&state, data, len);
-  memcpy(hash, &state, HASH_SIZE);
-}
--- a/crypto/hash.h
+++ b/crypto/hash.h
@@ -1,5 +0,0 @@
-#pragma once
-
-typedef unsigned char BitSequence;
-typedef unsigned long long DataLength;
-typedef enum {SUCCESS = 0, FAIL = 1, BAD_HASHLEN = 2} HashReturn;
--- a/crypto/int-util.h
+++ b/crypto/int-util.h
@@ -1,195 +0,0 @@
-// Copyright (c) 2012-2013 The Cryptonote developers
-// Distributed under the MIT/X11 software license, see the accompanying
-// file COPYING or http://www.opensource.org/licenses/mit-license.php.
-
-#pragma once
-#ifndef INT_UTILS_H_
-#define INT_UTILS_H_
-
-#include <assert.h>
-#include <stdbool.h>
-#include <stdint.h>
-#include <string.h>
-#ifndef _MSC_VER
-#include <sys/param.h>
-#else
-#define inline __inline
-#endif
-
-#ifndef LITTLE_ENDIAN
-#define LITTLE_ENDIAN 0x1234
-#define BIG_ENDIAN 0x4321
-#endif
-
-#if !defined(BYTE_ORDER) && (defined(__LITTLE_ENDIAN__) || defined(__arm__) || defined(WIN32))
-#define BYTE_ORDER LITTLE_ENDIAN
-#endif
-
-#if defined(WIN32)
-#include <stdlib.h>
-
-static inline uint32_t rol32(uint32_t x, int r) {
-  return _rotl(x, r);
-}
-
-static inline uint64_t rol64(uint64_t x, int r) {
-  return _rotl64(x, r);
-}
-
-#else
-
-static inline uint32_t rol32(uint32_t x, int r) {
-  return (x << (r & 31)) | (x >> (-r & 31));
-}
-
-static inline uint64_t rol64(uint64_t x, int r) {
-  return (x << (r & 63)) | (x >> (-r & 63));
-}
-
-#endif
-
-static inline uint64_t hi_dword(uint64_t val) {
-  return val >> 32;
-}
-
-static inline uint64_t lo_dword(uint64_t val) {
-  return val & 0xFFFFFFFF;
-}
-
-static inline uint64_t div_with_reminder(uint64_t dividend, uint32_t divisor, uint32_t* remainder) {
-  dividend |= ((uint64_t)*remainder) << 32;
-  *remainder = dividend % divisor;
-  return dividend / divisor;
-}
-
-// Long division with 2^32 base
-static inline uint32_t div128_32(uint64_t dividend_hi, uint64_t dividend_lo, uint32_t divisor, uint64_t* quotient_hi, uint64_t* quotient_lo) {
-  uint64_t dividend_dwords[4];
-  uint32_t remainder = 0;
-
-  dividend_dwords[3] = hi_dword(dividend_hi);
-  dividend_dwords[2] = lo_dword(dividend_hi);
-  dividend_dwords[1] = hi_dword(dividend_lo);
-  dividend_dwords[0] = lo_dword(dividend_lo);
-
-  *quotient_hi  = div_with_reminder(dividend_dwords[3], divisor, &remainder) << 32;
-  *quotient_hi |= div_with_reminder(dividend_dwords[2], divisor, &remainder);
-  *quotient_lo  = div_with_reminder(dividend_dwords[1], divisor, &remainder) << 32;
-  *quotient_lo |= div_with_reminder(dividend_dwords[0], divisor, &remainder);
-
-  return remainder;
-}
-
-#define IDENT32(x) ((uint32_t) (x))
-#define IDENT64(x) ((uint64_t) (x))
-
-#define SWAP32(x) ((((uint32_t) (x) & 0x000000ff) << 24) | \
-  (((uint32_t) (x) & 0x0000ff00) <<  8) | \
-  (((uint32_t) (x) & 0x00ff0000) >>  8) | \
-  (((uint32_t) (x) & 0xff000000) >> 24))
-#define SWAP64(x) ((((uint64_t) (x) & 0x00000000000000ff) << 56) | \
-  (((uint64_t) (x) & 0x000000000000ff00) << 40) | \
-  (((uint64_t) (x) & 0x0000000000ff0000) << 24) | \
-  (((uint64_t) (x) & 0x00000000ff000000) <<  8) | \
-  (((uint64_t) (x) & 0x000000ff00000000) >>  8) | \
-  (((uint64_t) (x) & 0x0000ff0000000000) >> 24) | \
-  (((uint64_t) (x) & 0x00ff000000000000) >> 40) | \
-  (((uint64_t) (x) & 0xff00000000000000) >> 56))
-
-static inline uint32_t ident32(uint32_t x) { return x; }
-static inline uint64_t ident64(uint64_t x) { return x; }
-
-static inline uint32_t swap32(uint32_t x) {
-  x = ((x & 0x00ff00ff) << 8) | ((x & 0xff00ff00) >> 8);
-  return (x << 16) | (x >> 16);
-}
-static inline uint64_t swap64(uint64_t x) {
-  x = ((x & 0x00ff00ff00ff00ff) <<  8) | ((x & 0xff00ff00ff00ff00) >>  8);
-  x = ((x & 0x0000ffff0000ffff) << 16) | ((x & 0xffff0000ffff0000) >> 16);
-  return (x << 32) | (x >> 32);
-}
-
-#if defined(__GNUC__)
-#define UNUSED __attribute__((unused))
-#else
-#define UNUSED
-#endif
-static inline void mem_inplace_ident(void *mem UNUSED, size_t n UNUSED) { }
-#undef UNUSED
-
-static inline void mem_inplace_swap32(void *mem, size_t n) {
-  size_t i;
-  for (i = 0; i < n; i++) {
-    ((uint32_t *) mem)[i] = swap32(((const uint32_t *) mem)[i]);
-  }
-}
-static inline void mem_inplace_swap64(void *mem, size_t n) {
-  size_t i;
-  for (i = 0; i < n; i++) {
-    ((uint64_t *) mem)[i] = swap64(((const uint64_t *) mem)[i]);
-  }
-}
-
-static inline void memcpy_ident32(void *dst, const void *src, size_t n) {
-  memcpy(dst, src, 4 * n);
-}
-static inline void memcpy_ident64(void *dst, const void *src, size_t n) {
-  memcpy(dst, src, 8 * n);
-}
-
-static inline void memcpy_swap32(void *dst, const void *src, size_t n) {
-  size_t i;
-  for (i = 0; i < n; i++) {
-    ((uint32_t *) dst)[i] = swap32(((const uint32_t *) src)[i]);
-  }
-}
-static inline void memcpy_swap64(void *dst, const void *src, size_t n) {
-  size_t i;
-  for (i = 0; i < n; i++) {
-    ((uint64_t *) dst)[i] = swap64(((const uint64_t *) src)[i]);
-  }
-}
-
-#if !defined(BYTE_ORDER) || !defined(LITTLE_ENDIAN) || !defined(BIG_ENDIAN)
-static_assert(false, "BYTE_ORDER is undefined. Perhaps, GNU extensions are not enabled");
-#endif
-
-#if BYTE_ORDER == LITTLE_ENDIAN
-#define SWAP32LE IDENT32
-#define SWAP32BE SWAP32
-#define swap32le ident32
-#define swap32be swap32
-#define mem_inplace_swap32le mem_inplace_ident
-#define mem_inplace_swap32be mem_inplace_swap32
-#define memcpy_swap32le memcpy_ident32
-#define memcpy_swap32be memcpy_swap32
-#define SWAP64LE IDENT64
-#define SWAP64BE SWAP64
-#define swap64le ident64
-#define swap64be swap64
-#define mem_inplace_swap64le mem_inplace_ident
-#define mem_inplace_swap64be mem_inplace_swap64
-#define memcpy_swap64le memcpy_ident64
-#define memcpy_swap64be memcpy_swap64
-#endif
-
-#if BYTE_ORDER == BIG_ENDIAN
-#define SWAP32BE IDENT32
-#define SWAP32LE SWAP32
-#define swap32be ident32
-#define swap32le swap32
-#define mem_inplace_swap32be mem_inplace_ident
-#define mem_inplace_swap32le mem_inplace_swap32
-#define memcpy_swap32be memcpy_ident32
-#define memcpy_swap32le memcpy_swap32
-#define SWAP64BE IDENT64
-#define SWAP64LE SWAP64
-#define swap64be ident64
-#define swap64le swap64
-#define mem_inplace_swap64be mem_inplace_ident
-#define mem_inplace_swap64le mem_inplace_swap64
-#define memcpy_swap64be memcpy_ident64
-#define memcpy_swap64le memcpy_swap64
-#endif
-
-#endif /* INT_UTILS_H_ */
--- a/crypto/oaes_config.h
+++ b/crypto/oaes_config.h
@@ -1,50 +0,0 @@
-/* 
- * ---------------------------------------------------------------------------
- * OpenAES License
- * ---------------------------------------------------------------------------
- * Copyright (c) 2012, Nabil S. Al Ramli, www.nalramli.com
- * All rights reserved.
- * 
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * 
- *   - Redistributions of source code must retain the above copyright notice,
- *     this list of conditions and the following disclaimer.
- *   - Redistributions in binary form must reproduce the above copyright
- *     notice, this list of conditions and the following disclaimer in the
- *     documentation and/or other materials provided with the distribution.
- * 
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- * ---------------------------------------------------------------------------
- */
-
-#ifndef _OAES_CONFIG_H
-#define _OAES_CONFIG_H
-
-#ifdef __cplusplus 
-extern "C" {
-#endif
-
-//#ifndef OAES_HAVE_ISAAC
-//#define OAES_HAVE_ISAAC 1
-//#endif // OAES_HAVE_ISAAC
-
-//#ifndef OAES_DEBUG
-//#define OAES_DEBUG 0
-//#endif // OAES_DEBUG
-
-#ifdef __cplusplus 
-}
-#endif
-
-#endif // _OAES_CONFIG_H
--- a/crypto/oaes_lib.c
+++ b/crypto/oaes_lib.c
--- a/crypto/oaes_lib.h
+++ b/crypto/oaes_lib.h
@@ -1,214 +0,0 @@
-/* 
- * ---------------------------------------------------------------------------
- * OpenAES License
- * ---------------------------------------------------------------------------
- * Copyright (c) 2012, Nabil S. Al Ramli, www.nalramli.com
- * All rights reserved.
- * 
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * 
- *   - Redistributions of source code must retain the above copyright notice,
- *     this list of conditions and the following disclaimer.
- *   - Redistributions in binary form must reproduce the above copyright
- *     notice, this list of conditions and the following disclaimer in the
- *     documentation and/or other materials provided with the distribution.
- * 
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- * ---------------------------------------------------------------------------
- */
-
-#ifndef _OAES_LIB_H
-#define _OAES_LIB_H
-
-#include <stdint.h>
-
-#ifdef __cplusplus 
-extern "C" {
-#endif
-
-#ifdef _WIN32
-#	ifdef OAES_SHARED
-#		ifdef oaes_lib_EXPORTS
-#			define OAES_API __declspec(dllexport)
-#		else
-#			define OAES_API __declspec(dllimport)
-#		endif
-#	else
-#		define OAES_API
-#	endif
-#else
-#	define OAES_API
-#endif // WIN32
-
-#define OAES_VERSION "0.8.1"
-#define OAES_BLOCK_SIZE 16
-
-typedef void OAES_CTX;
-
-typedef enum
-{
-	OAES_RET_FIRST = 0,
-	OAES_RET_SUCCESS = 0,
-	OAES_RET_UNKNOWN,
-	OAES_RET_ARG1,
-	OAES_RET_ARG2,
-	OAES_RET_ARG3,
-	OAES_RET_ARG4,
-	OAES_RET_ARG5,
-	OAES_RET_NOKEY,
-	OAES_RET_MEM,
-	OAES_RET_BUF,
-	OAES_RET_HEADER,
-	OAES_RET_COUNT
-} OAES_RET;
-
-/*
- * oaes_set_option() takes one of these values for its [option] parameter
- * some options accept either an optional or a required [value] parameter
- */
-// no option
-#define OAES_OPTION_NONE 0
-// enable ECB mode, disable CBC mode
-#define OAES_OPTION_ECB 1
-// enable CBC mode, disable ECB mode
-// value is optional, may pass uint8_t iv[OAES_BLOCK_SIZE] to specify
-// the value of the initialization vector, iv
-#define OAES_OPTION_CBC 2
-
-#ifdef OAES_DEBUG
-typedef int ( * oaes_step_cb ) (
-		const uint8_t state[OAES_BLOCK_SIZE],
-		const char * step_name,
-		int step_count,
-		void * user_data );
-// enable state stepping mode
-// value is required, must pass oaes_step_cb to receive the state at each step
-#define OAES_OPTION_STEP_ON 4
-// disable state stepping mode
-#define OAES_OPTION_STEP_OFF 8
-#endif // OAES_DEBUG
-
-typedef uint16_t OAES_OPTION;
-
-typedef struct _oaes_key
-{
-  size_t data_len;
-  uint8_t *data;
-  size_t exp_data_len;
-  uint8_t *exp_data;
-  size_t num_keys;
-  size_t key_base;
-} oaes_key;
-
-typedef struct _oaes_ctx
-{
-#ifdef OAES_HAVE_ISAAC
-  randctx * rctx;
-#endif // OAES_HAVE_ISAAC
-
-#ifdef OAES_DEBUG
-  oaes_step_cb step_cb;
-#endif // OAES_DEBUG
-
-  oaes_key * key;
-  OAES_OPTION options;
-  uint8_t iv[OAES_BLOCK_SIZE];
-} oaes_ctx;
-/*
- * // usage:
- * 
- * OAES_CTX * ctx = oaes_alloc();
- * .
- * .
- * .
- * {
- *   oaes_gen_key_xxx( ctx );
- *   {
- *     oaes_key_export( ctx, _buf, &_buf_len );
- *     // or
- *     oaes_key_export_data( ctx, _buf, &_buf_len );\
- *   }
- * }
- * // or
- * {
- *   oaes_key_import( ctx, _buf, _buf_len );
- *   // or
- *   oaes_key_import_data( ctx, _buf, _buf_len );
- * }
- * .
- * .
- * .
- * oaes_encrypt( ctx, m, m_len, c, &c_len );
- * .
- * .
- * .
- * oaes_decrypt( ctx, c, c_len, m, &m_len );
- * .
- * .
- * .
- * oaes_free( &ctx );
- */
-
-OAES_API OAES_CTX * oaes_alloc(void);
-
-OAES_API OAES_RET oaes_free( OAES_CTX ** ctx );
-
-OAES_API OAES_RET oaes_set_option( OAES_CTX * ctx,
-		OAES_OPTION option, const void * value );
-
-OAES_API OAES_RET oaes_key_gen_128( OAES_CTX * ctx );
-
-OAES_API OAES_RET oaes_key_gen_192( OAES_CTX * ctx );
-
-OAES_API OAES_RET oaes_key_gen_256( OAES_CTX * ctx );
-
-// export key with header information
-// set data == NULL to get the required data_len
-OAES_API OAES_RET oaes_key_export( OAES_CTX * ctx,
-		uint8_t * data, size_t * data_len );
-
-// directly export the data from key
-// set data == NULL to get the required data_len
-OAES_API OAES_RET oaes_key_export_data( OAES_CTX * ctx,
-		uint8_t * data, size_t * data_len );
-
-// import key with header information
-OAES_API OAES_RET oaes_key_import( OAES_CTX * ctx,
-		const uint8_t * data, size_t data_len );
-
-// directly import data into key
-OAES_API OAES_RET oaes_key_import_data( OAES_CTX * ctx,
-		const uint8_t * data, size_t data_len );
-
-// set c == NULL to get the required c_len
-OAES_API OAES_RET oaes_encrypt( OAES_CTX * ctx,
-		const uint8_t * m, size_t m_len, uint8_t * c, size_t * c_len );
-
-// set m == NULL to get the required m_len
-OAES_API OAES_RET oaes_decrypt( OAES_CTX * ctx,
-		const uint8_t * c, size_t c_len, uint8_t * m, size_t * m_len );
-
-// set buf == NULL to get the required buf_len
-OAES_API OAES_RET oaes_sprintf(
-		char * buf, size_t * buf_len, const uint8_t * data, size_t data_len );
-
-OAES_API OAES_RET oaes_encryption_round( const uint8_t * key, uint8_t * c );
-
-OAES_API OAES_RET oaes_pseudo_encrypt_ecb( OAES_CTX * ctx, uint8_t * c );
-
-#ifdef __cplusplus 
-}
-#endif
-
-#endif // _OAES_LIB_H
--- a/crypto/skein_port.h
+++ b/crypto/skein_port.h
@@ -1,190 +0,0 @@
-#ifndef _SKEIN_PORT_H_
-#define _SKEIN_PORT_H_
-
-#include <limits.h>
-#include <stdint.h>
-
-#ifndef RETURN_VALUES
-#  define RETURN_VALUES
-#  if defined( DLL_EXPORT )
-#    if defined( _MSC_VER ) || defined ( __INTEL_COMPILER )
-#      define VOID_RETURN    __declspec( dllexport ) void __stdcall
-#      define INT_RETURN     __declspec( dllexport ) int  __stdcall
-#    elif defined( __GNUC__ )
-#      define VOID_RETURN    __declspec( __dllexport__ ) void
-#      define INT_RETURN     __declspec( __dllexport__ ) int
-#    else
-#      error Use of the DLL is only available on the Microsoft, Intel and GCC compilers
-#    endif
-#  elif defined( DLL_IMPORT )
-#    if defined( _MSC_VER ) || defined ( __INTEL_COMPILER )
-#      define VOID_RETURN    __declspec( dllimport ) void __stdcall
-#      define INT_RETURN     __declspec( dllimport ) int  __stdcall
-#    elif defined( __GNUC__ )
-#      define VOID_RETURN    __declspec( __dllimport__ ) void
-#      define INT_RETURN     __declspec( __dllimport__ ) int
-#    else
-#      error Use of the DLL is only available on the Microsoft, Intel and GCC compilers
-#    endif
-#  elif defined( __WATCOMC__ )
-#    define VOID_RETURN  void __cdecl
-#    define INT_RETURN   int  __cdecl
-#  else
-#    define VOID_RETURN  void
-#    define INT_RETURN   int
-#  endif
-#endif
-
-/*  These defines are used to declare buffers in a way that allows
-    faster operations on longer variables to be used.  In all these
-    defines 'size' must be a power of 2 and >= 8
-
-    dec_unit_type(size,x)       declares a variable 'x' of length 
-                                'size' bits
-
-    dec_bufr_type(size,bsize,x) declares a buffer 'x' of length 'bsize' 
-                                bytes defined as an array of variables
-                                each of 'size' bits (bsize must be a 
-                                multiple of size / 8)
-
-    ptr_cast(x,size)            casts a pointer to a pointer to a 
-                                varaiable of length 'size' bits
-*/
-
-#define ui_type(size)               uint##size##_t
-#define dec_unit_type(size,x)       typedef ui_type(size) x
-#define dec_bufr_type(size,bsize,x) typedef ui_type(size) x[bsize / (size >> 3)]
-#define ptr_cast(x,size)            ((ui_type(size)*)(x))
-
-typedef unsigned int    uint_t;             /* native unsigned integer */
-typedef uint8_t         u08b_t;             /*  8-bit unsigned integer */
-typedef uint64_t        u64b_t;             /* 64-bit unsigned integer */
-
-#ifndef RotL_64
-#define RotL_64(x,N)    (((x) << (N)) | ((x) >> (64-(N))))
-#endif
-
-/*
- * Skein is "natively" little-endian (unlike SHA-xxx), for optimal
- * performance on x86 CPUs.  The Skein code requires the following
- * definitions for dealing with endianness:
- *
- *    SKEIN_NEED_SWAP:  0 for little-endian, 1 for big-endian
- *    Skein_Put64_LSB_First
- *    Skein_Get64_LSB_First
- *    Skein_Swap64
- *
- * If SKEIN_NEED_SWAP is defined at compile time, it is used here
- * along with the portable versions of Put64/Get64/Swap64, which 
- * are slow in general.
- *
- * Otherwise, an "auto-detect" of endianness is attempted below.
- * If the default handling doesn't work well, the user may insert
- * platform-specific code instead (e.g., for big-endian CPUs).
- *
- */
-#ifndef SKEIN_NEED_SWAP /* compile-time "override" for endianness? */
-
-
-#include "int-util.h"
-
-#define IS_BIG_ENDIAN      4321 /* byte 0 is most significant (mc68k) */
-#define IS_LITTLE_ENDIAN   1234 /* byte 0 is least significant (i386) */
-
-#if BYTE_ORDER == LITTLE_ENDIAN
-#  define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
-#endif
-
-#if BYTE_ORDER == BIG_ENDIAN
-#  define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
-#endif
-
-/* special handler for IA64, which may be either endianness (?)  */
-/* here we assume little-endian, but this may need to be changed */
-#if defined(__ia64) || defined(__ia64__) || defined(_M_IA64)
-#  define PLATFORM_MUST_ALIGN (1)
-#ifndef PLATFORM_BYTE_ORDER
-#  define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
-#endif
-#endif
-
-#ifndef   PLATFORM_MUST_ALIGN
-#  define PLATFORM_MUST_ALIGN (0)
-#endif
-
-
-#if   PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN
-    /* here for big-endian CPUs */
-#define SKEIN_NEED_SWAP   (1)
-#elif PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
-    /* here for x86 and x86-64 CPUs (and other detected little-endian CPUs) */
-#define SKEIN_NEED_SWAP   (0)
-#if   PLATFORM_MUST_ALIGN == 0              /* ok to use "fast" versions? */
-#define Skein_Put64_LSB_First(dst08,src64,bCnt) memcpy(dst08,src64,bCnt)
-#define Skein_Get64_LSB_First(dst64,src08,wCnt) memcpy(dst64,src08,8*(wCnt))
-#endif
-#else
-#error "Skein needs endianness setting!"
-#endif
-
-#endif /* ifndef SKEIN_NEED_SWAP */
-
-/*
- ******************************************************************
- *      Provide any definitions still needed.
- ******************************************************************
- */
-#ifndef Skein_Swap64  /* swap for big-endian, nop for little-endian */
-#if     SKEIN_NEED_SWAP
-#define Skein_Swap64(w64)                       \
-  ( (( ((u64b_t)(w64))       & 0xFF) << 56) |   \
-    (((((u64b_t)(w64)) >> 8) & 0xFF) << 48) |   \
-    (((((u64b_t)(w64)) >>16) & 0xFF) << 40) |   \
-    (((((u64b_t)(w64)) >>24) & 0xFF) << 32) |   \
-    (((((u64b_t)(w64)) >>32) & 0xFF) << 24) |   \
-    (((((u64b_t)(w64)) >>40) & 0xFF) << 16) |   \
-    (((((u64b_t)(w64)) >>48) & 0xFF) <<  8) |   \
-    (((((u64b_t)(w64)) >>56) & 0xFF)      ) )
-#else
-#define Skein_Swap64(w64)  (w64)
-#endif
-#endif  /* ifndef Skein_Swap64 */
-
-
-#ifndef Skein_Put64_LSB_First
-void    Skein_Put64_LSB_First(u08b_t *dst,const u64b_t *src,size_t bCnt)
-#ifdef  SKEIN_PORT_CODE /* instantiate the function code here? */
-    { /* this version is fully portable (big-endian or little-endian), but slow */
-    size_t n;
-
-    for (n=0;n<bCnt;n++)
-        dst[n] = (u08b_t) (src[n>>3] >> (8*(n&7)));
-    }
-#else
-    ;    /* output only the function prototype */
-#endif
-#endif   /* ifndef Skein_Put64_LSB_First */
-
-
-#ifndef Skein_Get64_LSB_First
-void    Skein_Get64_LSB_First(u64b_t *dst,const u08b_t *src,size_t wCnt)
-#ifdef  SKEIN_PORT_CODE /* instantiate the function code here? */
-    { /* this version is fully portable (big-endian or little-endian), but slow */
-    size_t n;
-
-    for (n=0;n<8*wCnt;n+=8)
-        dst[n/8] = (((u64b_t) src[n  ])      ) +
-                   (((u64b_t) src[n+1]) <<  8) +
-                   (((u64b_t) src[n+2]) << 16) +
-                   (((u64b_t) src[n+3]) << 24) +
-                   (((u64b_t) src[n+4]) << 32) +
-                   (((u64b_t) src[n+5]) << 40) +
-                   (((u64b_t) src[n+6]) << 48) +
-                   (((u64b_t) src[n+7]) << 56) ;
-    }
-#else
-    ;    /* output only the function prototype */
-#endif
-#endif   /* ifndef Skein_Get64_LSB_First */
-
-#endif   /* ifndef _SKEIN_PORT_H_ */
--- a/miner.h
+++ b/miner.h
@@ -447,26 +447,14 @@ bool stratum_subscribe(struct stratum_ctx *sctx);
 bool stratum_authorize(struct stratum_ctx *sctx, const char *user, const char *pass);
 bool stratum_handle_method(struct stratum_ctx *sctx, const char *s);

-/* rpc 2.0 (xmr) */
+extern bool lowdiff_debug;
+


-extern bool jsonrpc_2;
 extern bool aes_ni_supported;
-extern char rpc2_id[64];
-extern char *rpc2_blob;
-extern size_t rpc2_bloblen;
-extern uint32_t rpc2_target;
-extern char *rpc2_job_id;
 extern char *rpc_user;
 extern char *short_url;

-json_t *json_rpc2_call(CURL *curl, const char *url, const char *userpass, const char *rpc_req, int *curl_err, int flags);
-bool rpc2_login(CURL *curl);
-bool rpc2_login_decode(const json_t *val);
-bool rpc2_workio_login(CURL *curl);
-bool rpc2_stratum_job(struct stratum_ctx *sctx, json_t *params);
-bool rpc2_job_decode(const json_t *job, struct work *work);
-
 struct thread_q;

 struct thread_q *tq_new(void);
@@ -763,8 +751,6 @@ extern bool opt_hash_meter;
 extern uint32_t accepted_share_count;
 extern uint32_t rejected_share_count;
 extern uint32_t solved_block_count;
-extern pthread_mutex_t rpc2_job_lock;
-extern pthread_mutex_t rpc2_login_lock;
 extern pthread_mutex_t applog_lock;
 extern pthread_mutex_t stats_lock;
 extern bool opt_sapling;
--- a/simd-utils/simd-256.h
+++ b/simd-utils/simd-256.h
@@ -121,12 +121,9 @@ do { \


 // Horizontal vector testing
-
-#define mm256_allbits0( a )    _mm256_testz_si256(   a, a )
-#define mm256_allbits1( a )    _mm256_testc_si256(   a, m256_neg1 )
-//broken
-//#define mm256_allbitsne( a )   _mm256_testnzc_si256( a, m256_neg1 )
-#define mm256_anybits0( a )    !mm256_allbits1( a )
+#define mm256_allbits0( a )    _mm256_testc_si256(   a, m256_neg1 )
+#define mm256_allbits1( a )    _mm256_testz_si256(   a, a )
+#define mm256_anybits0( a )   !mm256_allbits1( a )
 #define mm256_anybits1( a )   !mm256_allbits0( a )


--- a/util.c
+++ b/util.c
@@ -1423,9 +1423,6 @@ bool stratum_subscribe(struct stratum_ctx *sctx)
 	json_error_t err;
 	bool ret = false, retry = false;

-	if (jsonrpc_2)
-		return true;
-
 start:
 	s = (char*) malloc(128 + (sctx->session_id ? strlen(sctx->session_id) : 0));
 	if (retry)
@@ -1514,16 +1511,9 @@ bool stratum_authorize(struct stratum_ctx *sctx, const char *user, const char *p
 	json_error_t err;
 	bool ret = false;

-	if (jsonrpc_2) {
-		s = (char*) malloc(300 + strlen(user) + strlen(pass));
-		sprintf(s, "{\"method\": \"login\", \"params\": {"
-			"\"login\": \"%s\", \"pass\": \"%s\", \"agent\": \"%s\"}, \"id\": 1}",
-			user, pass, USER_AGENT);
-	} else {
-		s = (char*) malloc(80 + strlen(user) + strlen(pass));
-		sprintf(s, "{\"id\": 2, \"method\": \"mining.authorize\", \"params\": [\"%s\", \"%s\"]}",
+	s = (char*) malloc(80 + strlen(user) + strlen(pass));
+	sprintf(s, "{\"id\": 2, \"method\": \"mining.authorize\", \"params\": [\"%s\", \"%s\"]}",
 			user, pass);
-	}

 	if (!stratum_send_line(sctx, s))
 		goto out;
@@ -1553,15 +1543,6 @@ bool stratum_authorize(struct stratum_ctx *sctx, const char *user, const char *p
 		goto out;
 	}

-	if (jsonrpc_2) {
-		rpc2_login_decode(val);
-		json_t *job_val = json_object_get(res_val, "job");
-		pthread_mutex_lock(&sctx->work_lock);
-		if(job_val) rpc2_job_decode(job_val, &sctx->work);
-                sctx->job.job_id = strdup(sctx->work.job_id);
-		pthread_mutex_unlock(&sctx->work_lock);
-	}
-
 	ret = true;

 	if (!opt_extranonce)
@@ -1575,8 +1556,6 @@ bool stratum_authorize(struct stratum_ctx *sctx, const char *user, const char *p

 	if (!socket_full(sctx->sock, 3)) {
         applog(LOG_WARNING, "stratum extranonce subscribe timed out");
-//		if (opt_debug)
-//			applog(LOG_DEBUG, "stratum extranonce subscribe timed out");
 		goto out;
 	}

@@ -1607,204 +1586,6 @@ out:
 	return ret;
 }

-// -------------------- RPC 2.0 (XMR/AEON) -------------------------
-
-//extern pthread_mutex_t rpc2_login_lock;
-//extern pthread_mutex_t rpc2_job_lock;
-
-bool rpc2_login_decode(const json_t *val)
-{
-	const char *id;
-	const char *s;
-
-	json_t *res = json_object_get(val, "result");
-	if(!res) {
-		applog(LOG_ERR, "JSON invalid result");
-		goto err_out;
-	}
-
-	json_t *tmp;
-	tmp = json_object_get(res, "id");
-	if(!tmp) {
-		applog(LOG_ERR, "JSON inval id");
-		goto err_out;
-	}
-	id = json_string_value(tmp);
-	if(!id) {
-		applog(LOG_ERR, "JSON id is not a string");
-		goto err_out;
-	}
-
-	memcpy(&rpc2_id, id, 64);
-
-	if(opt_debug)
-		applog(LOG_DEBUG, "Auth id: %s", id);
-
-	tmp = json_object_get(res, "status");
-	if(!tmp) {
-		applog(LOG_ERR, "JSON inval status");
-		goto err_out;
-	}
-	s = json_string_value(tmp);
-	if(!s) {
-		applog(LOG_ERR, "JSON status is not a string");
-		goto err_out;
-	}
-	if(strcmp(s, "OK")) {
-		applog(LOG_ERR, "JSON returned status \"%s\"", s);
-		return false;
-	}
-
-	return true;
-
-err_out:
-	applog(LOG_WARNING,"%s: fail", __func__);
-	return false;
-}
-
-json_t* json_rpc2_call_recur(CURL *curl, const char *url, const char *userpass,
-	json_t *rpc_req, int *curl_err, int flags, int recur)
-{
-	if(recur >= 5) {
-		if(opt_debug)
-			applog(LOG_DEBUG, "Failed to call rpc command after %i tries", recur);
-		return NULL;
-	}
-	if(!strcmp(rpc2_id, "")) {
-		if(opt_debug)
-			applog(LOG_DEBUG, "Tried to call rpc2 command before authentication");
-		return NULL;
-	}
-	json_t *params = json_object_get(rpc_req, "params");
-	if (params) {
-		json_t *auth_id = json_object_get(params, "id");
-		if (auth_id) {
-			json_string_set(auth_id, rpc2_id);
-		}
-	}
-	json_t *res = json_rpc_call(curl, url, userpass, json_dumps(rpc_req, 0),
-			curl_err, flags | JSON_RPC_IGNOREERR);
-	if(!res) goto end;
-	json_t *error = json_object_get(res, "error");
-	if(!error) goto end;
-	json_t *message;
-	if(json_is_string(error))
-		message = error;
-	else
-		message = json_object_get(error, "message");
-	if(!message || !json_is_string(message)) goto end;
-	const char *mes = json_string_value(message);
-	if(!strcmp(mes, "Unauthenticated")) {
-		pthread_mutex_lock(&rpc2_login_lock);
-		rpc2_login(curl);
-		sleep(1);
-		pthread_mutex_unlock(&rpc2_login_lock);
-		return json_rpc2_call_recur(curl, url, userpass, rpc_req,
-				curl_err, flags, recur + 1);
-	} else if(!strcmp(mes, "Low difficulty share") || !strcmp(mes, "Block expired") || !strcmp(mes, "Invalid job id") || !strcmp(mes, "Duplicate share")) {
-		json_t *result = json_object_get(res, "result");
-		if(!result) {
-			goto end;
-		}
-		json_object_set(result, "reject-reason", json_string(mes));
-	} else {
-		applog(LOG_ERR, "json_rpc2.0 error: %s", mes);
-		return NULL;
-	}
-	end:
-	return res;
-}
-
-json_t *json_rpc2_call(CURL *curl, const char *url, const char *userpass, const char *rpc_req, int *curl_err, int flags)
-{
-	json_t* req_json = JSON_LOADS(rpc_req, NULL);
-	json_t* res = json_rpc2_call_recur(curl, url, userpass, req_json, curl_err, flags, 0);
-	json_decref(req_json);
-	return res;
-}
-
-bool rpc2_job_decode(const json_t *job, struct work *work)
-{
-	if (!jsonrpc_2) {
-		applog(LOG_ERR, "Tried to decode job without JSON-RPC 2.0");
-		return false;
-	}
-	json_t *tmp;
-	tmp = json_object_get(job, "job_id");
-	if (!tmp) {
-		applog(LOG_ERR, "JSON invalid job id");
-		goto err_out;
-	}
-	const char *job_id = json_string_value(tmp);
-	tmp = json_object_get(job, "blob");
-	if (!tmp) {
-		applog(LOG_ERR, "JSON invalid blob");
-		goto err_out;
-	}
-	const char *hexblob = json_string_value(tmp);
-	size_t blobLen = strlen(hexblob);
-	if (blobLen % 2 != 0 || ((blobLen / 2) < 40 && blobLen != 0) || (blobLen / 2) > 128) {
-		applog(LOG_ERR, "JSON invalid blob length");
-		goto err_out;
-	}
-	if (blobLen != 0) {
-		uint32_t target = 0;
-		pthread_mutex_lock(&rpc2_job_lock);
-		uchar *blob = (uchar*) malloc(blobLen / 2);
-		if (!hex2bin(blob, hexblob, blobLen / 2)) {
-			applog(LOG_ERR, "JSON invalid blob");
-			pthread_mutex_unlock(&rpc2_job_lock);
-			goto err_out;
-		}
-		rpc2_bloblen = blobLen / 2;
-		if (rpc2_blob) free(rpc2_blob);
-		rpc2_blob = (char*) malloc(rpc2_bloblen);
-		if (!rpc2_blob)  {
-			applog(LOG_ERR, "RPC2 OOM!");
-			goto err_out;
-		}
-		memcpy(rpc2_blob, blob, blobLen / 2);
-		free(blob);
-
-		jobj_binary(job, "target", &target, 4);
-		if(rpc2_target != target)
-                {
-   		   double hashrate = 0.0;
-                   pthread_mutex_lock(&stats_lock);
-		   for (int i = 0; i < opt_n_threads; i++)
-		      hashrate += thr_hashrates[i];
-                   pthread_mutex_unlock(&stats_lock);
-		   double diff = trunc( ( ((double)0xffffffff) / target ) );
-		   if ( !opt_quiet )
-		      // xmr pool diff can change a lot...
-		      applog(LOG_BLUE, "Stratum difficulty set to %g", diff);
-         work->stratum_diff = diff;
-         stratum_diff = diff;
-		   rpc2_target = target;
-		}
-
-		if (rpc2_job_id) free(rpc2_job_id);
-		rpc2_job_id = strdup(job_id);
-		pthread_mutex_unlock(&rpc2_job_lock);
-	}
-	if(work) {
-		if (!rpc2_blob) {
-			applog(LOG_WARNING, "Work requested before it was received");
-			goto err_out;
-		}
-		memcpy(work->data, rpc2_blob, rpc2_bloblen);
-		memset(work->target, 0xff, sizeof(work->target));
-		work->target[7] = rpc2_target;
-		if (work->job_id) free(work->job_id);
-		work->job_id = strdup(rpc2_job_id);
-	}
-	return true;
-
-err_out:
-	applog(LOG_WARNING, "%s", __func__);
-	return false;
-}
-
 /**
 * Extract bloc height     L H... here len=3, height=0x1333e8
 * "...0000000000ffffffff2703e83313062f503253482f043d61105408"
@@ -2298,13 +2079,6 @@ bool stratum_handle_method(struct stratum_ctx *sctx, const char *s)

 	params = json_object_get(val, "params");

-	if (jsonrpc_2) {
-		if (!strcasecmp(method, "job")) {
-			ret = rpc2_stratum_job(sctx, params);
-		}
-		goto out;
-	}
-
 	id = json_object_get(val, "id");

 	if (!strcasecmp(method, "mining.notify")) {
Author	SHA1	Message	Date
Jay D Dee	3da2b958cf	v3.12.2	2020-02-09 13:30:40 -05:00
Jay D Dee	dc2f8d81d3	v3.12.1	2020-02-07 20:18:20 -05:00
Jay D Dee	fc97ef174a	v3.12.0.1	2020-02-06 22:50:20 -05:00
Jay D Dee	13523a12f9	v3.12.0	2020-02-05 22:50:58 -05:00