v3.12.2

2026-07-14 19:06:50 +00:00 · 2020-02-09 13:30:40 -05:00
parent dc2f8d81d3
commit 3da2b958cf
39 changed files with 1496 additions and 1518 deletions
--- a/algo/x17/sonoa-4way.c
+++ b/algo/x17/sonoa-4way.c
--- a/algo/x17/sonoa.c
+++ b/algo/x17/sonoa.c
@@ -563,59 +563,31 @@ void sonoa_hash( void *state, const void *input )
 }

 int scanhash_sonoa( struct work *work, uint32_t max_nonce,
-	            uint64_t *hashes_done, struct thr_info *mythr )
+             uint64_t *hashes_done, struct thr_info *mythr)   // scans nonces starting at work->data[19]; always returns 0
 {
-   uint32_t _ALIGN(128) hash32[8];
-   uint32_t _ALIGN(128) endiandata[20];
+   uint32_t edata[20] __attribute__((aligned(64)));   // 20-word working copy of work->data, filled by mm128_bswap32_80 below
+   uint32_t hash64[8] __attribute__((aligned(64)));   // receives the sonoa_hash output for the current nonce
   uint32_t *pdata = work->data;
   uint32_t *ptarget = work->target;
+   uint32_t n = pdata[19];   // running nonce; now starts at pdata[19] rather than pdata[19] - 1
   const uint32_t first_nonce = pdata[19];
-   const uint32_t Htarg = ptarget[7];
-   uint32_t n = pdata[19] - 1;
-   int thr_id = mythr->id;  // thr_id arg is deprecated
+   const int thr_id = mythr->id;
+   const bool bench = opt_benchmark;   // benchmark flag sampled once; when set, nothing is submitted

-   uint64_t htmax[] =
+   mm128_bswap32_80( edata, pdata );   // presumably byte-swaps the 80-byte header word by word, replacing the five mm128_bswap_32 calls removed below; confirm in the helper
+
+   do
   {
-	0,
-	0xF,
-	0xFF,
-	0xFFF,
-	0xFFFF,
-	0x10000000
-   };
-   uint32_t masks[] =
-   {
-	0xFFFFFFFF,
-	0xFFFFFFF0,
-	0xFFFFFF00,
-	0xFFFFF000,
-	0xFFFF0000,
-	0
-   };
-
-
-   // we need bigendian data...
-   casti_m128i( endiandata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
-   casti_m128i( endiandata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
-   casti_m128i( endiandata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
-   casti_m128i( endiandata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
-   casti_m128i( endiandata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
-
-   for ( int m = 0; m < 6; m++ ) if ( Htarg <= htmax[m] )
-   {
-      uint32_t mask = masks[m];
-      do
+      edata[19] = n;   // nonce is written into the working copy without a further swap
+      sonoa_hash( hash64, edata );
+      if ( unlikely( valid_hash( hash64, ptarget ) && !bench ) )   // valid_hash replaces the old mask + fulltest pair; presumably a full target comparison, confirm
      {
-         pdata[19] = ++n;
-         be32enc(&endiandata[19], n);
-         sonoa_hash(hash32, endiandata);
-         if ( !( hash32[7] & mask ) )
-         if ( fulltest( hash32, ptarget ) && !opt_benchmark )
-            submit_solution( work, hash32, mythr );
-	   } while (n < max_nonce && !work_restart[thr_id].restart);
-	   break;
-	}
-   *hashes_done = n - first_nonce + 1;
+         pdata[19] = bswap_32( n );   // work->data receives the byte-swapped nonce before submission
+         submit_solution( work, hash64, mythr );
+      }
+      n++;   // post-increment: n is always the next nonce to try
+   } while ( n < max_nonce && !work_restart[thr_id].restart );
+   *hashes_done = n - first_nonce;   // n advanced once per hash, so the old "+ 1" correction is no longer needed
   pdata[19] = n;
   return 0;
 }
--- a/algo/x17/x17-4way.c
+++ b/algo/x17/x17-4way.c
@@ -74,9 +74,7 @@ void x17_8way_hash( void *state, const void *input )

     blake512_8way_full( &ctx.blake, vhash, input, 80 );

-     bmw512_8way_init( &ctx.bmw );
-     bmw512_8way_update( &ctx.bmw, vhash, 64 );
-     bmw512_8way_close( &ctx.bmw, vhash );
+     bmw512_8way_full( &ctx.bmw, vhash, vhash, 64 );

 #if defined(__VAES__)

@@ -106,9 +104,7 @@ void x17_8way_hash( void *state, const void *input )

 #endif

-     skein512_8way_init( &ctx.skein );
-     skein512_8way_update( &ctx.skein, vhash, 64 );
-     skein512_8way_close( &ctx.skein, vhash );
+     skein512_8way_full( &ctx.skein, vhash, vhash, 64 );

     jh512_8way_init( &ctx.jh );
     jh512_8way_update( &ctx.jh, vhash, 64 );
@@ -290,12 +286,12 @@ int scanhash_x17_8way( struct work *work, uint32_t max_nonce,
   uint32_t hash[8*8] __attribute__ ((aligned (128)));
   uint32_t vdata[20*8] __attribute__ ((aligned (64)));
   uint32_t lane_hash[8] __attribute__ ((aligned (64)));
-   uint32_t *hash32 = &(hash[7*8]);
+   uint32_t *hashd7 = &(hash[7*8]);
   uint32_t *pdata = work->data;
   const uint32_t *ptarget = work->target;
   const uint32_t first_nonce = pdata[19];
   const uint32_t last_nonce = max_nonce - 8;
-   __m512i  *noncev = (__m512i*)vdata + 9;   // aligned
+   __m512i  *noncev = (__m512i*)vdata + 9; 
   uint32_t n = first_nonce;
   const int thr_id = mythr->id;
   const uint32_t targ32 = ptarget[7];
@@ -310,7 +306,7 @@ int scanhash_x17_8way( struct work *work, uint32_t max_nonce,
      x17_8way_hash( hash, vdata );

      for ( int lane = 0; lane < 8; lane++ )
-      if ( unlikely( ( hash32[ lane ] <= targ32 ) && !bench ) )
+      if ( unlikely( ( hashd7[ lane ] <= targ32 ) && !bench ) )
      {
         extr_lane_8x32( lane_hash, hash, lane, 256 );
         if ( likely( valid_hash( lane_hash, ptarget ) ) )
@@ -378,9 +374,7 @@ void x17_4way_hash( void *state, const void *input )

     intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );

-     skein512_4way_init( &ctx.skein );
-     skein512_4way_update( &ctx.skein, vhash, 64 );
-     skein512_4way_close( &ctx.skein, vhash );
+     skein512_4way_full( &ctx.skein, vhash, vhash, 64 );

     jh512_4way_init( &ctx.jh );
     jh512_4way_update( &ctx.jh, vhash, 64 );
@@ -477,7 +471,7 @@ int scanhash_x17_4way( struct work *work, uint32_t max_nonce,
   uint32_t hash[8*4] __attribute__ ((aligned (64)));
   uint32_t vdata[20*4] __attribute__ ((aligned (64)));
   uint32_t lane_hash[8] __attribute__ ((aligned (64)));
-   uint32_t *hash32 = &(hash[ 7*4 ]);
+   uint32_t *hashd7 = &(hash[ 7*4 ]);
   uint32_t *pdata = work->data;
   const uint32_t *ptarget = work->target;
   const uint32_t first_nonce = pdata[19];
@@ -496,7 +490,7 @@ int scanhash_x17_4way( struct work *work, uint32_t max_nonce,
      x17_4way_hash( hash, vdata );

      for ( int lane = 0; lane < 4; lane++ )
-      if ( unlikely( hash32[ lane ] <= targ32 && !bench ) )
+      if ( unlikely( hashd7[ lane ] <= targ32 && !bench ) )
      {  
         extr_lane_4x32( lane_hash, hash, lane, 256 );
         if ( valid_hash( lane_hash, ptarget ) )
--- a/algo/x17/x17.c
+++ b/algo/x17/x17.c
@@ -169,8 +169,8 @@ int scanhash_x17( struct work *work, uint32_t max_nonce,
         submit_solution( work, hash64, mythr );
      }
      n++;
-   } while ( n < max_nonce && !work_restart[thr_id].restart);
-   *hashes_done = n - first_nonce + 1;
+   } while ( n < max_nonce && !work_restart[thr_id].restart );
+   *hashes_done = n - first_nonce;
   pdata[19] = n;
   return 0;
 }
--- a/algo/x17/xevan-4way.c
+++ b/algo/x17/xevan-4way.c
@@ -76,9 +76,7 @@ void xevan_8way_hash( void *output, const void *input )
     blake512_8way_full( &ctx.blake, vhash, input, 80 );
     memset( &vhash[8<<3], 0, 64<<3 );

-     bmw512_8way_init( &ctx.bmw );
-     bmw512_8way_update( &ctx.bmw, vhash, dataLen );
-     bmw512_8way_close( &ctx.bmw, vhash );
+     bmw512_8way_full( &ctx.bmw, vhash, vhash, dataLen );

 #if defined(__VAES__)

@@ -108,9 +106,7 @@ void xevan_8way_hash( void *output, const void *input )

 #endif

-     skein512_8way_init( &ctx.skein );
-     skein512_8way_update( &ctx.skein, vhash, dataLen );
-     skein512_8way_close( &ctx.skein, vhash );
+     skein512_8way_full( &ctx.skein, vhash, vhash, dataLen );

     jh512_8way_init( &ctx.jh );
     jh512_8way_update( &ctx.jh, vhash, dataLen );
@@ -291,9 +287,7 @@ void xevan_8way_hash( void *output, const void *input )

     blake512_8way_full( &ctx.blake, vhash, vhash, dataLen );

-     bmw512_8way_init( &ctx.bmw );
-     bmw512_8way_update( &ctx.bmw, vhash, dataLen );
-     bmw512_8way_close( &ctx.bmw, vhash );
+     bmw512_8way_full( &ctx.bmw, vhash, vhash, dataLen );

 #if defined(__VAES__)

@@ -323,9 +317,7 @@ void xevan_8way_hash( void *output, const void *input )

 #endif

-     skein512_8way_init( &ctx.skein );
-     skein512_8way_update( &ctx.skein, vhash, dataLen );
-     skein512_8way_close( &ctx.skein, vhash );
+     skein512_8way_full( &ctx.skein, vhash, vhash, dataLen );

     jh512_8way_init( &ctx.jh );
     jh512_8way_update( &ctx.jh, vhash, dataLen );
@@ -504,40 +496,43 @@ void xevan_8way_hash( void *output, const void *input )
 int scanhash_xevan_8way( struct work *work, uint32_t max_nonce,
                       uint64_t *hashes_done, struct thr_info *mythr )
 {
-   uint32_t hash[8*16] __attribute__ ((aligned (128)));
-   uint32_t vdata[24*8] __attribute__ ((aligned (64)));
+   uint32_t hash[8*8] __attribute__ ((aligned (128)));   // shrunk from 8*16; the code below only extracts 256 bits (8 words) for each of 8 lanes
+   uint32_t vdata[20*8] __attribute__ ((aligned (64)));   // shrunk from 24*8; 20 words per lane, 8 lanes
   uint32_t lane_hash[8] __attribute__ ((aligned (64)));
-   uint32_t *hash7 = &(hash[7<<3]);
+   uint32_t *hashd7 = &(hash[7*8]);   // same address as the old hash7 (7<<3 == 7*8); one entry per lane is read from here
   uint32_t *pdata = work->data;
   const uint32_t *ptarget = work->target;
   const uint32_t first_nonce = pdata[19];
   const uint32_t last_nonce = max_nonce - 8;
-   __m512i  *noncev = (__m512i*)vdata + 9;   // aligned
+   __m512i  *noncev = (__m512i*)vdata + 9;   // 10th (last) 64-byte vector of vdata; the nonces are blended into it
   uint32_t n = first_nonce;
   const int thr_id = mythr->id;
-   const uint32_t Htarg = ptarget[7];
+   const uint32_t targ32 = ptarget[7];   // target word compared against each lane's hashd7 entry
+   const bool bench = opt_benchmark;   // benchmark flag sampled once; when set, no lane is tested or submitted

   mm512_bswap32_intrlv80_8x64( vdata, pdata );
+   *noncev = mm512_intrlv_blend_32(
+              _mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
+                                n+3, 0, n+2, 0, n+1, 0, n,   0 ), *noncev );   // built once before the loop; odd 32-bit elements hold n..n+7, and the old mm512_bswap_32 on the nonces is gone
   do
   {
-      *noncev = mm512_intrlv_blend_32( mm512_bswap_32(
-              _mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
-                                n+3, 0, n+2, 0, n+1, 0, n,   0 ) ), *noncev );
      xevan_8way_hash( hash, vdata );

      for ( int lane = 0; lane < 8; lane++ )
-      if unlikely( ( hash7[ lane ] <= Htarg ) )
+      if ( unlikely( ( hashd7[ lane ] <= targ32 ) && !bench ) )   // one-word pre-filter per lane; never true in benchmark mode
      {
         extr_lane_8x32( lane_hash, hash, lane, 256 );
-         if ( likely( fulltest( lane_hash, ptarget ) && !opt_benchmark ) )
+         if ( likely( valid_hash( lane_hash, ptarget ) ) )   // replaces fulltest; the benchmark check moved to the outer condition
         {
-            pdata[19] = n + lane;
+            pdata[19] = bswap_32( n + lane );   // nonces are no longer swapped in the vector, so the value stored for submission is swapped here
            submit_lane_solution( work, lane_hash, mythr, lane );
         }
      }
+      *noncev = _mm512_add_epi32( *noncev,
+                                  m512_const1_64( 0x0000000800000000 ) );   // adds 8 to the high 32 bits of each 64-bit element (assuming m512_const1_64 broadcasts the constant), in step with n += 8
      n += 8;
   } while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) );
-
+   pdata[19] = n;   // new: the next untried nonce is written back to work->data
   *hashes_done = n - first_nonce;
   return 0;
 }
@@ -578,8 +573,6 @@ void xevan_4way_hash( void *output, const void *input )
     const int dataLen = 128;
     xevan_4way_context_overlay ctx __attribute__ ((aligned (64)));

-     // parallel 4 way
-
     blake512_4way_full( &ctx.blake, vhash, input, 80 );
     memset( &vhash[8<<2], 0, 64<<2 );

@@ -598,9 +591,7 @@ void xevan_4way_hash( void *output, const void *input )
     // Parallel 4way
     intrlv_4x64( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );

-     skein512_4way_init( &ctx.skein );
-     skein512_4way_update( &ctx.skein, vhash, dataLen );
-     skein512_4way_close( &ctx.skein, vhash );
+     skein512_4way_full( &ctx.skein, vhash, vhash, dataLen );

     jh512_4way_init( &ctx.jh );
     jh512_4way_update( &ctx.jh, vhash, dataLen );
@@ -618,15 +609,11 @@ void xevan_4way_hash( void *output, const void *input )
     cube_2way_full( &ctx.cube, vhashA, 512, vhashA, dataLen );
     cube_2way_full( &ctx.cube, vhashB, 512, vhashB, dataLen );

-     shavite512_2way_init( &ctx.shavite );
-     shavite512_2way_update_close( &ctx.shavite, vhashA, vhashA, dataLen );
-     shavite512_2way_init( &ctx.shavite );
-     shavite512_2way_update_close( &ctx.shavite, vhashB, vhashB, dataLen );
+     shavite512_2way_full( &ctx.shavite, vhashA, vhashA, dataLen );
+     shavite512_2way_full( &ctx.shavite, vhashB, vhashB, dataLen );

-     simd_2way_init( &ctx.simd, 512 );
-     simd_2way_update_close( &ctx.simd, vhashA, vhashA, dataLen<<3 );
-     simd_2way_init( &ctx.simd, 512 );
-     simd_2way_update_close( &ctx.simd, vhashB, vhashB, dataLen<<3 );
+     simd512_2way_full( &ctx.simd, vhashA, vhashA, dataLen );
+     simd512_2way_full( &ctx.simd, vhashB, vhashB, dataLen );

     dintrlv_2x128( hash0, hash1, vhashA, dataLen<<3 );
     dintrlv_2x128( hash2, hash3, vhashB, dataLen<<3 );
@@ -718,9 +705,7 @@ void xevan_4way_hash( void *output, const void *input )

     intrlv_4x64( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );

-     skein512_4way_init( &ctx.skein );
-     skein512_4way_update( &ctx.skein, vhash, dataLen );
-     skein512_4way_close( &ctx.skein, vhash );
+     skein512_4way_full( &ctx.skein, vhash, vhash, dataLen );

     jh512_4way_init( &ctx.jh );
     jh512_4way_update( &ctx.jh, vhash, dataLen );
@@ -738,15 +723,11 @@ void xevan_4way_hash( void *output, const void *input )
     cube_2way_full( &ctx.cube, vhashA, 512, vhashA, dataLen );
     cube_2way_full( &ctx.cube, vhashB, 512, vhashB, dataLen );

-     shavite512_2way_init( &ctx.shavite );
-     shavite512_2way_update_close( &ctx.shavite, vhashA, vhashA, dataLen );
-     shavite512_2way_init( &ctx.shavite );
-     shavite512_2way_update_close( &ctx.shavite, vhashB, vhashB, dataLen );
+     shavite512_2way_full( &ctx.shavite, vhashA, vhashA, dataLen );
+     shavite512_2way_full( &ctx.shavite, vhashB, vhashB, dataLen );

-     simd_2way_init( &ctx.simd, 512 );
-     simd_2way_update_close( &ctx.simd, vhashA, vhashA, dataLen<<3 );
-     simd_2way_init( &ctx.simd, 512 );
-     simd_2way_update_close( &ctx.simd, vhashB, vhashB, dataLen<<3 );
+     simd512_2way_full( &ctx.simd, vhashA, vhashA, dataLen );
+     simd512_2way_full( &ctx.simd, vhashB, vhashB, dataLen );

     dintrlv_2x128( hash0, hash1, vhashA, dataLen<<3 );
     dintrlv_2x128( hash2, hash3, vhashB, dataLen<<3 );
@@ -818,41 +799,43 @@ void xevan_4way_hash( void *output, const void *input )
 int scanhash_xevan_4way( struct work *work, uint32_t max_nonce,
                         uint64_t *hashes_done, struct thr_info *mythr )
 {
-   uint32_t hash[4*16] __attribute__ ((aligned (64)));
-   uint32_t vdata[24*4] __attribute__ ((aligned (64)));
-   uint32_t lane_hash[8] __attribute__ ((aligned (32)));
-   uint32_t *hash7 = &(hash[7<<2]);
+   uint32_t hash[16*4] __attribute__ ((aligned (128)));   // same element count as before (4*16); only the alignment was raised
+   uint32_t vdata[20*4] __attribute__ ((aligned (64)));   // shrunk from 24*4; 20 words per lane, 4 lanes
+   uint32_t lane_hash[8] __attribute__ ((aligned (64)));   // 256-bit hash of one lane, filled by extr_lane_4x32
+   uint32_t *hashd7 = &(hash[7<<2]);   // renamed from hash7; one entry per lane is read from here
   uint32_t *pdata = work->data;
   uint32_t *ptarget = work->target;
   int thr_id = mythr->id;
-   __m256i  *noncev = (__m256i*)vdata + 9;   // aligned
-
-   const uint32_t Htarg = ptarget[7];
+   __m256i  *noncev = (__m256i*)vdata + 9;   // 10th (last) 32-byte vector of vdata; the nonces are blended into it
+   const uint32_t targ32 = ptarget[7];   // read before the benchmark override of ptarget[7] below
   const uint32_t first_nonce = pdata[19];
+   const uint32_t last_nonce = max_nonce - 4;   // loop bound hoisted out of the while condition
   uint32_t n = first_nonce;
+   const bool bench = opt_benchmark;   // benchmark flag sampled once

-   if ( opt_benchmark )
-      ptarget[7] = 0x0cff;
+   if ( bench )  ptarget[7] = 0x0cff;   // NOTE(review): targ32 is already captured and the lane test requires !bench, so this store has no effect inside this function

   mm256_bswap32_intrlv80_4x64( vdata, pdata );
+   *noncev = mm256_intrlv_blend_32(
+                   _mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );   // built once before the loop; odd 32-bit elements hold n..n+3, and the old mm256_bswap_32 on the nonces is gone
   do {
-      *noncev = mm256_intrlv_blend_32( mm256_bswap_32(
-               _mm256_set_epi32( n+3, 0,n+2, 0,n+1, 0, n, 0 ) ), *noncev );
-
      xevan_4way_hash( hash, vdata );
      for ( int lane = 0; lane < 4; lane++ )
-      if ( hash7[ lane ] <= Htarg )
+      if ( unlikely( hashd7[ lane ] <= targ32 ) && ! bench )   // one-word pre-filter per lane; never true in benchmark mode
      {
         extr_lane_4x32( lane_hash, hash, lane, 256 );
-	      if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
+	      if ( valid_hash( lane_hash, ptarget ) )   // replaces fulltest; the benchmark check moved to the outer condition
         {
-             pdata[19] = n + lane;
+             pdata[19] = bswap_32( n + lane );   // nonces are no longer swapped in the vector, so the value stored for submission is swapped here
             submit_lane_solution( work, lane_hash, mythr, lane );
         }
      }
+      *noncev = _mm256_add_epi32( *noncev,
+                                  m256_const1_64( 0x0000000400000000 ) );   // adds 4 to the high 32 bits of each 64-bit element (assuming m256_const1_64 broadcasts the constant), in step with n += 4
      n += 4;
-   } while ( ( n < max_nonce-4 ) && !work_restart[thr_id].restart );
-   *hashes_done = n - first_nonce + 1;
+   } while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) );
+   pdata[19] = n;   // new: the next untried nonce is written back to work->data
+   *hashes_done = n - first_nonce;   // the old "+ 1" is dropped; n advanced by 4 per iteration of 4 lanes
   return 0;
 }

--- a/algo/x17/xevan.c
+++ b/algo/x17/xevan.c
@@ -56,8 +56,6 @@ typedef struct {
 } xevan_ctx_holder;

 xevan_ctx_holder xevan_ctx __attribute__ ((aligned (64)));
-static __thread sph_blake512_context xevan_blake_mid
-                                        __attribute__ ((aligned (64)));

 void init_xevan_ctx()
 {
@@ -85,34 +83,23 @@ void init_xevan_ctx()
 #endif
 };

-void xevan_blake512_midstate( const void* input )
-{
-    memcpy( &xevan_blake_mid, &xevan_ctx.blake, sizeof xevan_blake_mid );
-    sph_blake512( &xevan_blake_mid, input, 64 );
-}
-
 void xevan_hash(void *output, const void *input)
 {
-        uint32_t _ALIGN(64) hash[32]; // 128 bytes required
+   uint32_t _ALIGN(64) hash[32]; // 128 bytes required
 	const int dataLen = 128;
-        xevan_ctx_holder ctx __attribute__ ((aligned (64)));
-        memcpy( &ctx, &xevan_ctx, sizeof(xevan_ctx) );
-
-        const int midlen = 64;            // bytes
-        const int tail   = 80 - midlen;   // 16
-
-        memcpy( &ctx.blake, &xevan_blake_mid, sizeof xevan_blake_mid );
-        sph_blake512( &ctx.blake, input + midlen, tail );
-	sph_blake512_close(&ctx.blake, hash);
+   xevan_ctx_holder ctx __attribute__ ((aligned (64)));
+   memcpy( &ctx, &xevan_ctx, sizeof(xevan_ctx) );

+   sph_blake512( &ctx.blake, input, 80 );
+   sph_blake512_close( &ctx.blake, hash );
 	memset(&hash[16], 0, 64);

 	sph_bmw512(&ctx.bmw, hash, dataLen);
 	sph_bmw512_close(&ctx.bmw, hash);

 #if defined(__AES__)
-        update_and_final_groestl( &ctx.groestl, (char*)hash,
-                                  (const char*)hash, dataLen*8 );
+   update_and_final_groestl( &ctx.groestl, (char*)hash,
+                                     (const char*)hash, dataLen*8 );
 #else
 	sph_groestl512(&ctx.groestl, hash, dataLen);
 	sph_groestl512_close(&ctx.groestl, hash);
@@ -127,20 +114,20 @@ void xevan_hash(void *output, const void *input)
 	sph_keccak512(&ctx.keccak, hash, dataLen);
 	sph_keccak512_close(&ctx.keccak, hash);

-        update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
-                                (const BitSequence*)hash, dataLen );
+   update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
+                                 (const BitSequence*)hash, dataLen );

-        cubehashUpdateDigest( &ctx.cubehash, (byte*)hash,
-                              (const byte*) hash, dataLen );
+   cubehashUpdateDigest( &ctx.cubehash, (byte*)hash,
+                                 (const byte*) hash, dataLen );

 	sph_shavite512(&ctx.shavite, hash, dataLen);
 	sph_shavite512_close(&ctx.shavite, hash);

-        update_final_sd( &ctx.simd, (BitSequence *)hash,
+   update_final_sd( &ctx.simd, (BitSequence *)hash,
                         (const BitSequence *)hash, dataLen*8 );

 #if defined(__AES__)
-        update_final_echo( &ctx.echo, (BitSequence *) hash,
+   update_final_echo( &ctx.echo, (BitSequence *) hash,
                           (const BitSequence *) hash, dataLen*8 );
 #else
 	sph_echo512(&ctx.echo, hash, dataLen);
@@ -159,15 +146,15 @@ void xevan_hash(void *output, const void *input)
 	sph_whirlpool(&ctx.whirlpool, hash, dataLen);
 	sph_whirlpool_close(&ctx.whirlpool, hash);

-        SHA512_Update( &ctx.sha512, hash, dataLen );
-        SHA512_Final( (unsigned char*) hash, &ctx.sha512 );
+   SHA512_Update( &ctx.sha512, hash, dataLen );
+   SHA512_Final( (unsigned char*) hash, &ctx.sha512 );

 	sph_haval256_5(&ctx.haval,(const void*) hash, dataLen);
 	sph_haval256_5_close(&ctx.haval, hash);

 	memset(&hash[8], 0, dataLen - 32);

-        memcpy( &ctx, &xevan_ctx, sizeof(xevan_ctx) );
+   memcpy( &ctx, &xevan_ctx, sizeof(xevan_ctx) );

 	sph_blake512(&ctx.blake, hash, dataLen);
 	sph_blake512_close(&ctx.blake, hash);
@@ -176,11 +163,11 @@ void xevan_hash(void *output, const void *input)
 	sph_bmw512_close(&ctx.bmw, hash);

 #if defined(__AES__)
-        update_and_final_groestl( &ctx.groestl, (char*)hash,
-                                  (const BitSequence*)hash, dataLen*8 );
+   update_and_final_groestl( &ctx.groestl, (char*)hash,
+                              (const BitSequence*)hash, dataLen*8 );
 #else
 	sph_groestl512(&ctx.groestl, hash, dataLen);
-        sph_groestl512_close(&ctx.groestl, hash);
+   sph_groestl512_close(&ctx.groestl, hash);
 #endif

 	sph_skein512(&ctx.skein, hash, dataLen);
@@ -191,24 +178,25 @@ void xevan_hash(void *output, const void *input)

 	sph_keccak512(&ctx.keccak, hash, dataLen);
 	sph_keccak512_close(&ctx.keccak, hash);
-        update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
-                                (const BitSequence*)hash, dataLen );

-        cubehashUpdateDigest( &ctx.cubehash, (byte*)hash,
-                              (const byte*) hash, dataLen );
+   update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
+                                 (const BitSequence*)hash, dataLen );
+
+   cubehashUpdateDigest( &ctx.cubehash, (byte*)hash,
+                                 (const byte*) hash, dataLen );

 	sph_shavite512(&ctx.shavite, hash, dataLen);
 	sph_shavite512_close(&ctx.shavite, hash);

-        update_final_sd( &ctx.simd, (BitSequence *)hash,
+   update_final_sd( &ctx.simd, (BitSequence *)hash,
                         (const BitSequence *)hash, dataLen*8 );

 #if defined(__AES__)
-        update_final_echo( &ctx.echo, (BitSequence *) hash,
+   update_final_echo( &ctx.echo, (BitSequence *) hash,
                           (const BitSequence *) hash, dataLen*8 );
 #else
-        sph_echo512(&ctx.echo, hash, dataLen);
-        sph_echo512_close(&ctx.echo, hash);
+   sph_echo512(&ctx.echo, hash, dataLen);
+   sph_echo512_close(&ctx.echo, hash);
 #endif

 	sph_hamsi512(&ctx.hamsi, hash, dataLen);
@@ -223,8 +211,8 @@ void xevan_hash(void *output, const void *input)
 	sph_whirlpool(&ctx.whirlpool, hash, dataLen);
 	sph_whirlpool_close(&ctx.whirlpool, hash);

-        SHA512_Update( &ctx.sha512, hash, dataLen );
-        SHA512_Final( (unsigned char*) hash, &ctx.sha512 );
+   SHA512_Update( &ctx.sha512, hash, dataLen );
+   SHA512_Final( (unsigned char*) hash, &ctx.sha512 );

 	sph_haval256_5(&ctx.haval,(const void*) hash, dataLen);
 	sph_haval256_5_close(&ctx.haval, hash);
@@ -233,41 +221,33 @@ void xevan_hash(void *output, const void *input)
 }

 int scanhash_xevan( struct work *work, uint32_t max_nonce,
-	            uint64_t *hashes_done, struct thr_info *mythr )
+             uint64_t *hashes_done, struct thr_info *mythr)   // scans nonces starting at work->data[19]; always returns 0
 {
-	uint32_t _ALIGN(64) hash[8];
-	uint32_t _ALIGN(64) endiandata[20];
-	uint32_t *pdata = work->data;
-	uint32_t *ptarget = work->target;
-   int thr_id = mythr->id;  // thr_id arg is deprecated
-	const uint32_t Htarg = ptarget[7];
-	const uint32_t first_nonce = pdata[19];
-	uint32_t nonce = first_nonce;
-	volatile uint8_t *restart = &(work_restart[thr_id].restart);
+   uint32_t edata[20] __attribute__((aligned(64)));   // 20-word working copy of work->data, filled by mm128_bswap32_80 below
+   uint32_t hash64[8] __attribute__((aligned(64)));   // receives the xevan_hash output for the current nonce
+   uint32_t *pdata = work->data;
+   uint32_t *ptarget = work->target;   // only passed to valid_hash; the old benchmark override of ptarget[7] is gone
+   uint32_t n = pdata[19];   // running nonce
+   const uint32_t first_nonce = pdata[19];
+   const int thr_id = mythr->id;
+   const bool bench = opt_benchmark;   // benchmark flag sampled once; when set, nothing is submitted

-	if (opt_benchmark)
-		ptarget[7] = 0x0cff;
+   mm128_bswap32_80( edata, pdata );   // replaces the per-word be32enc loop; presumably byte-swaps all 20 words, confirm in the helper

-	for (int k=0; k < 19; k++)
-		be32enc(&endiandata[k], pdata[k]);
-
-   xevan_blake512_midstate( endiandata );
-	do {
-		be32enc(&endiandata[19], nonce);
-		xevan_hash(hash, endiandata);
-
-		if (hash[7] <= Htarg )
-      if ( fulltest( hash, ptarget ) && !opt_benchmark )
-	   {
-         pdata[19] = nonce;
-         submit_solution( work, hash, mythr );
-		}
-		nonce++;
-	} while ( nonce < max_nonce && !(*restart) );
-
-	pdata[19] = nonce;
-	*hashes_done = pdata[19] - first_nonce + 1;
-	return 0;
+   do
+   {
+      edata[19] = n;   // nonce is written into the working copy without be32enc
+      xevan_hash( hash64, edata );   // the blake midstate precompute is removed, so each call hashes the full 80-byte input
+      if ( unlikely( valid_hash( hash64, ptarget ) && !bench ) )   // valid_hash replaces the hash[7] <= Htarg + fulltest pair
+      {
+         pdata[19] = bswap_32( n );   // work->data receives the byte-swapped nonce before submission
+         submit_solution( work, hash64, mythr );
+      }
+      n++;   // post-increment: n is always the next nonce to try
+   } while ( n < max_nonce && !work_restart[thr_id].restart );
+   pdata[19] = n;   // the next untried nonce is written back to work->data
+   *hashes_done = n - first_nonce;   // n advanced once per hash, so the old "+ 1" correction is no longer needed
+   return 0;
 }

 #endif