v3.9.1

2026-07-14 19:06:50 +00:00 · 2019-05-30 16:59:49 -04:00
parent eb3f57bfc7
commit 77c5ae80ab
82 changed files with 6906 additions and 3706 deletions
--- a/algo/lyra2/allium-4way.c
+++ b/algo/lyra2/allium-4way.c
@@ -55,11 +55,11 @@ void allium_4way_hash( void *state, const void *input )
   LYRA2RE( hash3, 32, hash3, 32, hash3, 32, 1, 8, 8 );

   cubehashUpdateDigest( &ctx.cube, (byte*)hash0, (const byte*)hash0, 32 );
-   cubehashReinit( &ctx.cube );
+   cubehashInit( &ctx.cube, 256, 16, 32 );
   cubehashUpdateDigest( &ctx.cube, (byte*)hash1, (const byte*)hash1, 32 );
-   cubehashReinit( &ctx.cube );
+   cubehashInit( &ctx.cube, 256, 16, 32 );
   cubehashUpdateDigest( &ctx.cube, (byte*)hash2, (const byte*)hash2, 32 );
-   cubehashReinit( &ctx.cube );
+   cubehashInit( &ctx.cube, 256, 16, 32 );
   cubehashUpdateDigest( &ctx.cube, (byte*)hash3, (const byte*)hash3, 32 );

   LYRA2RE( hash0, 32, hash0, 32, hash0, 32, 1, 8, 8 );
--- a/algo/lyra2/lyra2-gate.c
+++ b/algo/lyra2/lyra2-gate.c
@@ -27,7 +27,7 @@ bool register_lyra2rev3_algo( algo_gate_t* gate )
  gate->scanhash  = (void*)&scanhash_lyra2rev3;
  gate->hash      = (void*)&lyra2rev3_hash;
 #endif
-  gate->optimizations = SSE2_OPT | AES_OPT | SSE42_OPT | AVX2_OPT;
+  gate->optimizations = SSE2_OPT | SSE42_OPT | AVX2_OPT;
  gate->miner_thread_init = (void*)&lyra2rev3_thread_init;
  gate->set_target        = (void*)&alt_set_target;
  return true;
--- a/algo/lyra2/lyra2-gate.h
+++ b/algo/lyra2/lyra2-gate.h
@@ -17,14 +17,14 @@ bool register_lyra2rev3_algo( algo_gate_t* gate );

 void lyra2rev3_4way_hash( void *state, const void *input );
 int scanhash_lyra2rev3_4way( int thr_id, struct work *work, uint32_t max_nonce,
-                             uint64_t *hashes_done );
+                             uint64_t *hashes_done, struct thr_info *mythr );
 bool init_lyra2rev3_4way_ctx();

 #else

 void lyra2rev3_hash( void *state, const void *input );
 int scanhash_lyra2rev3( int thr_id, struct work *work, uint32_t max_nonce,
-                        uint64_t *hashes_done );
+                        uint64_t *hashes_done, struct thr_info *mythr );
 bool init_lyra2rev3_ctx();

 #endif
--- a/algo/lyra2/lyra2re.c
+++ b/algo/lyra2/lyra2re.c
@@ -7,8 +7,7 @@
 #include "lyra2.h"
 #include "algo-gate-api.h"
 #include "avxdefs.h"
-
-#ifndef NO_AES_NI
+#if defined(__AES__)
  #include "algo/groestl/aes_ni/hash-groestl256.h"
 #endif

@@ -18,10 +17,10 @@ typedef struct {
        sph_blake256_context     blake;
        sph_keccak256_context    keccak;
        sph_skein256_context     skein;
-#ifdef NO_AES_NI
-        sph_groestl256_context   groestl;
-#else
+#if defined(__AES__)
        hashState_groestl256     groestl;
+#else
+        sph_groestl256_context   groestl;
 #endif
 } lyra2re_ctx_holder;

@@ -33,10 +32,10 @@ void init_lyra2re_ctx()
        sph_blake256_init(&lyra2re_ctx.blake);
        sph_keccak256_init(&lyra2re_ctx.keccak);
        sph_skein256_init(&lyra2re_ctx.skein);
-#ifdef NO_AES_NI
-        sph_groestl256_init(&lyra2re_ctx.groestl);
-#else
+#if defined(__AES__)
        init_groestl256( &lyra2re_ctx.groestl, 32 );
+#else
+        sph_groestl256_init(&lyra2re_ctx.groestl);
 #endif
 }

@@ -72,11 +71,11 @@ void lyra2re_hash(void *state, const void *input)
 	sph_skein256(&ctx.skein, hashA, 32);
 	sph_skein256_close(&ctx.skein, hashB);

-#ifdef NO_AES_NI
+#if defined(__AES__)
+        update_and_final_groestl256( &ctx.groestl, hashA, hashB, 256 );
+#else
 	sph_groestl256( &ctx.groestl, hashB, 32 );
 	sph_groestl256_close( &ctx.groestl, hashA );
-#else
-        update_and_final_groestl256( &ctx.groestl, hashA, hashB, 256 );
 #endif

 	memcpy(state, hashA, 32);
--- a/algo/lyra2/lyra2rev2-4way.c
+++ b/algo/lyra2/lyra2rev2-4way.c
@@ -48,11 +48,11 @@ void lyra2rev2_4way_hash( void *state, const void *input )
   mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash64, 256 );

   cubehashUpdateDigest( &ctx.cube, (byte*) hash0, (const byte*) hash0, 32 );
-   cubehashReinit( &ctx.cube );
+   cubehashInit( &ctx.cube, 256, 16, 32 );
   cubehashUpdateDigest( &ctx.cube, (byte*) hash1, (const byte*) hash1, 32 );
-   cubehashReinit( &ctx.cube );
+   cubehashInit( &ctx.cube, 256, 16, 32 );
   cubehashUpdateDigest( &ctx.cube, (byte*) hash2, (const byte*) hash2, 32 );
-   cubehashReinit( &ctx.cube );
+   cubehashInit( &ctx.cube, 256, 16, 32 );
   cubehashUpdateDigest( &ctx.cube, (byte*) hash3, (const byte*) hash3, 32 );

   LYRA2REV2( l2v2_wholeMatrix, hash0, 32, hash0, 32, hash0, 32, 1, 4, 4 );
@@ -65,13 +65,13 @@ void lyra2rev2_4way_hash( void *state, const void *input )
   skein256_4way_close( &ctx.skein, vhash64 );
   mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash64, 256 );

-   cubehashReinit( &ctx.cube );
+   cubehashInit( &ctx.cube, 256, 16, 32 );
   cubehashUpdateDigest( &ctx.cube, (byte*) hash0, (const byte*) hash0, 32 );
-   cubehashReinit( &ctx.cube );
+   cubehashInit( &ctx.cube, 256, 16, 32 );
   cubehashUpdateDigest( &ctx.cube, (byte*) hash1, (const byte*) hash1, 32 );
-   cubehashReinit( &ctx.cube );
+   cubehashInit( &ctx.cube, 256, 16, 32 );
   cubehashUpdateDigest( &ctx.cube, (byte*) hash2, (const byte*) hash2, 32 );
-   cubehashReinit( &ctx.cube );
+   cubehashInit( &ctx.cube, 256, 16, 32 );
   cubehashUpdateDigest( &ctx.cube, (byte*) hash3, (const byte*) hash3, 32 );

   mm128_interleave_4x32( vhash, hash0, hash1, hash2, hash3, 256 );
--- a/algo/lyra2/lyra2rev3-4way.c
+++ b/algo/lyra2/lyra2rev3-4way.c
@@ -43,11 +43,11 @@ void lyra2rev3_4way_hash( void *state, const void *input )
   LYRA2REV3( l2v3_wholeMatrix, hash3, 32, hash3, 32, hash3, 32, 1, 4, 4 );
   
   cubehashUpdateDigest( &ctx.cube, (byte*) hash0, (const byte*) hash0, 32 );
-   cubehashReinit( &ctx.cube );
+   cubehashInit( &ctx.cube, 256, 16, 32 );
   cubehashUpdateDigest( &ctx.cube, (byte*) hash1, (const byte*) hash1, 32 );
-   cubehashReinit( &ctx.cube );
+   cubehashInit( &ctx.cube, 256, 16, 32 );
   cubehashUpdateDigest( &ctx.cube, (byte*) hash2, (const byte*) hash2, 32 );
-   cubehashReinit( &ctx.cube );
+   cubehashInit( &ctx.cube, 256, 16, 32 );
   cubehashUpdateDigest( &ctx.cube, (byte*) hash3, (const byte*) hash3, 32 );

   LYRA2REV3( l2v3_wholeMatrix, hash0, 32, hash0, 32, hash0, 32, 1, 4, 4 );
@@ -57,54 +57,67 @@ void lyra2rev3_4way_hash( void *state, const void *input )

   mm128_interleave_4x32( vhash, hash0, hash1, hash2, hash3, 256 );
   bmw256_4way( &ctx.bmw, vhash, 32 );
-   bmw256_4way_close( &ctx.bmw, vhash );
+   bmw256_4way_close( &ctx.bmw, state );

-   mm128_deinterleave_4x32( state, state+32, state+64, state+96, vhash, 256 );
 }

 int scanhash_lyra2rev3_4way( int thr_id, struct work *work, uint32_t max_nonce,
-                             uint64_t *hashes_done )
+                             uint64_t *hashes_done, struct thr_info *mythr ) 
 {
   uint32_t hash[8*4] __attribute__ ((aligned (64)));
   uint32_t vdata[20*4] __attribute__ ((aligned (64)));
   uint32_t edata[20] __attribute__ ((aligned (64)));
+   uint32_t *hash7 = &(hash[7<<2]);   // &hash[28]; hash7[lane] is tested against Htarg below
+   uint32_t lane_hash[8];             // one lane's hash, filled by mm128_extract_lane_4x32
   uint32_t *pdata = work->data;
   uint32_t *ptarget = work->target;
   const uint32_t first_nonce = pdata[19];
   uint32_t n = first_nonce;
   const uint32_t Htarg = ptarget[7];
-   uint32_t *nonces = work->nonces;
   int num_found = 0;
-   uint32_t *noncep = vdata + 76; // 19*4
-
+   __m128i  *noncev = (__m128i*)vdata + 19;   // aligned; 128-bit element 19 = uint32 offset 76, where the old noncep pointed
+   /* int */ thr_id = mythr->id;  // thr_id arg is deprecated
+   
   if ( opt_benchmark )
      ( (uint32_t*)ptarget )[7] = 0x0000ff;

-   swab32_array( edata, pdata, 20 );
+   // Need big endian data
+   casti_m128i( edata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) ); // replaces swab32_array( edata, pdata, 20 ): 5 x 4 words
+   casti_m128i( edata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
+   casti_m128i( edata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
+   casti_m128i( edata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
+   casti_m128i( edata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
+
   mm128_interleave_4x32( vdata, edata, edata, edata, edata, 640 );

-   do {
-      be32enc( noncep,   n   );
-      be32enc( noncep+1, n+1 );
-      be32enc( noncep+2, n+2 );
-      be32enc( noncep+3, n+3 );
+   do
+   {
+      *noncev = mm128_bswap_32( _mm_set_epi32( n+3, n+2, n+1, n ) ); // replaces the four be32enc() stores of n..n+3

      lyra2rev3_4way_hash( hash, vdata );
      pdata[19] = n;

-      for ( int i = 0; i < 4; i++ )
-      if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
+      for ( int lane = 0; lane < 4; lane++ ) if ( hash7[lane] <= Htarg ) // cheap word-7 pre-check per lane
      {
-          pdata[19] = n+i;         
-          nonces[ num_found++ ] = n+i;
-          work_set_target_ratio( work, hash+(i<<3) );
+         mm128_extract_lane_4x32( lane_hash, hash, lane, 256 ); // presumably de-interleaves this lane's 256-bit hash into lane_hash
+
+         if ( fulltest( lane_hash, ptarget ) )
+         {
+              pdata[19] = n + lane;    
+              work_set_target_ratio( work, lane_hash );
+              if ( submit_work( mythr, work ) ) // share is submitted here rather than returned to the caller
+                applog( LOG_NOTICE, "Share %d submitted by thread %d, lane %d.",
+		             accepted_share_count + rejected_share_count + 1,
+			     thr_id, lane );
+              else
+                applog( LOG_WARNING, "Failed to submit share." );
+	 }
      }
      n += 4;
-   } while ( (num_found == 0) && (n < max_nonce-4)
-                   && !work_restart[thr_id].restart);
+   } while ( (n < max_nonce-4) && !work_restart[thr_id].restart); // no longer exits on the first share found

   *hashes_done = n - first_nonce + 1;
-   return num_found;
+   return 0; // always 0; found shares were already passed to submit_work
 }

 #endif
--- a/algo/lyra2/lyra2rev3.c
+++ b/algo/lyra2/lyra2rev3.c
@@ -8,7 +8,6 @@

 typedef struct {
        cubehashParam           cube;
-//        cubehashParam           cube2;
        sph_blake256_context     blake;
        sph_bmw256_context       bmw;

@@ -20,7 +19,6 @@ static __thread sph_blake256_context l2v3_blake_mid;
 bool init_lyra2rev3_ctx()
 {
        cubehashInit( &lyra2v3_ctx.cube, 256, 16, 32 );
-//        cubehashInit( &lyra2v3_ctx.cube2, 256, 16, 32 );
        sph_blake256_init( &lyra2v3_ctx.blake );
        sph_bmw256_init( &lyra2v3_ctx.bmw );
        return true;
@@ -59,44 +57,51 @@ void lyra2rev3_hash( void *state, const void *input )
 	memcpy( state, hash, 32 );
 }

-int scanhash_lyra2rev3(int thr_id, struct work *work,
-	uint32_t max_nonce, uint64_t *hashes_done)
+int scanhash_lyra2rev3( int thr_id, struct work *work,
+	uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr )
 {
-        uint32_t *pdata = work->data;
-        uint32_t *ptarget = work->target;
-	uint32_t endiandata[20] __attribute__ ((aligned (64)));
-        uint32_t hash[8] __attribute__((aligned(64)));
-	const uint32_t first_nonce = pdata[19];
-	uint32_t nonce = first_nonce;
-        const uint32_t Htarg = ptarget[7];
+   uint32_t *pdata = work->data;
+   uint32_t *ptarget = work->target;
+   uint32_t endiandata[20] __attribute__ ((aligned (64)));
+   uint32_t hash[8] __attribute__((aligned(64)));
+   const uint32_t first_nonce = pdata[19];   // scan starts at the nonce already stored in the work
+   uint32_t nonce = first_nonce;
+   const uint32_t Htarg = ptarget[7];
+   /* int */ thr_id = mythr->id;  // thr_id arg is deprecated

-	if (opt_benchmark)
-		((uint32_t*)ptarget)[7] = 0x0000ff;
+   if (opt_benchmark)
+	((uint32_t*)ptarget)[7] = 0x0000ff;

-        swab32_array( endiandata, pdata, 20 );
+   // need big endian data
+   casti_m128i( endiandata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) ); // replaces swab32_array( endiandata, pdata, 20 ): 5 x 4 words
+   casti_m128i( endiandata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
+   casti_m128i( endiandata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
+   casti_m128i( endiandata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
+   casti_m128i( endiandata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );

-        l2v3_blake256_midstate( endiandata );
+   l2v3_blake256_midstate( endiandata ); // NOTE(review): presumably caches a blake256 midstate for lyra2rev3_hash - confirm

-	do {
-		be32enc(&endiandata[19], nonce);
-		lyra2rev3_hash(hash, endiandata);
+   do
+   {
+	be32enc(&endiandata[19], nonce); // nonce goes in word 19, big endian
+	lyra2rev3_hash(hash, endiandata);

-		if (hash[7] <= Htarg )
-                {
-                   if( fulltest(hash, ptarget) )
-                   {
-			pdata[19] = nonce;
-                        work_set_target_ratio( work, hash );
-			*hashes_done = pdata[19] - first_nonce;
-		   	return 1;
-		   }
-                }
-		nonce++;
+	if (hash[7] <= Htarg ) // cheap word-7 pre-check before fulltest
+        {
+            if( fulltest(hash, ptarget) )
+            {
+		pdata[19] = nonce;
+                work_set_target_ratio( work, hash );
+                *hashes_done = pdata[19] - first_nonce;
+		return 1; // one nonce found; it is left in pdata[19]
+	    }
+         }
+         nonce++;

-	} while (nonce < max_nonce && !work_restart[thr_id].restart);
+   } while (nonce < max_nonce && !work_restart[thr_id].restart);

-	pdata[19] = nonce;
-	*hashes_done = pdata[19] - first_nonce + 1;
-	return 0;
+   pdata[19] = nonce;
+   *hashes_done = pdata[19] - first_nonce + 1;
+   return 0; // loop ended (max_nonce reached or work restart) without a passing hash
 }

--- a/algo/lyra2/sponge.h
+++ b/algo/lyra2/sponge.h
@@ -91,7 +91,7 @@ static inline uint64_t rotr64( const uint64_t w, const unsigned c ){
   LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \
   LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \

-#elif defined(__SSE4_2__)
+#elif defined(__SSE2__)

 // process 2 columns in parallel
 // returns void, all args updated
@@ -108,14 +108,14 @@ static inline uint64_t rotr64( const uint64_t w, const unsigned c ){
 #define LYRA_ROUND_AVX(s0,s1,s2,s3,s4,s5,s6,s7) \
   G_2X64( s0, s2, s4, s6 ); \
   G_2X64( s1, s3, s5, s7 ); \
-   mm128_ror256_1x64( s2, s3 ); \
-   mm128_swap256_128( s4, s5 ); \
-   mm128_rol256_1x64( s6, s7 ); \
+   mm128_ror1x64_256( s2, s3 ); /* ror, as in the removed line; a pure rename must keep the direction */ \
+   mm128_swap128_256( s4, s5 ); \
+   mm128_rol1x64_256( s6, s7 ); \
   G_2X64( s0, s2, s4, s6 ); \
   G_2X64( s1, s3, s5, s7 ); \
-   mm128_rol256_1x64( s2, s3 ); \
-   mm128_swap256_128( s4, s5 ); \
-   mm128_ror256_1x64( s6, s7 );
+   mm128_rol1x64_256( s2, s3 ); /* opposite directions here undo the rotations applied before the second G pass */ \
+   mm128_swap128_256( s4, s5 ); \
+   mm128_ror1x64_256( s6, s7 );

 #define LYRA_12_ROUNDS_AVX(s0,s1,s2,s3,s4,s5,s6,s7) \
   LYRA_ROUND_AVX(s0,s1,s2,s3,s4,s5,s6,s7) \