v3.7.5

2026-07-14 19:06:50 +00:00 · 2017-12-08 15:39:28 -05:00
parent 4b57ac0eb9
commit af1c940919
53 changed files with 1324 additions and 4790 deletions
--- a/algo/lyra2/lyra2.c
+++ b/algo/lyra2/lyra2.c
@@ -377,7 +377,7 @@ int LYRA2RE( void *K, uint64_t kLen, const void *pwd,
   uint64_t *wholeMatrix = _mm_malloc( i, 64 );
   if (wholeMatrix == NULL)
      return -1;
-
+/*
 #if defined (__AVX2__)
   memset_zero_m256i( (__m256i*)wholeMatrix, i/32 );
 #elif defined(__AVX__)
@@ -385,7 +385,7 @@ int LYRA2RE( void *K, uint64_t kLen, const void *pwd,
 #else
   memset(wholeMatrix, 0, i);
 #endif
-
+*/
   uint64_t *ptrWord = wholeMatrix;

   //=== Getting the password + salt + basil padded with 10*1 ==========//
--- a/algo/lyra2/lyra2re.c
+++ b/algo/lyra2/lyra2re.c
@@ -128,34 +128,10 @@ void lyra2re_set_target ( struct work* work, double job_diff )
   work_set_target(work, job_diff / (128.0 * opt_diff_factor) );
 }

-/*
-bool lyra2re_thread_init()
-{
-   const int64_t ROW_LEN_INT64 = BLOCK_LEN_INT64 * 8; // nCols
-   const int64_t ROW_LEN_BYTES = ROW_LEN_INT64 * 8;
-
-   int i = (int64_t)ROW_LEN_BYTES * 8; // nRows;
-   lyra2re_wholeMatrix = _mm_malloc( i, 64 );
-
-   if ( lyra2re_wholeMatrix == NULL )
-     return false;
-
-#if defined (__AVX2__)
-   memset_zero_m256i( (__m256i*)lyra2re_wholeMatrix, i/32 );
-#elif defined(__AVX__)
-   memset_zero_m128i( (__m128i*)lyra2re_wholeMatrix, i/16 );
-#else
-   memset( lyra2re_wholeMatrix, 0, i );
-#endif
-   return true;
-}
-*/
-
 bool register_lyra2re_algo( algo_gate_t* gate )
 {
  init_lyra2re_ctx();
  gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT;
-//  gate->miner_thread_init = (void*)&lyra2re_thread_init;
  gate->scanhash   = (void*)&scanhash_lyra2re;
  gate->hash       = (void*)&lyra2re_hash;
  gate->get_max64  = (void*)&lyra2re_get_max64;
--- a/algo/lyra2/lyra2rev2.c
+++ b/algo/lyra2/lyra2rev2.c
@@ -132,23 +132,13 @@ bool lyra2rev2_thread_init()
   int i = (int64_t)ROW_LEN_BYTES * 4; // nRows;
   l2v2_wholeMatrix = _mm_malloc( i, 64 );

-   if ( l2v2_wholeMatrix == NULL )
-     return false;
-
-#if defined (__AVX2__)
-   memset_zero_m256i( (__m256i*)l2v2_wholeMatrix, i/32 );
-#elif defined (__AVX__)
-   memset_zero_m128i( (__m128i*)l2v2_wholeMatrix, i/16 );
-#else
-   memset( l2v2_wholeMatrix, 0, i );
-#endif
-   return true;
+   return l2v2_wholeMatrix;
 }

 bool register_lyra2rev2_algo( algo_gate_t* gate )
 {
  init_lyra2rev2_ctx();
-  gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT;
+  gate->optimizations = AVX_OPT | AVX2_OPT;
  gate->miner_thread_init = (void*)&lyra2rev2_thread_init;
  gate->scanhash          = (void*)&scanhash_lyra2rev2;
  gate->hash              = (void*)&lyra2rev2_hash;
--- a/algo/lyra2/lyra2z-4way.c
+++ b/algo/lyra2/lyra2z-4way.c
@@ -0,0 +1,168 @@
+#include "lyra2z-gate.h"
+
+#ifdef LYRA2Z_4WAY
+
+#include <memory.h>
+#include <mm_malloc.h>
+//#include "algo-gate-api.h"
+#include "lyra2.h"
+#include "algo/blake/sph_blake.h"
+#include "algo/blake/blake-hash-4way.h"
+//#include "avxdefs.h"
+
+// Per-thread Lyra2 matrix; lyra2z_4way_hash runs its lanes one after another on it.
+__thread uint64_t* lyra2z_4way_matrix;
+// Allocates the matrix with 64 byte alignment; the pointer converts to the bool result.
+bool lyra2z_4way_thread_init()
+{
+ return ( lyra2z_4way_matrix = _mm_malloc( LYRA2Z_MATRIX_SIZE, 64 ) );
+}
+
+static __thread blake256_4way_context l2z_4way_blake_mid;  // per-thread saved blake256 state
+// Re-initialises the saved state and feeds it input with length 64 (presumably bytes per lane - confirm).
+void lyra2z_4way_midstate( const void* input )
+{
+       blake256_4way_init( &l2z_4way_blake_mid );
+       blake256_4way( &l2z_4way_blake_mid, input, 64 ); // NOTE(review): the scanner's call to this is commented out
+}
+
+// Lyra2Z for four nonces at once: one 4-way interleaved blake256 pass over the
+// 80 byte headers, then LYRA2Z run serially on each de-interleaved lane.
+// state receives 4 x 32 bytes, the hash of lane k at byte offset 32*k.
+void lyra2z_4way_hash( void *state, const void *input )
+{
+     uint32_t hash0[8] __attribute__ ((aligned (64)));
+     uint32_t hash1[8] __attribute__ ((aligned (64)));
+     uint32_t hash2[8] __attribute__ ((aligned (64)));
+     uint32_t hash3[8] __attribute__ ((aligned (64)));
+     uint32_t vhash[8*4] __attribute__ ((aligned (64)));
+     blake256_4way_context ctx_blake __attribute__ ((aligned (64)));
+
+     // The saved midstate is not used: the scanner below does not call
+     // lyra2z_4way_midstate, so the complete header is hashed on every call.
+     // NOTE(review): 80 is presumably the byte count per lane - confirm
+     // against blake-hash-4way.h.
+     blake256_4way_init( &ctx_blake );
+     blake256_4way( &ctx_blake, input, 80 );
+     blake256_4way_close( &ctx_blake, vhash );
+
+     mm_deinterleave_4x32( hash0, hash1, hash2, hash3, vhash, 256 );
+
+     // Every lane needs its own LYRA2Z pass, otherwise its slot in state
+     // would only hold the blake digest. The matrix is reused between passes.
+     LYRA2Z( lyra2z_4way_matrix, hash0, 32, hash0, 32, hash0, 32, 8, 8, 8 );
+     LYRA2Z( lyra2z_4way_matrix, hash1, 32, hash1, 32, hash1, 32, 8, 8, 8 );
+     LYRA2Z( lyra2z_4way_matrix, hash2, 32, hash2, 32, hash2, 32, 8, 8, 8 );
+     LYRA2Z( lyra2z_4way_matrix, hash3, 32, hash3, 32, hash3, 32, 8, 8, 8 );
+
+     // Offsets are taken on a char* because arithmetic on void* is a GNU
+     // extension.
+     memcpy( state, hash0, 32 );
+     memcpy( (char*)state + 32, hash1, 32 );
+     memcpy( (char*)state + 64, hash2, 32 );
+     memcpy( (char*)state + 96, hash3, 32 );
+}
+
+// Scans nonces from work->data[19] upward, four per iteration, until a lane
+// meets the target, the range is used up, or a work restart is signalled.
+// Winning nonces go to work->nonces[] with work->nfound[] flagging the lanes.
+// Returns the number of lanes that met the target; *hashes_done is set to
+// n - first_nonce + 1.
+int scanhash_lyra2z_4way( int thr_id, struct work *work, uint32_t max_nonce,
+                          uint64_t *hashes_done )
+{
+   uint32_t hash[8*4] __attribute__ ((aligned (64)));
+   uint32_t vdata[20*4] __attribute__ ((aligned (64)));
+   uint32_t _ALIGN(64) edata[20];
+   uint32_t *pdata = work->data;
+   uint32_t *ptarget = work->target;
+   const uint32_t first_nonce = pdata[19];
+   uint32_t n = first_nonce;
+   uint32_t *nonces = work->nonces;
+   bool *found = work->nfound;
+   int num_found = 0;
+   uint32_t *noncep0 = vdata + 76; // 19*4
+   uint32_t *noncep1 = vdata + 77;
+   uint32_t *noncep2 = vdata + 78;
+   uint32_t *noncep3 = vdata + 79;
+
+   if ( opt_benchmark )
+      ptarget[7] = 0x0000ff;
+
+   // Read only after the benchmark override so the quick check agrees with
+   // the target that fulltest() sees.
+   const uint32_t Htarg = ptarget[7];
+
+   for ( int i=0; i < 19; i++ )
+      be32enc( &edata[i], pdata[i] );
+   edata[19] = 0;   // nonce word; each lane's copy is rewritten per iteration
+
+   // Copy the same header into all four lanes; words 76..79 of vdata (19*4)
+   // are overwritten below with each lane's nonce.
+   mm_interleave_4x32( vdata, edata, edata, edata, edata, 640 );
+
+   do {
+      found[0] = found[1] = found[2] = found[3] = false;
+      be32enc( noncep0, n   );
+      be32enc( noncep1, n+1 );
+      be32enc( noncep2, n+2 );
+      be32enc( noncep3, n+3 );
+
+      lyra2z_4way_hash( hash, vdata );
+
+      if ( hash[7] <= Htarg && fulltest( hash, ptarget ) )
+      {
+          found[0] = true;
+          num_found++;
+          nonces[0] = pdata[19] = n;
+          work_set_target_ratio( work, hash );
+      }
+      if ( (hash+8)[7] <= Htarg && fulltest( hash+8, ptarget ) )
+      {
+          found[1] = true;
+          num_found++;
+          nonces[1] = n+1;
+          work_set_target_ratio( work, hash+8 );
+      }
+      if ( (hash+16)[7] <= Htarg && fulltest( hash+16, ptarget ) )
+      {
+          found[2] = true;
+          num_found++;
+          nonces[2] = n+2;
+          work_set_target_ratio( work, hash+16 );
+      }
+      if ( (hash+24)[7] <= Htarg && fulltest( hash+24, ptarget ) )
+      {
+          found[3] = true;
+          num_found++;
+          nonces[3] = n+3;
+          work_set_target_ratio( work, hash+24 );
+      }
+      n += 4;   // four nonces are consumed per pass
+   } while ( (num_found == 0) && (n < max_nonce-4)
+                   && !work_restart[thr_id].restart);
+
+   *hashes_done = n - first_nonce + 1;
+   return num_found;
+}
+
+#endif
+
+/*
+
+		if (hash[7] <= Htarg && fulltest(hash, ptarget)) {
+			work_set_target_ratio(work, hash);
+			pdata[19] = nonce;
+			*hashes_done = pdata[19] - first_nonce;
+			return 1;
+		}
+		nonce++;
+
+	} while (nonce < max_nonce && !work_restart[thr_id].restart);
+
+	pdata[19] = nonce;
+	*hashes_done = pdata[19] - first_nonce + 1;
+	return 0;
+}
+*/
+
--- a/algo/lyra2/lyra2z-gate.c
+++ b/algo/lyra2/lyra2z-gate.c
@@ -0,0 +1,28 @@
+#include "lyra2z-gate.h"
+#include "lyra2.h"
+// Passes job_diff / (256 * opt_diff_factor) to work_set_target for this work item.
+void lyra2z_set_target( struct work* work, double job_diff )
+{
+ work_set_target( work, job_diff / (256.0 * opt_diff_factor) );
+}
+// Fills gate with the lyra2z entry points (4-way ones when LYRA2Z_4WAY is defined); always returns true.
+bool register_lyra2z_algo( algo_gate_t* gate )
+{
+#ifdef LYRA2Z_4WAY
+  four_way_not_tested();
+  gate->optimizations = AVX_OPT | AVX2_OPT | FOUR_WAY_OPT;
+  gate->miner_thread_init = (void*)&lyra2z_4way_thread_init;
+  gate->scanhash   = (void*)&scanhash_lyra2z_4way;
+  gate->hash       = (void*)&lyra2z_4way_hash;
+#else
+  gate->optimizations = AVX_OPT | AVX2_OPT;
+  gate->miner_thread_init = (void*)&lyra2z_thread_init;
+  gate->scanhash   = (void*)&scanhash_lyra2z;
+  gate->hash       = (void*)&lyra2z_hash;
+#endif
+  // Set the same way in both builds.
+  gate->get_max64  = (void*)&get_max64_0xffffLL;
+  gate->set_target = (void*)&lyra2z_set_target;
+  return true;
+}
+
--- a/algo/lyra2/lyra2z-gate.h
+++ b/algo/lyra2/lyra2z-gate.h
@@ -0,0 +1,33 @@
+#ifndef LYRA2Z_GATE_H__
+#define LYRA2Z_GATE_H__
+
+#include "algo-gate-api.h"
+#include <stdint.h>
+
+#if defined(HASH_4WAY)
+  #define LYRA2Z_4WAY
+#endif
+
+// Size handed to _mm_malloc for a lyra2z matrix; parenthesised so it expands safely inside larger expressions.
+#define LYRA2Z_MATRIX_SIZE  ( BLOCK_LEN_INT64 * 8 * 8 * 8 )
+
+#if defined(LYRA2Z_4WAY)
+
+void lyra2z_4way_hash( void *state, const void *input );
+
+int scanhash_lyra2z_4way( int thr_id, struct work *work, uint32_t max_nonce,
+                         uint64_t *hashes_done );
+
+bool lyra2z_4way_thread_init();
+
+#endif
+
+void lyra2z_hash( void *state, const void *input );
+
+int scanhash_lyra2z( int thr_id, struct work *work, uint32_t max_nonce,
+                    uint64_t *hashes_done );
+
+bool lyra2z_thread_init();
+
+#endif
+
--- a/algo/lyra2/lyra2z.c
+++ b/algo/lyra2/lyra2z.c
@@ -1,40 +1,49 @@
 #include <memory.h>
 #include <mm_malloc.h>
-#include "algo-gate-api.h"
+#include "lyra2z-gate.h"
 #include "lyra2.h"
 #include "algo/blake/sph_blake.h"
 #include "avxdefs.h"

-__thread uint64_t* zcoin_wholeMatrix;
+__thread uint64_t* lyra2z_matrix;

-static __thread sph_blake256_context zcoin_blake_mid;
-
-
-void zcoin_midstate( const void* input )
+bool lyra2z_thread_init()
 {
-       sph_blake256_init( &zcoin_blake_mid );
-       sph_blake256( &zcoin_blake_mid, input, 64 );
+//   const int64_t ROW_LEN_INT64 = BLOCK_LEN_INT64 * 8; // nCols
+//   const int64_t ROW_LEN_BYTES = ROW_LEN_INT64 * 8;
+//   int i = (int64_t)ROW_LEN_BYTES * 8; // nRows;
+   const int i = BLOCK_LEN_INT64 * 8 * 8 * 8;
+   lyra2z_matrix = _mm_malloc( i, 64 );
+   return lyra2z_matrix;
+}
+
+static __thread sph_blake256_context lyra2z_blake_mid;
+
+void lyra2z_midstate( const void* input )
+{
+       sph_blake256_init( &lyra2z_blake_mid );
+       sph_blake256( &lyra2z_blake_mid, input, 64 );
 }

 // block 2050 new algo, blake plus new lyra parms. new input
 // is power of 2 so normal lyra can be used
 //void zcoin_hash(void *state, const void *input, uint32_t height)
-void zcoin_hash(void *state, const void *input )
+void lyra2z_hash( void *state, const void *input )
 {
        uint32_t _ALIGN(64) hash[16];

        sph_blake256_context ctx_blake __attribute__ ((aligned (64)));

-        memcpy( &ctx_blake, &zcoin_blake_mid, sizeof zcoin_blake_mid );
+        memcpy( &ctx_blake, &lyra2z_blake_mid, sizeof lyra2z_blake_mid );
        sph_blake256( &ctx_blake, input + 64, 16 );
        sph_blake256_close( &ctx_blake, hash );

-        LYRA2Z( zcoin_wholeMatrix, hash, 32, hash, 32, hash, 32, 8, 8, 8);
+        LYRA2Z( lyra2z_matrix, hash, 32, hash, 32, hash, 32, 8, 8, 8);

    memcpy(state, hash, 32);
 }

-int scanhash_zcoin( int thr_id, struct work *work, uint32_t max_nonce,
+int scanhash_lyra2z( int thr_id, struct work *work, uint32_t max_nonce,
                    uint64_t *hashes_done )
 {
 	uint32_t _ALIGN(64) hash[8];
@@ -52,11 +61,11 @@ int scanhash_zcoin( int thr_id, struct work *work, uint32_t max_nonce,
 		be32enc(&endiandata[i], pdata[i]);
 	}

-        zcoin_midstate( endiandata );
+        lyra2z_midstate( endiandata );

 	do {
 		be32enc(&endiandata[19], nonce);
-                zcoin_hash( hash, endiandata );
+                lyra2z_hash( hash, endiandata );

 		if (hash[7] <= Htarg && fulltest(hash, ptarget)) {
 			work_set_target_ratio(work, hash);
@@ -73,50 +82,41 @@ int scanhash_zcoin( int thr_id, struct work *work, uint32_t max_nonce,
 	return 0;
 }

+/*
 //int64_t get_max64_0xffffLL() { return 0xffffLL; };

-void zcoin_set_target( struct work* work, double job_diff )
+void lyra2z_set_target( struct work* work, double job_diff )
 {
 work_set_target( work, job_diff / (256.0 * opt_diff_factor) );
 }
-/*
+
 bool zcoin_get_work_height( struct work* work, struct stratum_ctx* sctx )
 {
   work->height = sctx->bloc_height;
   return false;
 }
-*/

-bool zcoin_thread_init()
+
+bool lyra2z_thread_init()
 {
   const int64_t ROW_LEN_INT64 = BLOCK_LEN_INT64 * 8; // nCols
   const int64_t ROW_LEN_BYTES = ROW_LEN_INT64 * 8;

   int i = (int64_t)ROW_LEN_BYTES * 8; // nRows;
-   zcoin_wholeMatrix = _mm_malloc( i, 64 );
+   lyra2z_wholeMatrix = _mm_malloc( i, 64 );

-   if ( zcoin_wholeMatrix == NULL )
-     return false;
-
-#if defined (__AVX2__)
-   memset_zero_m256i( (__m256i*)zcoin_wholeMatrix, i/32 );
-#elif defined(__AVX__)
-   memset_zero_m128i( (__m128i*)zcoin_wholeMatrix, i/16 );
-#else
-   memset( zcoin_wholeMatrix, 0, i );
-#endif
-   return true;
+   return lyra2z_wholeMatrix;
 }

-bool register_zcoin_algo( algo_gate_t* gate )
+bool register_lyra2z_algo( algo_gate_t* gate )
 {
  gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT;
-  gate->miner_thread_init = (void*)&zcoin_thread_init;
-  gate->scanhash   = (void*)&scanhash_zcoin;
-  gate->hash       = (void*)&zcoin_hash;
+  gate->miner_thread_init = (void*)&lyra2z_thread_init;
+  gate->scanhash   = (void*)&scanhash_lyra2z;
+  gate->hash       = (void*)&lyra2z_hash;
  gate->get_max64  = (void*)&get_max64_0xffffLL;
-  gate->set_target = (void*)&zcoin_set_target;
+  gate->set_target = (void*)&lyra2z_set_target;
 //  gate->prevent_dupes = (void*)&zcoin_get_work_height;
  return true;
 };
-
+*/
--- a/algo/lyra2/lyra2z330.c
+++ b/algo/lyra2/lyra2z330.c
@@ -64,22 +64,12 @@ bool lyra2z330_thread_init()
   int i = (int64_t)ROW_LEN_BYTES * 330; // nRows;
   lyra2z330_wholeMatrix = _mm_malloc( i, 64 );

-   if ( lyra2z330_wholeMatrix == NULL )
-     return false;
-
-#if defined (__AVX2__)
-   memset_zero_m256i( (__m256i*)lyra2z330_wholeMatrix, i/32 );
-#elif defined(__AVX__)
-   memset_zero_m128i( (__m128i*)lyra2z330_wholeMatrix, i/16 );
-#else
-   memset( lyra2z330_wholeMatrix, 0, i );
-#endif
-   return true;
+   return lyra2z330_wholeMatrix;
 }

 bool register_lyra2z330_algo( algo_gate_t* gate )
 {
-  gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT;
+  gate->optimizations = AVX_OPT | AVX2_OPT;
  gate->miner_thread_init = (void*)&lyra2z330_thread_init;
  gate->scanhash   = (void*)&scanhash_lyra2z330;
  gate->hash       = (void*)&lyra2z330_hash;
--- a/algo/lyra2/sponge.c
+++ b/algo/lyra2/sponge.c
@@ -130,12 +130,12 @@ inline void squeeze( uint64_t *State, byte *Out, unsigned int len )
    //Squeezes full blocks
    for ( i = 0; i < fullBlocks; i++ )
    {
-       memcpy_m256i( out, state, BLOCK_LEN_M256I );
+       memcpy_256( out, state, BLOCK_LEN_M256I );
       LYRA_ROUND_AVX2( state[0], state[1], state[2], state[3] );
       out += BLOCK_LEN_M256I;
    }
    //Squeezes remaining bytes
-    memcpy_m256i( out, state, ( len_m256i % BLOCK_LEN_M256I ) );
+    memcpy_256( out, state, ( len_m256i % BLOCK_LEN_M256I ) );

 #elif defined (__AVX__)

@@ -148,13 +148,13 @@ inline void squeeze( uint64_t *State, byte *Out, unsigned int len )
    //Squeezes full blocks
    for ( i = 0; i < fullBlocks; i++ )
    {
-       memcpy_m128i( out, state, BLOCK_LEN_M128I );
+       memcpy_128( out, state, BLOCK_LEN_M128I );
       LYRA_ROUND_AVX( state[0], state[1], state[2], state[3],
                       state[4], state[5], state[6], state[7] );
       out += BLOCK_LEN_M128I;
    }
    //Squeezes remaining bytes
-    memcpy_m128i( out, state, ( len_m128i % BLOCK_LEN_M128I ) );
+    memcpy_128( out, state, ( len_m128i % BLOCK_LEN_M128I ) );

 #else

--- a/algo/lyra2/sponge.h
+++ b/algo/lyra2/sponge.h
@@ -66,11 +66,11 @@ static inline uint64_t rotr64( const uint64_t w, const unsigned c ){
 #define LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \
   G_4X64( s0, s1, s2, s3 ); \
   s1 = mm256_rotl256_1x64( s1); \
-   s2 = mm256_swap128( s2 ); \
+   s2 = mm256_swap_128( s2 ); \
   s3 = mm256_rotr256_1x64( s3 ); \
   G_4X64( s0, s1, s2, s3 ); \
   s1 = mm256_rotr256_1x64( s1 ); \
-   s2 = mm256_swap128( s2 ); \
+   s2 = mm256_swap_128( s2 ); \
   s3 = mm256_rotl256_1x64( s3 );

 #define LYRA_12_ROUNDS_AVX2( s0, s1, s2, s3 ) \
@@ -105,14 +105,14 @@ static inline uint64_t rotr64( const uint64_t w, const unsigned c ){
 #define LYRA_ROUND_AVX(s0,s1,s2,s3,s4,s5,s6,s7) \
   G_2X64( s0, s2, s4, s6 ); \
   G_2X64( s1, s3, s5, s7 ); \
-   mm128_rotl256_1x64( s2, s3 ); \
-   mm128_swap128( s4, s5 ); \
-   mm128_rotr256_1x64( s6, s7 ); \
+   mm_rotl256_1x64( s2, s3 ); \
+   mm_swap_128( s4, s5 ); \
+   mm_rotr256_1x64( s6, s7 ); \
   G_2X64( s0, s2, s4, s6 ); \
   G_2X64( s1, s3, s5, s7 ); \
-   mm128_rotr256_1x64( s2, s3 ); \
-   mm128_swap128( s4, s5 ); \
-   mm128_rotl256_1x64( s6, s7 );
+   mm_rotr256_1x64( s2, s3 ); \
+   mm_swap_128( s4, s5 ); \
+   mm_rotl256_1x64( s6, s7 );

 #define LYRA_12_ROUNDS_AVX(s0,s1,s2,s3,s4,s5,s6,s7) \
   LYRA_ROUND_AVX(s0,s1,s2,s3,s4,s5,s6,s7) \