v3.9.2.5

2026-07-14 19:06:50 +00:00 · 2019-06-13 11:20:27 -04:00
parent 7fec680835
commit b2331375a3
70 changed files with 4413 additions and 4360 deletions
--- a/algo/sha/sha2-hash-4way.h
+++ b/algo/sha/sha2-hash-4way.h
@@ -42,7 +42,7 @@

 #include <stddef.h>
 #include "sph_types.h"
-#include "avxdefs.h"
+#include "simd-utils.h"

 #if defined(__SSE2__)
 //#if defined(__SSE4_2__)
--- a/algo/sha/sha256_hash_11way.c
+++ b/algo/sha/sha256_hash_11way.c
@@ -1,3 +1,4 @@
+#if 0

 #include <stddef.h>
 #include <string.h>
@@ -65,7 +66,7 @@ static const uint32_t K256[64] =
   _mm_xor_si64( _mm_xor_si64( \
       mm64_ror_32(x,2), mm64_ror_32(x,13) ), _mm_srli_pi32(x,22) )

-#define BSG2_0z(x)  ( ror_32(x,2) ^ ror_32(x,13)  ^ ((x)>>22) )
+#define BSG2_0z(x)  ( u32_ror_32(x,2) ^ u32_ror_32(x,13)  ^ ((x)>>22) )

 #define BSG2_1x(x) \
   _mm256_xor_si256( _mm256_xor_si256( \
@@ -75,7 +76,7 @@ static const uint32_t K256[64] =
   _mm_xor_si64( _mm_xor_si64( \
       mm64_ror_32(x,6), mm64_ror_32(x,11) ), _mm_srli_pi32(x,25) )

-#define BSG2_1z(x)   ( ror_32(x,6) ^ ror_32(x,11) ^ ((x)>>25) )
+#define BSG2_1z(x)   ( u32_ror_32(x,6) ^ u32_ror_32(x,11) ^ ((x)>>25) )

 #define SSG2_0x(x) \
   _mm256_xor_si256( _mm256_xor_si256( \
@@ -85,7 +86,7 @@ static const uint32_t K256[64] =
   _mm_xor_si64( _mm_xor_si64( \
       mm64_ror_32(x,7), mm64_ror_32(x,18) ), _mm_srli_pi32(x,3) )

-#define SSG2_0z(x)  (( ror_32(x,7) ^ ror_32(x,18) ) ^ ((x)>>3) )
+#define SSG2_0z(x)  (( u32_ror_32(x,7) ^ u32_ror_32(x,18) ) ^ ((x)>>3) )

 #define SSG2_1x(x) \
   _mm256_xor_si256( _mm256_xor_si256( \
@@ -95,7 +96,7 @@ static const uint32_t K256[64] =
   _mm_xor_si64( _mm_xor_si64( \
       mm64_ror_32(x,17), mm64_ror_32(x,19) ), _mm_srli_pi32(x,10) )

-#define SSG2_1z(x)   ( ror_32(x,17) ^ ror_32(x,19)  ^ ((x)>>10) )
+#define SSG2_1z(x)   ( u32_ror_32(x,17) ^ u32_ror_32(x,19)  ^ ((x)>>10) )

 #define SHA2x_MEXP( a, b, c, d ) \
     _mm256_add_epi32( _mm256_add_epi32( _mm256_add_epi32( \
@@ -449,7 +450,7 @@ void sha256_11way_update( sha256_11way_context *ctx, const void *datax,
      if ( clen > len )
         clen = len;
      memcpy_256( ctx->bufx + (ptr>>2), vdatax + (ptr>>2), clen>>2 );
-      memcpy_64 ( ctx->bufy + (ptr>>2), vdatay + (ptr>>2), clen>>2 );
+      memcpy_m64( ctx->bufy + (ptr>>2), vdatay + (ptr>>2), clen>>2 );
      memcpy    ( ctx->bufz +  ptr,     idataz +  ptr,     clen    );
      ptr += clen;
      len -= clen;
@@ -486,19 +487,19 @@ void sha256_11way_close( sha256_11way_context *ctx, void *dstx, void *dsty,
    if ( ptr > pad )
    {
         memset_zero_256( ctx->bufx + (ptr>>2), (buf_size - ptr) >> 2 );
-         memset_zero_64(  ctx->bufy + (ptr>>2), (buf_size - ptr) >> 2 );
+         memset_zero_m64( ctx->bufy + (ptr>>2), (buf_size - ptr) >> 2 );
         memset(      ctx->bufz + (ptr>>2), 0,  (buf_size - ptr) >> 2 );
         sha256_11way_round( ctx->bufx, ctx->valx,
 			     ctx->bufy, ctx->valy,
 			     ctx->bufz, ctx->valz );
         memset_zero_256( ctx->bufx, pad >> 2 );
-         memset_zero_64(  ctx->bufy, pad >> 2 );
+         memset_zero_m64(  ctx->bufy, pad >> 2 );
         memset(      ctx->bufz, 0,  pad >> 2 );
    }
    else
    {
        memset_zero_256( ctx->bufx + (ptr>>2),    (pad - ptr) >> 2 );
-        memset_zero_64(  ctx->bufy + (ptr>>2),    (pad - ptr) >> 2 );
+        memset_zero_m64(  ctx->bufy + (ptr>>2),    (pad - ptr) >> 2 );
        memset(          ctx->bufz + (ptr>>2), 0, (pad - ptr) >> 2 );
    }

@@ -534,3 +535,4 @@ void sha256_11way_close( sha256_11way_context *ctx, void *dstx, void *dsty,
 }

 #endif
+#endif   // 0
--- a/algo/sha/sha256q-4way.c
+++ b/algo/sha/sha256q-4way.c
@@ -36,7 +36,6 @@ int scanhash_sha256q_8way( int thr_id, struct work *work, uint32_t max_nonce,
 {
   uint32_t vdata[20*8] __attribute__ ((aligned (64)));
   uint32_t hash[8*8] __attribute__ ((aligned (32)));
-   uint32_t edata[20] __attribute__ ((aligned (32)));;
   uint32_t *pdata = work->data;
   uint32_t *ptarget = work->target;
   const uint32_t Htarg = ptarget[7];
@@ -59,12 +58,7 @@ int scanhash_sha256q_8way( int thr_id, struct work *work, uint32_t max_nonce,
                                        0 };

   // Need big endian data
-   casti_m256i( edata, 0 ) = mm256_bswap_32( casti_m256i( pdata, 0 ) );
-   casti_m256i( edata, 1 ) = mm256_bswap_32( casti_m256i( pdata, 1 ) );
-   casti_m128i( edata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
-
-   mm256_interleave_8x32( vdata, edata, edata, edata, edata,
-                                 edata, edata, edata, edata, 640 );
+   mm256_bswap_intrlv80_8x32( vdata, pdata );
   sha256_8way_init( &sha256_ctx8 );
   sha256_8way( &sha256_ctx8, vdata, 64 );

@@ -73,11 +67,10 @@ int scanhash_sha256q_8way( int thr_id, struct work *work, uint32_t max_nonce,
      uint32_t mask = masks[m];
      do
      {
-        *noncev = mm256_bswap_32(
-		 _mm256_set_epi32( n+7, n+6, n+5, n+4, n+3, n+2, n+1, n ) );
-
-	 pdata[19] = n;
+         *noncev = mm256_bswap_32(
+		            _mm256_set_epi32( n+7, n+6, n+5, n+4, n+3, n+2, n+1, n ) );

+	      pdata[19] = n;
         sha256q_8way_hash( hash, vdata );

         uint32_t *hash7 = &(hash[7<<3]); 
@@ -86,27 +79,19 @@ int scanhash_sha256q_8way( int thr_id, struct work *work, uint32_t max_nonce,
         if ( !( hash7[ lane ] & mask ) )
         { 
            // deinterleave hash for lane
-	    uint32_t lane_hash[8];
-	    mm256_extract_lane_8x32( lane_hash, hash, lane, 256 );
+	         uint32_t lane_hash[8];
+	         mm256_extract_lane_8x32( lane_hash, hash, lane, 256 );

-	    if ( fulltest( lane_hash, ptarget ) )
+	         if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
            {
-	      pdata[19] = n + lane;
-              work_set_target_ratio( work, lane_hash );
-              if ( submit_work( mythr, work ) )
-                applog( LOG_NOTICE, "Share %d submitted by thread %d, lane %d.",
-                             accepted_share_count + rejected_share_count + 1,
-                             thr_id, lane );
-              else
-                applog( LOG_WARNING, "Failed to submit share." );
-	    }
-	 }
+	           pdata[19] = n + lane;
+              submit_solution( work, lane_hash, mythr, lane );
+            }
+	      }
         n += 8;
-
      } while ( (n < max_nonce-10) && !work_restart[thr_id].restart );
      break;
   }
-    
   *hashes_done = n - first_nonce + 1;
   return 0;
 }
@@ -146,7 +131,6 @@ int scanhash_sha256q_4way( int thr_id, struct work *work, uint32_t max_nonce,
   uint32_t hash[8*4] __attribute__ ((aligned (32)));
   uint32_t *hash7 = &(hash[7<<2]);
   uint32_t lane_hash[8];
-   uint32_t edata[20] __attribute__ ((aligned (32)));;
   uint32_t *pdata = work->data;
   uint32_t *ptarget = work->target;
   const uint32_t Htarg = ptarget[7];
@@ -168,13 +152,7 @@ int scanhash_sha256q_4way( int thr_id, struct work *work, uint32_t max_nonce,
                               0xFFFF0000,
                                        0 };

-   casti_m128i( edata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
-   casti_m128i( edata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
-   casti_m128i( edata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
-   casti_m128i( edata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
-   casti_m128i( edata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
-
-   mm128_interleave_4x32( vdata, edata, edata, edata, edata, 640 );
+   mm128_bswap_intrlv80_4x32( vdata, pdata );
   sha256_4way_init( &sha256_ctx4 );
   sha256_4way( &sha256_ctx4, vdata, 64 );

@@ -183,7 +161,7 @@ int scanhash_sha256q_4way( int thr_id, struct work *work, uint32_t max_nonce,
      uint32_t mask = masks[m];
      do {
         *noncev = mm128_bswap_32( _mm_set_epi32( n+3,n+2,n+1,n ) );
-	 pdata[19] = n;
+         pdata[19] = n;

         sha256q_4way_hash( hash, vdata );

@@ -192,25 +170,16 @@ int scanhash_sha256q_4way( int thr_id, struct work *work, uint32_t max_nonce,
         {
            mm128_extract_lane_4x32( lane_hash, hash, lane, 256 );

-            if ( fulltest( lane_hash, ptarget ) )
+            if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
            {
              pdata[19] = n + lane;
-              work_set_target_ratio( work, lane_hash );
-              if ( submit_work( mythr, work ) )
-                applog( LOG_NOTICE, "Share %d submitted by thread %d, lane %d.",
-                             accepted_share_count + rejected_share_count + 1,
-                             thr_id, lane );
-              else
-                applog( LOG_WARNING, "Failed to submit share." );
+              submit_solution( work, lane_hash, mythr, lane );
            }
         }
-
-	 n += 4;
-
+         n += 4;
      } while ( (n < max_nonce - 4) && !work_restart[thr_id].restart );
      break;
   }
-
   *hashes_done = n - first_nonce + 1;
   return 0;
 }
--- a/algo/sha/sha256t-4way.c
+++ b/algo/sha/sha256t-4way.c
@@ -72,7 +72,7 @@ int scanhash_sha256t_11way( int thr_id, struct work *work, uint32_t max_nonce,
   casti_m256i( dataz, 1 ) = mm256_bswap_32( casti_m256i( pdata, 1 ) );
   casti_m128i( dataz, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );

-   mm256_interleave_8x32( datax, dataz, dataz, dataz, dataz,
+   mm256_intrlv_8x32( datax, dataz, dataz, dataz, dataz,
                                 dataz, dataz, dataz, dataz, 640 );
   mm64_interleave_2x32( datay, dataz, dataz, 640 );

@@ -156,15 +156,15 @@ void sha256t_8way_hash( void* output, const void* input )
   sha256_8way_init( &ctx );
   sha256_8way( &ctx, vhash, 32 );
   sha256_8way_close( &ctx, output );
-
 }

 int scanhash_sha256t_8way( int thr_id, struct work *work, uint32_t max_nonce,
                           uint64_t *hashes_done, struct thr_info *mythr )
 {
-   uint32_t vdata[20*8] __attribute__ ((aligned (64)));
-   uint32_t hash[8*8] __attribute__ ((aligned (32)));
-   uint32_t edata[20] __attribute__ ((aligned (32)));;
+   uint32_t vdata[20*8]  __attribute__ ((aligned (64)));
+   uint32_t hash[8*8]    __attribute__ ((aligned (32)));
+   uint32_t lane_hash[8] __attribute__ ((aligned (32)));
+   uint32_t *hash7 = &(hash[7<<3]);
   uint32_t *pdata = work->data;
   uint32_t *ptarget = work->target;
   const uint32_t Htarg = ptarget[7];
@@ -187,12 +187,7 @@ int scanhash_sha256t_8way( int thr_id, struct work *work, uint32_t max_nonce,
                                        0 };

   // Need big endian data
-   casti_m256i( edata, 0 ) = mm256_bswap_32( casti_m256i( pdata, 0 ) );
-   casti_m256i( edata, 1 ) = mm256_bswap_32( casti_m256i( pdata, 1 ) );
-   casti_m128i( edata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
-
-   mm256_interleave_8x32( vdata, edata, edata, edata, edata,
-                                 edata, edata, edata, edata, 640 );
+   mm256_bswap_intrlv80_8x32( vdata, pdata );
   sha256_8way_init( &sha256_ctx8 );
   sha256_8way( &sha256_ctx8, vdata, 64 );

@@ -201,29 +196,22 @@ int scanhash_sha256t_8way( int thr_id, struct work *work, uint32_t max_nonce,
      uint32_t mask = masks[m];
      do
      {
-        *noncev = mm256_bswap_32(
-                 _mm256_set_epi32( n+7, n+6, n+5, n+4, n+3, n+2, n+1, n ) );
+        *noncev = mm256_bswap_32( _mm256_set_epi32(
+                                          n+7,n+6,n+5,n+4,n+3,n+2,n+1,n ) );
         pdata[19] = n;
-
         sha256t_8way_hash( hash, vdata );
-
-         uint32_t *hash7 = &(hash[7<<3]);
-
         for ( int lane = 0; lane < 8; lane++ )
         if ( !( hash7[ lane ] & mask ) )
         {
            // deinterleave hash for lane
-            uint32_t lane_hash[8] __attribute__ ((aligned (64)));
            mm256_extract_lane_8x32( lane_hash, hash, lane, 256 );
-
-            if ( fulltest( lane_hash, ptarget ) )
+            if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
            {
              pdata[19] = n + lane;
              submit_solution( work, lane_hash, mythr, lane );
-	    }
+	         }
         }
         n += 8;
-
      } while ( (n < max_nonce-10) && !work_restart[thr_id].restart );
      break;
   }
@@ -253,7 +241,6 @@ void sha256t_4way_hash( void* output, const void* input )
   sha256_4way_init( &ctx );
   sha256_4way( &ctx, vhash, 32 );
   sha256_4way_close( &ctx, output );
-
 }

 int scanhash_sha256t_4way( int thr_id, struct work *work, uint32_t max_nonce,
@@ -262,7 +249,6 @@ int scanhash_sha256t_4way( int thr_id, struct work *work, uint32_t max_nonce,
   uint32_t vdata[20*4] __attribute__ ((aligned (64)));
   uint32_t hash[8*4] __attribute__ ((aligned (32)));
   uint32_t lane_hash[8] __attribute__ ((aligned (64)));
-   uint32_t edata[20] __attribute__ ((aligned (32)));;
   uint32_t *hash7 = &(hash[7<<2]);
   uint32_t *pdata = work->data;
   uint32_t *ptarget = work->target;
@@ -278,20 +264,14 @@ int scanhash_sha256t_4way( int thr_id, struct work *work, uint32_t max_nonce,
                                   0xFFF,
                                  0xFFFF,
                              0x10000000 };
-   const uint32_t masks[] = {  0xFFFFFFFF,
-                               0xFFFFFFF0,
-                               0xFFFFFF00,
-                               0xFFFFF000,
-                               0xFFFF0000,
-                                        0 };
+   const uint32_t masks[] = { 0xFFFFFFFF,
+                              0xFFFFFFF0,
+                              0xFFFFFF00,
+                              0xFFFFF000,
+                              0xFFFF0000,
+                                       0 };

-   casti_m128i( edata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
-   casti_m128i( edata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
-   casti_m128i( edata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
-   casti_m128i( edata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
-   casti_m128i( edata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
-
-   mm128_interleave_4x32( vdata, edata, edata, edata, edata, 640 );
+   mm128_bswap_intrlv80_4x32( vdata, pdata );
   sha256_4way_init( &sha256_ctx4 );
   sha256_4way( &sha256_ctx4, vdata, 64 );

@@ -300,7 +280,7 @@ int scanhash_sha256t_4way( int thr_id, struct work *work, uint32_t max_nonce,
      uint32_t mask = masks[m];
      do {
         *noncev = mm128_bswap_32( _mm_set_epi32( n+3,n+2,n+1,n ) );
-	 pdata[19] = n;
+         pdata[19] = n;

         sha256t_4way_hash( hash, vdata );

@@ -308,15 +288,13 @@ int scanhash_sha256t_4way( int thr_id, struct work *work, uint32_t max_nonce,
         if ( !( hash7[ lane ] & mask ) )
         {
            mm128_extract_lane_4x32( lane_hash, hash, lane, 256 );
-
-            if ( fulltest( lane_hash, ptarget ) )
+            if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
            {
              pdata[19] = n + lane;
              submit_solution( work, lane_hash, mythr, lane );
-	    }
-         }
-	 n += 4;
-
+	         }
+         } 
+         n += 4;
      } while ( (n < max_nonce - 4) && !work_restart[thr_id].restart );
      break;
   }
--- a/algo/sha/sha256t-gate.c
+++ b/algo/sha/sha256t-gate.c
@@ -2,11 +2,7 @@

 bool register_sha256t_algo( algo_gate_t* gate )
 {
-#if defined(SHA256T_11WAY)
-    gate->optimizations = SSE2_OPT | AVX2_OPT | SHA_OPT;
-    gate->scanhash   = (void*)&scanhash_sha256t_11way;
-    gate->hash       = (void*)&sha256t_11way_hash;
-#elif defined(SHA256T_8WAY)
+#if defined(SHA256T_8WAY)
    gate->optimizations = SSE2_OPT | AVX2_OPT | SHA_OPT;
    gate->scanhash   = (void*)&scanhash_sha256t_8way;
    gate->hash       = (void*)&sha256t_8way_hash;
@@ -25,11 +21,7 @@ gate->optimizations = SHA_OPT;

 bool register_sha256q_algo( algo_gate_t* gate )
 {
-#if defined(SHA256T_8WAY)
-    gate->optimizations = SSE2_OPT | AVX2_OPT | SHA_OPT;
-    gate->scanhash   = (void*)&scanhash_sha256q_8way;
-    gate->hash       = (void*)&sha256q_8way_hash;
-#elif defined(SHA256T_4WAY)
+#if defined(SHA256T_4WAY)
    gate->optimizations = SSE2_OPT | AVX2_OPT | SHA_OPT;
    gate->scanhash   = (void*)&scanhash_sha256q_4way;
    gate->hash       = (void*)&sha256q_4way_hash;
--- a/algo/sha/sha256t-gate.h
+++ b/algo/sha/sha256t-gate.h
@@ -11,24 +11,12 @@
 #endif
 #if defined(__AVX2__)
  #define SHA256T_8WAY
-//  #define SHA256T_11WAY
 #endif
 #endif

 bool register_sha256t_algo( algo_gate_t* gate );
 bool register_sha256q_algo( algo_gate_t* gate );

-#if defined(SHA256T_11WAY)
-
-void sha256t_11way_hash( void *outx, void *outy, void *outz, const void *inpx,
-	                 const void *inpy, const void *inpz );
-int scanhash_sha256t_11way( int thr_id, struct work *work, uint32_t max_nonce,
-                            uint64_t *hashes_done, struct thr_info *mythr );
-//void sha256q_8way_hash( void *output, const void *input );
-//int scanhash_sha256q_11way( int thr_id, struct work *work, uint32_t max_nonce,
-//                            uint64_t *hashes_done, struct thr_info *mythr );
-#endif
-
 #if defined(SHA256T_8WAY)

 void sha256t_8way_hash( void *output, const void *input );