v3.9.1.1

2025-09-17 23:44:27 +00:00 · 2019-05-31 13:20:12 -04:00
parent 77c5ae80ab
commit 02202ab803
19 changed files with 106 additions and 48 deletions
--- a/algo/hodl/sha512_avx.c
+++ b/algo/hodl/sha512_avx.c
@@ -11,6 +11,10 @@
 #include <sys/endian.h>
 #endif 

+#if defined(__CYGWIN__)
+#include <endian.h>
+#endif
+
 #include "tmmintrin.h"
 #include "smmintrin.h"

--- a/algo/hodl/sha512_avx2.c
+++ b/algo/hodl/sha512_avx2.c
@@ -8,6 +8,10 @@
 #include <sys/endian.h>
 #endif 

+#if defined(__CYGWIN__)
+#include <endian.h>
+#endif
+
 #include "tmmintrin.h"
 #include "smmintrin.h"
 #include "immintrin.h"
--- a/algo/keccak/sse2/keccak.c
+++ b/algo/keccak/sse2/keccak.c
@@ -91,7 +91,7 @@ extern "C"{
 #pragma warning (disable: 4146)
 #endif

-
+/*
 static const sph_u64 RC[] = {
 	SPH_C64(0x0000000000000001), SPH_C64(0x0000000000008082),
 	SPH_C64(0x800000000000808A), SPH_C64(0x8000000080008000),
@@ -106,7 +106,7 @@ static const sph_u64 RC[] = {
 	SPH_C64(0x8000000080008081), SPH_C64(0x8000000000008080),
 	SPH_C64(0x0000000080000001), SPH_C64(0x8000000080008008)
 };
-
+*/
 #define kekDECL_STATE \
 	sph_u64 keca00, keca01, keca02, keca03, keca04; \
 	sph_u64 keca10, keca11, keca12, keca13, keca14; \
@@ -756,6 +756,20 @@ static const sph_u64 RC[] = {
 * tested faster saving space
 */
 #define KECCAK_F_1600_   do { \
+static const sph_u64 RC[] = { \
+        SPH_C64(0x0000000000000001), SPH_C64(0x0000000000008082), \
+        SPH_C64(0x800000000000808A), SPH_C64(0x8000000080008000), \
+        SPH_C64(0x000000000000808B), SPH_C64(0x0000000080000001), \
+        SPH_C64(0x8000000080008081), SPH_C64(0x8000000000008009), \
+        SPH_C64(0x000000000000008A), SPH_C64(0x0000000000000088), \
+        SPH_C64(0x0000000080008009), SPH_C64(0x000000008000000A), \
+        SPH_C64(0x000000008000808B), SPH_C64(0x800000000000008B), \
+        SPH_C64(0x8000000000008089), SPH_C64(0x8000000000008003), \
+        SPH_C64(0x8000000000008002), SPH_C64(0x8000000000000080), \
+        SPH_C64(0x000000000000800A), SPH_C64(0x800000008000000A), \
+        SPH_C64(0x8000000080008081), SPH_C64(0x8000000000008080), \
+        SPH_C64(0x0000000080000001), SPH_C64(0x8000000080008008) \
+}; \
 		int j; \
 		for (j = 0; j < 24; j += 4) { \
 			KF_ELT( 0,  1, RC[j + 0]); \
@@ -791,7 +805,7 @@ static const sph_u64 RC[] = {
 /* load initial constants */
 #define KEC_I 

-static unsigned char keczword[8] = { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 }; 
+//static unsigned char keczword[8] = { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 }; 
 /*
 unsigned char keczword[8] = { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 }; \
 */
@@ -799,6 +813,7 @@ static unsigned char keczword[8] = { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0
 /* load hash for loop */
 #define KEC_U \
 do { \
+static unsigned char keczword[8] = { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 }; \
    /*memcpy(hashbuf, hash, 64); */ \
    memcpy(hash + 64, keczword, 8); \
 } while (0); 
--- a/algo/lyra2/lyra2-gate.h
+++ b/algo/lyra2/lyra2-gate.h
@@ -57,7 +57,7 @@ bool init_lyra2rev2_ctx();

 /////////////////////////

-#if defined(__SSE4_2__)
+#if defined(__SSE2__)
  #define LYRA2Z_4WAY
 #endif
 #if defined(__AVX2__)
--- a/algo/lyra2/sponge.h
+++ b/algo/lyra2/sponge.h
@@ -91,7 +91,7 @@ static inline uint64_t rotr64( const uint64_t w, const unsigned c ){
   LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \
   LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \

-#elif defined(__SSE2__)
+#elif defined(__SSE4_2__)

 // process 2 columns in parallel
 // returns void, all args updated
@@ -108,7 +108,7 @@ static inline uint64_t rotr64( const uint64_t w, const unsigned c ){
 #define LYRA_ROUND_AVX(s0,s1,s2,s3,s4,s5,s6,s7) \
   G_2X64( s0, s2, s4, s6 ); \
   G_2X64( s1, s3, s5, s7 ); \
-   mm128_rol1x64_256( s2, s3 ); \
+   mm128_ror1x64_256( s2, s3 ); \
   mm128_swap128_256( s4, s5 ); \
   mm128_rol1x64_256( s6, s7 ); \
   G_2X64( s0, s2, s4, s6 ); \
@@ -132,7 +132,7 @@ static inline uint64_t rotr64( const uint64_t w, const unsigned c ){
   LYRA_ROUND_AVX(s0,s1,s2,s3,s4,s5,s6,s7) \


-#endif // AVX2
+#endif // AVX2 else SSE4_2

 // Scalar
 //Blake2b's G function
--- a/algo/sha/sha2-hash-4way.c
+++ b/algo/sha/sha2-hash-4way.c
@@ -30,7 +30,7 @@
 * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
 */

-#if defined(__SSE4_2__)
+#if defined(__SSE2__)

 #include <stddef.h>
 #include <string.h>
@@ -716,4 +716,4 @@ void sha512_4way_close( sha512_4way_context *sc, void *dst )
 }

 #endif  // __AVX2__
-#endif  // __SSE4_2__
+#endif  // __SSE2__
--- a/algo/sha/sha2-hash-4way.h
+++ b/algo/sha/sha2-hash-4way.h
@@ -44,7 +44,8 @@
 #include "sph_types.h"
 #include "avxdefs.h"

-#if defined(__SSE4_2__)
+#if defined(__SSE2__)
+//#if defined(__SSE4_2__)

 //#define SPH_SIZE_sha256   256

--- a/algo/sha/sha256t-4way.c
+++ b/algo/sha/sha256t-4way.c
@@ -108,7 +108,9 @@ int scanhash_sha256t_8way( int thr_id, struct work *work, uint32_t max_nonce,
   return 0;
 }

-#elif defined(SHA256T_4WAY)
+#endif
+
+#if defined(SHA256T_4WAY)

 static __thread sha256_4way_context sha256_ctx4 __attribute__ ((aligned (64)));

--- a/algo/sha/sha256t-gate.c
+++ b/algo/sha/sha256t-gate.c
@@ -3,15 +3,15 @@
 bool register_sha256t_algo( algo_gate_t* gate )
 {
 #if defined(SHA256T_8WAY)
-    gate->optimizations = SSE42_OPT | AVX2_OPT;
+    gate->optimizations = SSE2_OPT | AVX2_OPT;
    gate->scanhash   = (void*)&scanhash_sha256t_8way;
    gate->hash       = (void*)&sha256t_8way_hash;
 #elif defined(SHA256T_4WAY)
-    gate->optimizations = SSE42_OPT | AVX2_OPT;
+    gate->optimizations = SSE2_OPT | AVX2_OPT;
    gate->scanhash   = (void*)&scanhash_sha256t_4way;
    gate->hash       = (void*)&sha256t_4way_hash;
 #else
-    gate->optimizations = SSE42_OPT | AVX2_OPT | SHA_OPT;
+    gate->optimizations = SSE2_OPT | AVX2_OPT | SHA_OPT;
    gate->scanhash   = (void*)&scanhash_sha256t;
    gate->hash       = (void*)&sha256t_hash;
 #endif
--- a/algo/sha/sha256t-gate.h
+++ b/algo/sha/sha256t-gate.h
@@ -6,7 +6,8 @@

 // Override multi way on ryzen, SHA is better.
 #if !defined(RYZEN_)
-#if defined(__SSE4_2__)
+//#if defined(__SSE4_2__)
+#if defined(__SSE2__)
  #define SHA256T_4WAY
 #endif
 #if defined(__AVX2__)
@@ -22,7 +23,7 @@ void sha256t_8way_hash( void *output, const void *input );
 int scanhash_sha256t_8way( int thr_id, struct work *work, uint32_t max_nonce,
                           uint64_t *hashes_done, struct thr_info *mythr );

-#elif defined (SHA256T_4WAY)
+#elif defined(SHA256T_4WAY)

 void sha256t_4way_hash( void *output, const void *input );
 int scanhash_sha256t_4way( int thr_id, struct work *work, uint32_t max_nonce,
--- a/algo/sha/sha256t.c
+++ b/algo/sha/sha256t.c
@@ -5,8 +5,6 @@
 #include <stdio.h>
 #include <openssl/sha.h>

-#if !defined(SHA256T_4WAY)
-
 static __thread SHA256_CTX sha256t_ctx __attribute__ ((aligned (64)));

 void sha256t_midstate( const void* input )
@@ -100,4 +98,3 @@ int scanhash_sha256t( int thr_id, struct work *work, uint32_t max_nonce,
   pdata[19] = n;
   return 0;
 }
-#endif
--- a/algo/x17/sonoa-4way.c
+++ b/algo/x17/sonoa-4way.c
@@ -819,10 +819,7 @@ int scanhash_sonoa_4way( int thr_id, struct work *work, uint32_t max_nonce,
     uint32_t *ptarget = work->target;
     uint32_t n = pdata[19];
     const uint32_t first_nonce = pdata[19];
-     uint32_t *nonces = work->nonces;
-     int num_found = 0;
     __m256i  *noncev = (__m256i*)vdata + 9;   // aligned
-//     uint32_t *noncep = vdata + 73;   // 9*8 + 1
     const uint32_t Htarg = ptarget[7];
     /* int */ thr_id = mythr->id;  // thr_id arg is deprecated
     uint64_t htmax[] = {          0,        0xF,       0xFF,
@@ -855,18 +852,23 @@ int scanhash_sonoa_4way( int thr_id, struct work *work, uint32_t max_nonce,
              if ( fulltest( lane_hash, ptarget ) )
              {
                 pdata[19] = n + lane;
-                 nonces[ num_found++ ] = n + lane;
                 work_set_target_ratio( work, lane_hash );
+                 if ( submit_work( mythr, work ) )
+                    applog( LOG_NOTICE,
+                             "Share %d submitted by thread %d, lane %d.",
+                             accepted_share_count + rejected_share_count + 1,
+                             thr_id, lane );
+                 else
+                    applog( LOG_WARNING, "Failed to submit share." );
              }
           }
           n += 4;
-        } while ( ( num_found == 0 ) && ( n < max_nonce )
-                  && !work_restart[thr_id].restart );
+        } while ( ( n < max_nonce - 4 ) && !work_restart[thr_id].restart );
        break;
     }

     *hashes_done = n - first_nonce + 1;
-     return num_found;
+     return 0;
 }

 #endif
--- a/algo/x17/x17-4way.c
+++ b/algo/x17/x17-4way.c
@@ -242,8 +242,6 @@ int scanhash_x17_4way( int thr_id, struct work *work, uint32_t max_nonce,
     uint32_t *ptarget = work->target;
     uint32_t n = pdata[19];
     const uint32_t first_nonce = pdata[19];
-     uint32_t *nonces = work->nonces;
-     int num_found = 0;
     __m256i  *noncev = (__m256i*)vdata + 9;   // aligned
     /* int */ thr_id = mythr->id;  // thr_id arg is deprecated
     const uint32_t Htarg = ptarget[7];
@@ -277,18 +275,23 @@ int scanhash_x17_4way( int thr_id, struct work *work, uint32_t max_nonce,
              if ( fulltest( lane_hash, ptarget ) )
              {
                 pdata[19] = n + lane;
-                 nonces[ num_found++ ] = n + lane;
                 work_set_target_ratio( work, lane_hash );
+                 if ( submit_work( mythr, work ) )
+                    applog( LOG_NOTICE,
+			     "Share %d submitted by thread %d, lane %d.",
+                             accepted_share_count + rejected_share_count + 1,
+                             thr_id, lane );
+                 else
+                    applog( LOG_WARNING, "Failed to submit share." );
              }
           }
           n += 4;
-        } while ( ( num_found == 0 ) && ( n < max_nonce )
-                   && !work_restart[thr_id].restart );
+        } while ( ( n < max_nonce - 4 ) && !work_restart[thr_id].restart );
        break;
     }

     *hashes_done = n - first_nonce + 1;
-     return num_found;
+     return 0;
 }

 #endif
--- a/algo/yespower/yespower-opt.c
+++ b/algo/yespower/yespower-opt.c
@@ -528,7 +528,7 @@ static volatile uint64_t Smask2var = Smask2;
 /* 64-bit without AVX.  This relies on out-of-order execution and register
 * renaming.  It may actually be fastest on CPUs with AVX(2) as well - e.g.,
 * it runs great on Haswell. */
-#warning "Note: using x86-64 inline assembly for pwxform.  That's great."
+//#warning "Note: using x86-64 inline assembly for pwxform.  That's great."
 #undef MAYBE_MEMORY_BARRIER
 #define MAYBE_MEMORY_BARRIER \
 	__asm__("" : : : "memory");