v24.1

2026-07-15 03:16:49 +00:00 · 2024-04-16 21:31:35 -04:00
parent 9d3a46c355
commit 4f930574cc
33 changed files with 757 additions and 609 deletions
--- a/algo/m7m/m7m.c
+++ b/algo/m7m/m7m.c
@@ -21,7 +21,7 @@
 #define EPS1 DBL_EPSILON
 #define EPS2 3.0e-11

-inline double exp_n( double xt )
+static inline double exp_n( double xt )
 {
    if ( xt < -700.0 )
        return 0;
@@ -33,7 +33,7 @@ inline double exp_n( double xt )
        return exp( xt );
 }

-inline double exp_n2( double x1, double x2 )
+static inline double exp_n2( double x1, double x2 )
 {
    double p1 = -700., p2 = -37., p3 = -0.8e-8, p4 = 0.8e-8,
           p5 = 37., p6 = 700.;
--- a/algo/m7m/magimath.cpp
+++ b/algo/m7m/magimath.cpp
@@ -1,75 +0,0 @@
-// Copyright (c) 2014 The Magi developers
-// Distributed under the MIT/X11 software license, see the accompanying
-// file COPYING or http://www.opensource.org/licenses/mit-license.php.
-
-#include <iostream>
-#include <cfloat>
-#include <limits>
-#include <math.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdint.h>
-
-#include "magimath.h"
-
-#define EPS1 (std::numeric_limits<double>::epsilon())
-#define EPS2 3.0e-11
-
-static void gauleg(double x1, double x2, double x[], double w[], const int n)
-{
-	int m,j,i;
-	double z1, z, xm, xl, pp, p3, p2, p1;
-	m=(n+1)/2;
-	xm=0.5*(x2+x1);
-	xl=0.5*(x2-x1);
-	for (i=1;i<=m;i++) {
-		z=cos(3.141592654*(i-0.25)/(n+0.5));
-		do {
-			p1=1.0;
-			p2=0.0;
-			for (j=1;j<=n;j++) {
-				p3=p2;
-				p2=p1;
-				p1=((2.0*j-1.0)*z*p2-(j-1.0)*p3)/j;
-			}
-			pp=n*(z*p1-p2)/(z*z-1.0);
-			z1=z;
-			z=z1-p1/pp;
-		} while (fabs(z-z1) > EPS2);
-		x[i]=xm-xl*z;
-		x[n+1-i]=xm+xl*z;
-		w[i]=2.0*xl/((1.0-z*z)*pp*pp);
-		w[n+1-i]=w[i];
-	}
-}
-
-static double GaussianQuad_N(double func(const double), const double a2, const double b2, const int NptGQ)
-{
-	double s=0.0;
-#ifdef _MSC_VER
-#define SW_DIVS 23
-	double x[SW_DIVS+1], w[SW_DIVS+1];
-#else
-	double x[NptGQ+1], w[NptGQ+1];
-#endif
-
-	gauleg(a2, b2, x, w, NptGQ);
-
-	for (int j=1; j<=NptGQ; j++) {
-		s += w[j]*func(x[j]);
-	}
-
-	return s;
-}
-
-static double swit_(double wvnmb)
-{
-	return pow( (5.55243*(exp_n(-0.3*wvnmb/15.762) - exp_n(-0.6*wvnmb/15.762)))*wvnmb, 0.5)
-		/ 1034.66 * pow(sin(wvnmb/65.), 2.);
-}
-
-uint32_t sw_(int nnounce, int divs)
-{
-	double wmax = ((sqrt((double)(nnounce))*(1.+EPS1))/450+100);
-	return ((uint32_t)(GaussianQuad_N(swit_, 0., wmax, divs)*(1.+EPS1)*1.e6));
-}
--- a/algo/m7m/magimath.h
+++ b/algo/m7m/magimath.h
@@ -1,54 +0,0 @@
-// Copyright (c) 2014 The Magi developers
-// Distributed under the MIT/X11 software license, see the accompanying
-// file COPYING or http://www.opensource.org/licenses/mit-license.php.
-#ifndef MAGI_MATH_H
-#define MAGI_MATH_H
-
-#include <math.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-uint32_t sw_(int nnounce, int divs);
-
-#ifdef __cplusplus
-}
-#endif
-
-
-inline double exp_n(double xt)
-{
-	double p1 = -700.0, p3 = -0.8e-8, p4 = 0.8e-8, p6 = 700.0;
-	if(xt < p1)
-		return 0;
-	else if(xt > p6)
-		return 1e200;
-	else if(xt > p3 && xt < p4)
-		return (1.0 + xt);
-	else
-		return exp(xt);
-}
-
-// 1 / (1 + exp(x1-x2))
-inline double exp_n2(double x1, double x2)
-{
-	double p1 = -700., p2 = -37., p3 = -0.8e-8, p4 = 0.8e-8, p5 = 37., p6 = 700.;
-	double xt = x1 - x2;
-	if (xt < p1+1.e-200)
-		return 1.;
-	else if (xt > p1 && xt < p2 + 1.e-200)
-		return ( 1. - exp(xt) );
-	else if (xt > p2 && xt < p3 + 1.e-200)
-		return ( 1. / (1. + exp(xt)) );
-	else if (xt > p3 && xt < p4)
-		return ( 1. / (2. + xt) );
-	else if (xt > p4 - 1.e-200 && xt < p5)
-		return ( exp(-xt) / (1. + exp(-xt)) );
-	else if (xt > p5 - 1.e-200 && xt < p6)
-		return ( exp(-xt) );
-	else //if (xt > p6 - 1.e-200)
-		return 0.;
-}
-
-#endif
--- a/algo/sha/sha1-hash.c
+++ b/algo/sha/sha1-hash.c
@@ -205,7 +205,7 @@ void sha1_x86_sha_transform_be( uint32_t *state_out, const void *input,

 #endif

-#if defined(__aarch64__) && defined(__ARM_FEATURE_SHA2)
+#if defined(__ARM_NEON) && defined(__ARM_FEATURE_SHA2)

 #define sha1_neon_rounds( state_out, data, state_in ) \
 { \
--- a/algo/sha/sha256-hash.c
+++ b/algo/sha/sha256-hash.c
@@ -1,6 +1,6 @@
 #include "sha256-hash.h"

-#if ( defined(__x86_64__) && defined(__SHA__) ) || defined(__ARM_NEON) && defined(__ARM_FEATURE_SHA2)
+#if ( defined(__x86_64__) && defined(__SHA__) ) || ( defined(__ARM_NEON) && defined(__ARM_FEATURE_SHA2) )

 static const uint32_t SHA256_IV[8] =
 {
@@ -189,7 +189,7 @@ static const uint32_t SHA256_IV[8] =
    _mm_store_si128( (__m128i*) &state_out[4], STATE1 ); \
 }

-void sha256_opt_transform_le( uint32_t *state_out, const void *input,
+void sha256_x86_sha_transform_le( uint32_t *state_out, const void *input,
                              const uint32_t *state_in )
 {
 #define load_msg( m, i ) casti_v128( m, i )
@@ -197,7 +197,7 @@ void sha256_opt_transform_le( uint32_t *state_out, const void *input,
 #undef load_msg
 }

-void sha256_opt_transform_be( uint32_t *state_out, const void *input,
+void sha256_x86_sha_transform_be( uint32_t *state_out, const void *input,
                              const uint32_t *state_in )
 {
 #define load_msg( m, i ) v128_bswap32( casti_v128( m, i ) )
@@ -517,7 +517,7 @@ void sha256_opt_transform_be( uint32_t *state_out, const void *input,
    _mm_store_si128( (__m128i*) &out_Y[4], STATE1_Y ); \
 }

-void sha256_ni2x_transform_le( uint32_t *out_X, uint32_t*out_Y,
+void sha256_x86_x2sha_transform_le( uint32_t *out_X, uint32_t*out_Y,
                                 const void *msg_X, const void *msg_Y,
                                 const uint32_t *in_X, const uint32_t *in_Y )
 {
@@ -526,7 +526,7 @@ void sha256_ni2x_transform_le( uint32_t *out_X, uint32_t*out_Y,
 #undef load_msg
 }

-void sha256_ni2x_transform_be( uint32_t *out_X, uint32_t*out_Y,
+void sha256_x86_x2sha_transform_be( uint32_t *out_X, uint32_t*out_Y,
                              const void *msg_X, const void *msg_Y,
                              const uint32_t *in_X, const uint32_t *in_Y )
 {
@@ -541,7 +541,7 @@ void sha256_ni2x_transform_be( uint32_t *out_X, uint32_t*out_Y,
 // The goal is to avoid any redundant processing in final. Prehash is almost
 // 4 rounds total, only missing the final addition of the nonce.
 // Nonce must be set to zero for prehash.
-void sha256_ni_prehash_3rounds( uint32_t *ostate, const void *msg,
+void sha256_x86_sha_prehash_3rounds( uint32_t *ostate, const void *msg,
                                uint32_t *sstate, const uint32_t *istate )
 {
   __m128i STATE0, STATE1, MSG, TMP;
@@ -569,7 +569,7 @@ void sha256_ni_prehash_3rounds( uint32_t *ostate, const void *msg,
   casti_m128i( ostate, 1 ) = STATE1;
 }

-void sha256_ni2x_final_rounds( uint32_t *out_X, uint32_t *out_Y,
+void sha256_x86_x2sha_final_rounds( uint32_t *out_X, uint32_t *out_Y,
                 const void *msg_X, const void *msg_Y,
                 const uint32_t *state_mid_X, const uint32_t *state_mid_Y,
                 const uint32_t *state_save_X, const uint32_t *state_save_Y )
--- a/algo/sha/sha256-hash.h
+++ b/algo/sha/sha256-hash.h
@@ -5,27 +5,21 @@
 #include "simd-utils.h"
 #include "cpuminer-config.h"

-// generic interface 
+static const uint32_t SHA256_IV[8];
+
+#if defined(__x86_64__) && defined(__SHA__)

 typedef struct
 {
-   unsigned char buf[64];    /* first field, for alignment */
+   unsigned char buf[64];
   uint32_t state[8];
   uint64_t count;
 } sha256_context __attribute__((aligned(64)));

-static const uint32_t SHA256_IV[8];
-
 void sha256_full( void *hash, const void *data, size_t len );
 void sha256_update( sha256_context *ctx, const void *data, size_t len );
 void sha256_final( sha256_context *ctx, void *hash );
 void sha256_ctx_init( sha256_context *ctx );
-void sha256_transform_le( uint32_t *state_out, const uint32_t *data,
-                          const uint32_t *state_in );
-void sha256_transform_be( uint32_t *state_out, const uint32_t *data,
-                          const uint32_t *state_in );
-
-#if defined(__x86_64__) && defined(__SHA__)

 void sha256_x86_sha_transform_le( uint32_t *state_out, const void *input,
                                  const uint32_t *state_in );
@@ -50,14 +44,6 @@ void sha256_x86_x2sha_final_rounds( uint32_t *state_out_X, uint32_t *state_out_Y
                 const uint32_t *state_mid_X, const uint32_t *state_mid_Y,
                 const uint32_t *state_save_X, const uint32_t *state_save_Y );

-// Temporary during name transition
-#define sha256_opt_transform_le   sha256_x86_sha_transform_le
-#define sha256_opt_transform_be   sha256_x86_sha_transform_be
-#define sha256_ni2x_transform_le  sha256_x86_x2sha_transform_le
-#define sha256_ni2x_transform_be  sha256_x86_x2sha_transform_be
-#define sha256_ni_prehash_3rounds sha256_x86_sha_prehash_3rounds
-#define sha256_ni2x_final_rounds  sha256_x86_x2sha_final_rounds
-
 // generic API
 #define sha256_transform_le        sha256_x86_sha_transform_le
 #define sha256_transform_be        sha256_x86_sha_transform_be
@@ -68,6 +54,20 @@ void sha256_x86_x2sha_final_rounds( uint32_t *state_out_X, uint32_t *state_out_Y

 #elif defined(__ARM_NEON) && defined(__ARM_FEATURE_SHA2)

+// SHA-256 AArch64 with NEON & SHA2
+
+typedef struct
+{
+   unsigned char buf[64];
+   uint32_t state[8];
+   uint64_t count;
+} sha256_context __attribute__((aligned(64)));
+
+void sha256_full( void *hash, const void *data, size_t len );
+void sha256_update( sha256_context *ctx, const void *data, size_t len );
+void sha256_final( sha256_context *ctx, void *hash );
+void sha256_ctx_init( sha256_context *ctx );
+
 void sha256_neon_sha_transform_be( uint32_t *state_out, const void *input,
                                   const uint32_t *state_in );
 void sha256_neon_sha_transform_le( uint32_t *state_out, const void *input,
@@ -89,14 +89,6 @@ void sha256_neon_x2sha_final_rounds( uint32_t *state_out_X,
                 const uint32_t *state_mid_X, const uint32_t *state_mid_Y,
                 const uint32_t *state_save_X, const uint32_t *state_save_Y );

-// Temporary during name transition
-#define sha256_transform_le         sha256_neon_sha_transform_le
-#define sha256_transform_be         sha256_neon_sha_transform_be
-#define sha256_2x_transform_le      sha256_neon_x2sha_transform_le
-#define sha256_2x_transform_be      sha256_neon_x2sha_transform_be
-#define sha256_prehash_3rounds      sha256_neon_sha_prehash_3rounds
-#define sha256_2x_final_rounds      sha256_neon_x2sha_final_rounds
-
 // generic API
 #define sha256_transform_le        sha256_neon_sha_transform_le
 #define sha256_transform_be        sha256_neon_sha_transform_be
@@ -106,9 +98,11 @@ void sha256_neon_x2sha_final_rounds( uint32_t *state_out_X,
 #define sha256_2x_final_rounds     sha256_neon_x2sha_final_rounds

 #else
+
 // without HW acceleration...
 #include "sph_sha2.h"

+#define sha256_context              sph_sha256_context
 #define sha256_full                 sph_sha256_full
 #define sha256_ctx_init             sph_sha256_init
 #define sha256_update               sph_sha256
@@ -117,12 +111,11 @@ void sha256_neon_x2sha_final_rounds( uint32_t *state_out_X,
 #define sha256_transform_be         sph_sha256_transform_be
 #define sha256_prehash_3rounds      sph_sha256_prehash_3rounds

-
 #endif

 #if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)

-// SHA-256 16 way
+// SHA-256 16 way x86_64

 typedef struct
 {
@@ -147,7 +140,7 @@ void sha256_16x32_final_rounds( __m512i *state_out, const __m512i *data,
 int sha256_16x32_transform_le_short( __m512i *state_out, const __m512i *data,
                            const __m512i *state_in, const uint32_t *target );

-#define sha256_16way_context sha256_16x32_context
+#define sha256_16way_context               sha256_16x32_context
 #define sha256_16way_init                  sha256_16x32_init
 #define sha256_16way_update                sha256_16x32_update
 #define sha256_16way_close                 sha256_16x32_close
@@ -162,7 +155,7 @@ int sha256_16x32_transform_le_short( __m512i *state_out, const __m512i *data,

 #if defined (__AVX2__)

-// SHA-256 8 way
+// SHA-256 8 way x86_64

 typedef struct
 {
@@ -201,7 +194,7 @@ int sha256_8x32_transform_le_short( __m256i *state_out, const __m256i *data,

 #endif  // AVX2

-// SHA-256 4 way
+// SHA-256 4 way x86_64 with SSE2 or AArch64 with NEON

 typedef struct
 {
--- a/algo/x11/timetravel-gate.c
+++ b/algo/x11/timetravel-gate.c
@@ -16,14 +16,14 @@ bool register_timetravel_algo( algo_gate_t* gate )
  return true;
 };

-inline void tt_swap( int *a, int *b )
+static inline void tt_swap( int *a, int *b )
 {
        int c = *a;
        *a = *b;
        *b = c;
 }

-inline void reverse( int *pbegin, int *pend )
+static inline void reverse( int *pbegin, int *pend )
 {
   while ( (pbegin != pend) && (pbegin != --pend) )
   {
--- a/algo/x11/timetravel10-gate.c
+++ b/algo/x11/timetravel10-gate.c
@@ -16,14 +16,14 @@ bool register_timetravel10_algo( algo_gate_t* gate )
  return true;
 };

-inline void tt10_swap( int *a, int *b )
+static inline void tt10_swap( int *a, int *b )
 {
        int c = *a;
        *a = *b;
        *b = c;
 }

-inline void reverse( int *pbegin, int *pend )
+static inline void reverse( int *pbegin, int *pend )
 {
   while ( (pbegin != pend) && (pbegin != --pend) )
   {
--- a/algo/x16/x16rv2-4way.c
+++ b/algo/x16/x16rv2-4way.c
@@ -730,7 +730,7 @@ typedef union _x16rv2_4way_context_overlay x16rv2_4way_context_overlay;
 static __thread x16rv2_4way_context_overlay x16rv2_ctx;

 // Pad the 24 bytes tiger hash to 64 bytes
-inline void padtiger512( uint32_t* hash )
+static inline void padtiger512( uint32_t* hash )
 {
  for ( int i = 6; i < 16; i++ ) hash[i] = 0;
 }
--- a/algo/x16/x16rv2.c
+++ b/algo/x16/x16rv2.c
@@ -33,7 +33,7 @@ union _x16rv2_context_overlay
 typedef union _x16rv2_context_overlay x16rv2_context_overlay;

 // Pad the 24 bytes tiger hash to 64 bytes
-inline void padtiger512(uint32_t* hash) {
+static inline void padtiger512(uint32_t* hash) {
   for (int i = (24/4); i < (64/4); i++) hash[i] = 0;
 }