v24.1

2025-09-17 23:44:27 +00:00 · 2024-04-16 21:31:35 -04:00
parent 9d3a46c355
commit 4f930574cc
33 changed files with 757 additions and 609 deletions
--- a/Makefile.am
+++ b/Makefile.am
@@ -16,6 +16,7 @@ bin_PROGRAMS	= cpuminer
 dist_man_MANS	= cpuminer.1

 cpuminer_SOURCES = \
+  dummy.cpp \
  cpu-miner.c \
  util.c \
  api.c \
@@ -113,7 +114,6 @@ cpuminer_SOURCES = \
  algo/lyra2/phi2-4way.c \
  algo/lyra2/phi2.c \
  algo/m7m/m7m.c \
-  algo/m7m/magimath.cpp \
  algo/nist5/nist5-gate.c \
  algo/nist5/nist5-4way.c \
  algo/nist5/nist5.c \
@@ -289,7 +289,7 @@ if HAVE_WINDOWS
 endif

 cpuminer_LDFLAGS	= @LDFLAGS@
-cpuminer_LDADD	= @LIBCURL@ @JANSSON_LIBS@ @PTHREAD_LIBS@ @WS2_LIBS@  -lgmp
+cpuminer_LDADD	= @LIBCURL@ @JANSSON_LIBS@ @PTHREAD_LIBS@ @WS2_LIBS@ -lgmp
 cpuminer_CPPFLAGS = @LIBCURL_CPPFLAGS@ $(ALL_INCLUDES)
 cpuminer_CFLAGS   = -Wno-pointer-sign -Wno-pointer-to-int-cast $(disable_flags)

--- a/9
+++ b/9
@@ -75,6 +75,15 @@ If not what makes it happen or not happen?
 Change Log
 ----------

+v24.1
+
+#414: fix bug in merkle error handling.
+#416: change $nproc to $(nproc) in build scripts.
+#420: change some inline function definitions to static inline. 
+#413: Fix formatting error for share result log when using no-color.
+Faster 2 way interleaving.
+Clean up sha256 architecture targeting.
+
 v23.15

 Fixed x11gost (sib) algo for all architectures, broken in v3.23.4.
--- a/algo-gate-api.h
+++ b/algo-gate-api.h
@@ -107,16 +107,16 @@ typedef  uint32_t set_t;
 //  AVX10_256 is compatible with AVX2 + VAES

 // return set containing all elements from sets a & b
-inline set_t set_union ( set_t a, set_t b ) { return a | b; }
+static inline set_t set_union ( set_t a, set_t b ) { return a | b; }

 // return set contained common elements from sets a & b
-inline set_t set_intsec ( set_t a, set_t b) { return a & b; }
+static inline set_t set_intsec ( set_t a, set_t b) { return a & b; }

 // all elements in set a are included in set b
-inline bool set_incl ( set_t a, set_t b ) { return (a & b) == a; }
+static inline bool set_incl ( set_t a, set_t b ) { return (a & b) == a; }

 // no elements in set a are included in set b
-inline bool set_excl ( set_t a, set_t b ) { return (a & b) == 0; }
+static inline bool set_excl ( set_t a, set_t b ) { return (a & b) == 0; }

 typedef struct
 {
--- a/algo/m7m/m7m.c
+++ b/algo/m7m/m7m.c
@@ -21,7 +21,7 @@
 #define EPS1 DBL_EPSILON
 #define EPS2 3.0e-11

-inline double exp_n( double xt )
+static inline double exp_n( double xt )
 {
    if ( xt < -700.0 )
        return 0;
@@ -33,7 +33,7 @@ inline double exp_n( double xt )
        return exp( xt );
 }

-inline double exp_n2( double x1, double x2 )
+static inline double exp_n2( double x1, double x2 )
 {
    double p1 = -700., p2 = -37., p3 = -0.8e-8, p4 = 0.8e-8,
           p5 = 37., p6 = 700.;
--- a/algo/m7m/magimath.cpp
+++ b/algo/m7m/magimath.cpp
@@ -1,75 +0,0 @@
-// Copyright (c) 2014 The Magi developers
-// Distributed under the MIT/X11 software license, see the accompanying
-// file COPYING or http://www.opensource.org/licenses/mit-license.php.
-
-#include <iostream>
-#include <cfloat>
-#include <limits>
-#include <math.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdint.h>
-
-#include "magimath.h"
-
-#define EPS1 (std::numeric_limits<double>::epsilon())
-#define EPS2 3.0e-11
-
-static void gauleg(double x1, double x2, double x[], double w[], const int n)
-{
-	int m,j,i;
-	double z1, z, xm, xl, pp, p3, p2, p1;
-	m=(n+1)/2;
-	xm=0.5*(x2+x1);
-	xl=0.5*(x2-x1);
-	for (i=1;i<=m;i++) {
-		z=cos(3.141592654*(i-0.25)/(n+0.5));
-		do {
-			p1=1.0;
-			p2=0.0;
-			for (j=1;j<=n;j++) {
-				p3=p2;
-				p2=p1;
-				p1=((2.0*j-1.0)*z*p2-(j-1.0)*p3)/j;
-			}
-			pp=n*(z*p1-p2)/(z*z-1.0);
-			z1=z;
-			z=z1-p1/pp;
-		} while (fabs(z-z1) > EPS2);
-		x[i]=xm-xl*z;
-		x[n+1-i]=xm+xl*z;
-		w[i]=2.0*xl/((1.0-z*z)*pp*pp);
-		w[n+1-i]=w[i];
-	}
-}
-
-static double GaussianQuad_N(double func(const double), const double a2, const double b2, const int NptGQ)
-{
-	double s=0.0;
-#ifdef _MSC_VER
-#define SW_DIVS 23
-	double x[SW_DIVS+1], w[SW_DIVS+1];
-#else
-	double x[NptGQ+1], w[NptGQ+1];
-#endif
-
-	gauleg(a2, b2, x, w, NptGQ);
-
-	for (int j=1; j<=NptGQ; j++) {
-		s += w[j]*func(x[j]);
-	}
-
-	return s;
-}
-
-static double swit_(double wvnmb)
-{
-	return pow( (5.55243*(exp_n(-0.3*wvnmb/15.762) - exp_n(-0.6*wvnmb/15.762)))*wvnmb, 0.5)
-		/ 1034.66 * pow(sin(wvnmb/65.), 2.);
-}
-
-uint32_t sw_(int nnounce, int divs)
-{
-	double wmax = ((sqrt((double)(nnounce))*(1.+EPS1))/450+100);
-	return ((uint32_t)(GaussianQuad_N(swit_, 0., wmax, divs)*(1.+EPS1)*1.e6));
-}
--- a/algo/m7m/magimath.h
+++ b/algo/m7m/magimath.h
@@ -1,54 +0,0 @@
-// Copyright (c) 2014 The Magi developers
-// Distributed under the MIT/X11 software license, see the accompanying
-// file COPYING or http://www.opensource.org/licenses/mit-license.php.
-#ifndef MAGI_MATH_H
-#define MAGI_MATH_H
-
-#include <math.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-uint32_t sw_(int nnounce, int divs);
-
-#ifdef __cplusplus
-}
-#endif
-
-
-inline double exp_n(double xt)
-{
-	double p1 = -700.0, p3 = -0.8e-8, p4 = 0.8e-8, p6 = 700.0;
-	if(xt < p1)
-		return 0;
-	else if(xt > p6)
-		return 1e200;
-	else if(xt > p3 && xt < p4)
-		return (1.0 + xt);
-	else
-		return exp(xt);
-}
-
-// 1 / (1 + exp(x1-x2))
-inline double exp_n2(double x1, double x2)
-{
-	double p1 = -700., p2 = -37., p3 = -0.8e-8, p4 = 0.8e-8, p5 = 37., p6 = 700.;
-	double xt = x1 - x2;
-	if (xt < p1+1.e-200)
-		return 1.;
-	else if (xt > p1 && xt < p2 + 1.e-200)
-		return ( 1. - exp(xt) );
-	else if (xt > p2 && xt < p3 + 1.e-200)
-		return ( 1. / (1. + exp(xt)) );
-	else if (xt > p3 && xt < p4)
-		return ( 1. / (2. + xt) );
-	else if (xt > p4 - 1.e-200 && xt < p5)
-		return ( exp(-xt) / (1. + exp(-xt)) );
-	else if (xt > p5 - 1.e-200 && xt < p6)
-		return ( exp(-xt) );
-	else //if (xt > p6 - 1.e-200)
-		return 0.;
-}
-
-#endif
--- a/algo/sha/sha1-hash.c
+++ b/algo/sha/sha1-hash.c
@@ -205,7 +205,7 @@ void sha1_x86_sha_transform_be( uint32_t *state_out, const void *input,

 #endif

-#if defined(__aarch64__) && defined(__ARM_FEATURE_SHA2)
+#if defined(__ARM_NEON) && defined(__ARM_FEATURE_SHA2)

 #define sha1_neon_rounds( state_out, data, state_in ) \
 { \
--- a/algo/sha/sha256-hash.c
+++ b/algo/sha/sha256-hash.c
@@ -1,6 +1,6 @@
 #include "sha256-hash.h"

-#if ( defined(__x86_64__) && defined(__SHA__) ) || defined(__ARM_NEON) && defined(__ARM_FEATURE_SHA2)
+#if ( defined(__x86_64__) && defined(__SHA__) ) || ( defined(__ARM_NEON) && defined(__ARM_FEATURE_SHA2) )

 static const uint32_t SHA256_IV[8] =
 {
@@ -189,7 +189,7 @@ static const uint32_t SHA256_IV[8] =
    _mm_store_si128( (__m128i*) &state_out[4], STATE1 ); \
 }

-void sha256_opt_transform_le( uint32_t *state_out, const void *input,
+void sha256_x86_sha_transform_le( uint32_t *state_out, const void *input,
                              const uint32_t *state_in )
 {
 #define load_msg( m, i ) casti_v128( m, i )
@@ -197,7 +197,7 @@ void sha256_opt_transform_le( uint32_t *state_out, const void *input,
 #undef load_msg
 }

-void sha256_opt_transform_be( uint32_t *state_out, const void *input,
+void sha256_x86_sha_transform_be( uint32_t *state_out, const void *input,
                              const uint32_t *state_in )
 {
 #define load_msg( m, i ) v128_bswap32( casti_v128( m, i ) )
@@ -517,7 +517,7 @@ void sha256_opt_transform_be( uint32_t *state_out, const void *input,
    _mm_store_si128( (__m128i*) &out_Y[4], STATE1_Y ); \
 }

-void sha256_ni2x_transform_le( uint32_t *out_X, uint32_t*out_Y,
+void sha256_x86_x2sha_transform_le( uint32_t *out_X, uint32_t*out_Y,
                                 const void *msg_X, const void *msg_Y,
                                 const uint32_t *in_X, const uint32_t *in_Y )
 {
@@ -526,7 +526,7 @@ void sha256_ni2x_transform_le( uint32_t *out_X, uint32_t*out_Y,
 #undef load_msg
 }

-void sha256_ni2x_transform_be( uint32_t *out_X, uint32_t*out_Y,
+void sha256_x86_x2sha_transform_be( uint32_t *out_X, uint32_t*out_Y,
                              const void *msg_X, const void *msg_Y,
                              const uint32_t *in_X, const uint32_t *in_Y )
 {
@@ -541,7 +541,7 @@ void sha256_ni2x_transform_be( uint32_t *out_X, uint32_t*out_Y,
 // The goal is to avoid any redundant processing in final. Prehash is almost
 // 4 rounds total, only missing the final addition of the nonce.
 // Nonce must be set to zero for prehash.
-void sha256_ni_prehash_3rounds( uint32_t *ostate, const void *msg,
+void sha256_x86_sha_prehash_3rounds( uint32_t *ostate, const void *msg,
                                uint32_t *sstate, const uint32_t *istate )
 {
   __m128i STATE0, STATE1, MSG, TMP;
@@ -569,7 +569,7 @@ void sha256_ni_prehash_3rounds( uint32_t *ostate, const void *msg,
   casti_m128i( ostate, 1 ) = STATE1;
 }

-void sha256_ni2x_final_rounds( uint32_t *out_X, uint32_t *out_Y,
+void sha256_x86_x2sha_final_rounds( uint32_t *out_X, uint32_t *out_Y,
                 const void *msg_X, const void *msg_Y,
                 const uint32_t *state_mid_X, const uint32_t *state_mid_Y,
                 const uint32_t *state_save_X, const uint32_t *state_save_Y )
--- a/algo/sha/sha256-hash.h
+++ b/algo/sha/sha256-hash.h
@@ -5,27 +5,21 @@
 #include "simd-utils.h"
 #include "cpuminer-config.h"

-// generic interface 
+static const uint32_t SHA256_IV[8];
+
+#if defined(__x86_64__) && defined(__SHA__)

 typedef struct
 {
-   unsigned char buf[64];    /* first field, for alignment */
+   unsigned char buf[64];
   uint32_t state[8];
   uint64_t count;
 } sha256_context __attribute__((aligned(64)));

-static const uint32_t SHA256_IV[8];
-
 void sha256_full( void *hash, const void *data, size_t len );
 void sha256_update( sha256_context *ctx, const void *data, size_t len );
 void sha256_final( sha256_context *ctx, void *hash );
 void sha256_ctx_init( sha256_context *ctx );
-void sha256_transform_le( uint32_t *state_out, const uint32_t *data,
-                          const uint32_t *state_in );
-void sha256_transform_be( uint32_t *state_out, const uint32_t *data,
-                          const uint32_t *state_in );
-
-#if defined(__x86_64__) && defined(__SHA__)

 void sha256_x86_sha_transform_le( uint32_t *state_out, const void *input,
                                  const uint32_t *state_in );
@@ -50,14 +44,6 @@ void sha256_x86_x2sha_final_rounds( uint32_t *state_out_X, uint32_t *state_out_Y
                 const uint32_t *state_mid_X, const uint32_t *state_mid_Y,
                 const uint32_t *state_save_X, const uint32_t *state_save_Y );

-// Temporary during name transition
-#define sha256_opt_transform_le   sha256_x86_sha_transform_le
-#define sha256_opt_transform_be   sha256_x86_sha_transform_be
-#define sha256_ni2x_transform_le  sha256_x86_x2sha_transform_le
-#define sha256_ni2x_transform_be  sha256_x86_x2sha_transform_be
-#define sha256_ni_prehash_3rounds sha256_x86_sha_prehash_3rounds
-#define sha256_ni2x_final_rounds  sha256_x86_x2sha_final_rounds
-
 // generic API
 #define sha256_transform_le        sha256_x86_sha_transform_le
 #define sha256_transform_be        sha256_x86_sha_transform_be
@@ -68,6 +54,20 @@ void sha256_x86_x2sha_final_rounds( uint32_t *state_out_X, uint32_t *state_out_Y

 #elif defined(__ARM_NEON) && defined(__ARM_FEATURE_SHA2)

+// SHA-256 AArch64 with NEON & SHA2
+
+typedef struct
+{
+   unsigned char buf[64];
+   uint32_t state[8];
+   uint64_t count;
+} sha256_context __attribute__((aligned(64)));
+
+void sha256_full( void *hash, const void *data, size_t len );
+void sha256_update( sha256_context *ctx, const void *data, size_t len );
+void sha256_final( sha256_context *ctx, void *hash );
+void sha256_ctx_init( sha256_context *ctx );
+
 void sha256_neon_sha_transform_be( uint32_t *state_out, const void *input,
                                   const uint32_t *state_in );
 void sha256_neon_sha_transform_le( uint32_t *state_out, const void *input,
@@ -89,14 +89,6 @@ void sha256_neon_x2sha_final_rounds( uint32_t *state_out_X,
                 const uint32_t *state_mid_X, const uint32_t *state_mid_Y,
                 const uint32_t *state_save_X, const uint32_t *state_save_Y );

-// Temporary during name transition
-#define sha256_transform_le         sha256_neon_sha_transform_le
-#define sha256_transform_be         sha256_neon_sha_transform_be
-#define sha256_2x_transform_le      sha256_neon_x2sha_transform_le
-#define sha256_2x_transform_be      sha256_neon_x2sha_transform_be
-#define sha256_prehash_3rounds      sha256_neon_sha_prehash_3rounds
-#define sha256_2x_final_rounds      sha256_neon_x2sha_final_rounds
-
 // generic API
 #define sha256_transform_le        sha256_neon_sha_transform_le
 #define sha256_transform_be        sha256_neon_sha_transform_be
@@ -106,9 +98,11 @@ void sha256_neon_x2sha_final_rounds( uint32_t *state_out_X,
 #define sha256_2x_final_rounds     sha256_neon_x2sha_final_rounds

 #else
+
 // without HW acceleration...
 #include "sph_sha2.h"

+#define sha256_context              sph_sha256_context
 #define sha256_full                 sph_sha256_full
 #define sha256_ctx_init             sph_sha256_init
 #define sha256_update               sph_sha256
@@ -117,12 +111,11 @@ void sha256_neon_x2sha_final_rounds( uint32_t *state_out_X,
 #define sha256_transform_be         sph_sha256_transform_be
 #define sha256_prehash_3rounds      sph_sha256_prehash_3rounds

-
 #endif

 #if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)

-// SHA-256 16 way
+// SHA-256 16 way x86_64

 typedef struct
 {
@@ -147,7 +140,7 @@ void sha256_16x32_final_rounds( __m512i *state_out, const __m512i *data,
 int sha256_16x32_transform_le_short( __m512i *state_out, const __m512i *data,
                            const __m512i *state_in, const uint32_t *target );

-#define sha256_16way_context sha256_16x32_context
+#define sha256_16way_context               sha256_16x32_context
 #define sha256_16way_init                  sha256_16x32_init
 #define sha256_16way_update                sha256_16x32_update
 #define sha256_16way_close                 sha256_16x32_close
@@ -162,7 +155,7 @@ int sha256_16x32_transform_le_short( __m512i *state_out, const __m512i *data,

 #if defined (__AVX2__)

-// SHA-256 8 way
+// SHA-256 8 way x86_64

 typedef struct
 {
@@ -201,7 +194,7 @@ int sha256_8x32_transform_le_short( __m256i *state_out, const __m256i *data,

 #endif  // AVX2

-// SHA-256 4 way
+// SHA-256 4 way x86_64 with SSE2 or AArch64 with NEON

 typedef struct
 {
--- a/algo/x11/timetravel-gate.c
+++ b/algo/x11/timetravel-gate.c
@@ -16,14 +16,14 @@ bool register_timetravel_algo( algo_gate_t* gate )
  return true;
 };

-inline void tt_swap( int *a, int *b )
+static inline void tt_swap( int *a, int *b )
 {
        int c = *a;
        *a = *b;
        *b = c;
 }

-inline void reverse( int *pbegin, int *pend )
+static inline void reverse( int *pbegin, int *pend )
 {
   while ( (pbegin != pend) && (pbegin != --pend) )
   {
--- a/algo/x11/timetravel10-gate.c
+++ b/algo/x11/timetravel10-gate.c
@@ -16,14 +16,14 @@ bool register_timetravel10_algo( algo_gate_t* gate )
  return true;
 };

-inline void tt10_swap( int *a, int *b )
+static inline void tt10_swap( int *a, int *b )
 {
        int c = *a;
        *a = *b;
        *b = c;
 }

-inline void reverse( int *pbegin, int *pend )
+static inline void reverse( int *pbegin, int *pend )
 {
   while ( (pbegin != pend) && (pbegin != --pend) )
   {
--- a/algo/x16/x16rv2-4way.c
+++ b/algo/x16/x16rv2-4way.c
@@ -730,7 +730,7 @@ typedef union _x16rv2_4way_context_overlay x16rv2_4way_context_overlay;
 static __thread x16rv2_4way_context_overlay x16rv2_ctx;

 // Pad the 24 bytes tiger hash to 64 bytes
-inline void padtiger512( uint32_t* hash )
+static inline void padtiger512( uint32_t* hash )
 {
  for ( int i = 6; i < 16; i++ ) hash[i] = 0;
 }
--- a/algo/x16/x16rv2.c
+++ b/algo/x16/x16rv2.c
@@ -33,7 +33,7 @@ union _x16rv2_context_overlay
 typedef union _x16rv2_context_overlay x16rv2_context_overlay;

 // Pad the 24 bytes tiger hash to 64 bytes
-inline void padtiger512(uint32_t* hash) {
+static inline void padtiger512(uint32_t* hash) {
   for (int i = (24/4); i < (64/4); i++) hash[i] = 0;
 }

--- a/arm-build.sh
+++ b/arm-build.sh
@@ -9,6 +9,6 @@ rm -f config.status

 CFLAGS="-O3 -march=native -Wall -flax-vector-conversions" ./configure  --with-curl 

-make -j $nproc
+make -j $(nproc)

 strip -s cpuminer
--- a/armbuild-all.sh
+++ b/armbuild-all.sh
@@ -10,33 +10,33 @@ make distclean || echo clean
 rm -f config.status
 ./autogen.sh || echo done
 CFLAGS="-O3 -march=armv8-a+crypto+sha2+aes -Wall -flax-vector-conversions" ./configure  --with-curl 
-make -j $nproc
+make -j $(nproc)
 strip -s cpuminer
 mv cpuminer cpuminer-armv8-aes-sha2

 make clean || echo clean
 rm -f config.status
 CFLAGS="-O3 -march=armv8-a+crypto+sha2 -Wall -flax-vector-conversions" ./configure  --with-curl      
-make -j $nproc
+make -j $(nproc)
 strip -s cpuminer
 mv cpuminer cpuminer-armv8-sha2

 make clean || echo clean
 rm -f config.status
 CFLAGS="-O3 -march=armv8-a+crypto+aes -Wall -flax-vector-conversions" ./configure  --with-curl      
-make -j $nproc
+make -j $(nproc)
 strip -s cpuminer
 mv cpuminer cpuminer-armv8-aes

 make clean || echo clean
 rm -f config.status
 CFLAGS="-O3 -march=armv8-a -Wall -flax-vector-conversions" ./configure  --with-curl
-make -j $nproc
+make -j $(nproc)
 strip -s cpuminer
 mv cpuminer cpuminer-armv8

 make clean || echo clean
 rm -f config.status
 CFLAGS="-O3 -march=native -Wall -flax-vector-conversions" ./configure  --with-curl     
-make -j $nproc
+make -j $(nproc)
 strip -s cpuminer
--- a/build-allarch.sh
+++ b/build-allarch.sh
@@ -13,7 +13,7 @@ rm -f config.status
 CFLAGS="-O3 -march=icelake-client -Wall" ./configure --with-curl
 # Rocketlake needs gcc-11
 #CFLAGS="-O3 -march=rocketlake -Wall" ./configure --with-curl
-make -j $nproc
+make -j $(nproc)
 strip -s cpuminer
 mv cpuminer cpuminer-avx512-sha-vaes

@@ -34,7 +34,7 @@ rm -f config.status
 # Inclomplete list of Zen4 AVX512 extensions but includes all extensions used by cpuminer.
 CFLAGS="-O3 -march=znver3 -mavx512f -mavx512cd -mavx512dq -mavx512bw -mavx512vl -mavx512vbmi -mavx512vbmi2 -mavx512bitalg -mavx512vpopcntdq -Wall" ./configure --with-curl
 #CFLAGS="-O3 -march=znver2 -mvaes -mavx512f -mavx512dq -mavx512bw -mavx512vl -mavx512vbmi -Wall" ./configure --with-curl
-make -j $nproc
+make -j $(nproc)
 strip -s cpuminer
 mv cpuminer cpuminer-zen4

@@ -43,7 +43,7 @@ make clean || echo clean
 rm -f config.status
 #CFLAGS="-O3 -march=znver2 -mvaes" ./configure --with-curl
 CFLAGS="-O3 -march=znver3 -fno-common " ./configure --with-curl
-make -j $nproc
+make -j $(nproc)
 strip -s cpuminer
 mv cpuminer cpuminer-zen3

@@ -51,7 +51,7 @@ mv cpuminer cpuminer-zen3
 make clean || echo clean
 rm -f config.status
 CFLAGS="-O3 -march=skylake-avx512 -maes -Wall" ./configure --with-curl
-make -j $nproc
+make -j $(nproc)
 strip -s cpuminer
 mv cpuminer cpuminer-avx512

@@ -60,7 +60,7 @@ make clean || echo done
 rm -f config.status
 # vaes doesn't include aes
 CFLAGS="-O3 -maes -mavx2 -msha -mvaes -Wall" ./configure --with-curl
-make -j $nproc
+make -j $(nproc)
 strip -s cpuminer
 mv cpuminer cpuminer-avx2-sha-vaes

@@ -69,7 +69,7 @@ make clean || echo done
 rm -f config.status
 #CFLAGS="-O3 -march=znver1 -maes -Wall" ./configure --with-curl
 CFLAGS="-O3 -maes -mavx2 -msha -Wall" ./configure --with-curl
-make -j $nproc
+make -j $(nproc)
 strip -s cpuminer
 mv cpuminer cpuminer-avx2-sha

@@ -78,7 +78,7 @@ make clean || echo clean
 rm -f config.status
 # GCC 9 doesn't include AES with core-avx2
 CFLAGS="-O3 -march=core-avx2 -maes -Wall" ./configure --with-curl
-make -j $nproc
+make -j $(nproc)
 strip -s cpuminer
 mv cpuminer cpuminer-avx2

@@ -86,7 +86,7 @@ mv cpuminer cpuminer-avx2
 make clean || echo clean
 rm -f config.status
 CFLAGS="-O3 -march=corei7-avx -maes -Wall" ./configure --with-curl
-make -j $nproc
+make -j $(nproc)
 strip -s cpuminer
 mv cpuminer cpuminer-avx

@@ -94,7 +94,7 @@ mv cpuminer cpuminer-avx
 make clean || echo clean
 rm -f config.status
 CFLAGS="-O3 -march=westmere -maes -Wall" ./configure --with-curl
-make -j $nproc
+make -j $(nproc)
 strip -s cpuminer
 mv cpuminer cpuminer-aes-sse42

@@ -102,7 +102,7 @@ mv cpuminer cpuminer-aes-sse42
 make clean || echo clean
 rm -f config.status
 CFLAGS="-O3 -march=corei7 -Wall" ./configure --with-curl
-make -j $nproc
+make -j $(nproc)
 strip -s cpuminer
 mv cpuminer cpuminer-sse42

@@ -110,7 +110,7 @@ mv cpuminer cpuminer-sse42
 make clean || echo clean
 rm -f config.status
 CFLAGS="-O3 -march=core2 -Wall" ./configure --with-curl
-make -j $nproc
+make -j $(nproc)
 strip -s cpuminer
 mv cpuminer cpuminer-ssse3

@@ -118,7 +118,7 @@ mv cpuminer cpuminer-ssse3
 make clean || echo clean
 rm -f config.status
 CFLAGS="-O3 -msse2 -Wall" ./configure --with-curl
-make -j $nproc
+make -j $(nproc)
 strip -s cpuminer
 mv cpuminer cpuminer-sse2

@@ -126,7 +126,7 @@ mv cpuminer cpuminer-sse2
 make clean || echo clean
 rm -f config.status
 CFLAGS="-O3 -march=x86-64 -Wall" ./configure --with-curl
-make -j $nproc
+make -j $(nproc)
 strip -s cpuminer
 mv cpuminer cpuminer-x64

@@ -134,6 +134,6 @@ mv cpuminer cpuminer-x64
 make clean || echo done
 rm -f config.status
 CFLAGS="-O3 -march=native -Wall" ./configure --with-curl
-make -j $nproc
+make -j $(nproc)
 strip -s cpuminer

--- a/build-armv8.sh
+++ b/build-armv8.sh
@@ -10,6 +10,6 @@ rm -f config.status
 CFLAGS="-O2 -march=armv8-a+crypto+sha2+aes -Wall -flax-vector-conversions" ./configure  --with-curl  --host=aarch64-cortexa76-elf --build=x86_64-pc-linux-gnu --target=aarch64-cortexa76-elf
 #CFLAGS="-O2 -march=armv8-a+crypto+sha2+aes -Wall -flax-vector-conversions" ./configure  --with-curl

-make -j $nproc
+make -j $(nproc)

 strip -s cpuminer
--- a/build-avx2.sh
+++ b/build-avx2.sh
@@ -22,6 +22,6 @@ rm -f config.status
 CFLAGS="-O3 -march=haswell -maes -Wall" ./configure --with-curl
 #CFLAGS="-O3 -march=native -Wall" CXXFLAGS="$CFLAGS -std=gnu++11" ./configure --with-curl

-make -j $nproc
+make -j $(nproc)

 strip -s cpuminer
--- a/build-msys2.sh
+++ b/build-msys2.sh
@@ -6,5 +6,5 @@ make distclean || echo clean
 rm -f config.status
 ./autogen.sh || echo done
 CFLAGS="-O3 -march=native -Wall -D_WIN32_WINNT=0x0601" ./configure --with-curl
-make -j 4
+make -j $(nproc)
 strip -s cpuminer
--- a/build.sh
+++ b/build.sh
@@ -15,6 +15,6 @@ rm -f config.status
 #CFLAGS="-O3 -march=native -Wall" ./configure --with-curl --with-crypto=$HOME/usr
 CFLAGS="-O3 -march=native -Wall" ./configure --with-curl

-make -j $nproc
+make -j $(nproc)

 strip -s cpuminer
--- a/clean-all.sh
+++ b/clean-all.sh
@@ -2,7 +2,7 @@
 #
 # make clean and rm all the targetted executables.

-rm cpuminer-avx512-sha-vaes cpuminer-avx512 cpuminer-avx2 cpuminer-avx cpuminer-aes-sse42 cpuminer-sse2 cpuminer-avx2-sha cpuminer-sse42 cpuminer-ssse3 cpuminer-avx2-sha-vaes cpuminer-zen3 cpuminer-zen4 cpuminer-x64 > /dev/null
+rm cpuminer-avx512-sha-vaes cpuminer-avx512 cpuminer-avx2 cpuminer-avx cpuminer-aes-sse42 cpuminer-sse2 cpuminer-avx2-sha cpuminer-sse42 cpuminer-ssse3 cpuminer-avx2-sha-vaes cpuminer-zen3 cpuminer-zen4 cpuminer-x64 cpuminer-armv8 cpuminer-armv8-aes cpuminer-armv8-aes-sha2 cpuminer-armv8-sha2 > /dev/null

 rm cpuminer-avx512-sha-vaes.exe cpuminer-avx512-sha.exe cpuminer-avx512.exe cpuminer-avx2.exe cpuminer-avx.exe cpuminer-aes-sse42.exe cpuminer-sse2.exe cpuminer-avx2-sha.exe cpuminer-sse42.exe cpuminer-ssse3.exe cpuminer-avx2-sha-vaes.exe cpuminer-zen3.exe cpuminer-zen4.exe cpuminer-x64.exe > /dev/null

--- a/20
+++ b/20
@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.71 for cpuminer-opt 23.15.
+# Generated by GNU Autoconf 2.71 for cpuminer-opt 24.1.
 #
 #
 # Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation,
@@ -608,8 +608,8 @@ MAKEFLAGS=
 # Identity of this package.
 PACKAGE_NAME='cpuminer-opt'
 PACKAGE_TARNAME='cpuminer-opt'
-PACKAGE_VERSION='23.15'
-PACKAGE_STRING='cpuminer-opt 23.15'
+PACKAGE_VERSION='24.1'
+PACKAGE_STRING='cpuminer-opt 24.1'
 PACKAGE_BUGREPORT=''
 PACKAGE_URL=''

@@ -1360,7 +1360,7 @@ if test "$ac_init_help" = "long"; then
  # Omit some internal or obsolete options to make the list less imposing.
  # This message is too long to be a string in the A/UX 3.1 sh.
  cat <<_ACEOF
-\`configure' configures cpuminer-opt 23.15 to adapt to many kinds of systems.
+\`configure' configures cpuminer-opt 24.1 to adapt to many kinds of systems.

 Usage: $0 [OPTION]... [VAR=VALUE]...

@@ -1432,7 +1432,7 @@ fi

 if test -n "$ac_init_help"; then
  case $ac_init_help in
-     short | recursive ) echo "Configuration of cpuminer-opt 23.15:";;
+     short | recursive ) echo "Configuration of cpuminer-opt 24.1:";;
   esac
  cat <<\_ACEOF

@@ -1538,7 +1538,7 @@ fi
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
  cat <<\_ACEOF
-cpuminer-opt configure 23.15
+cpuminer-opt configure 24.1
 generated by GNU Autoconf 2.71

 Copyright (C) 2021 Free Software Foundation, Inc.
@@ -1985,7 +1985,7 @@ cat >config.log <<_ACEOF
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.

-It was created by cpuminer-opt $as_me 23.15, which was
+It was created by cpuminer-opt $as_me 24.1, which was
 generated by GNU Autoconf 2.71.  Invocation command line was

  $ $0$ac_configure_args_raw
@@ -3593,7 +3593,7 @@ fi

 # Define the identity of the package.
 PACKAGE='cpuminer-opt'
- VERSION='23.15'
+ VERSION='24.1'


 printf "%s\n" "#define PACKAGE \"$PACKAGE\"" >>confdefs.h
@@ -7508,7 +7508,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by cpuminer-opt $as_me 23.15, which was
+This file was extended by cpuminer-opt $as_me 24.1, which was
 generated by GNU Autoconf 2.71.  Invocation command line was

  CONFIG_FILES    = $CONFIG_FILES
@@ -7576,7 +7576,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\
 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 ac_cs_config='$ac_cs_config_escaped'
 ac_cs_version="\\
-cpuminer-opt config.status 23.15
+cpuminer-opt config.status 24.1
 configured by $0, generated by GNU Autoconf 2.71,
  with options \\"\$ac_cs_config\\"

--- a/configure.ac
+++ b/configure.ac
@@ -1,4 +1,4 @@
-AC_INIT([cpuminer-opt], [23.15])
+AC_INIT([cpuminer-opt], [24.1])

 AC_PREREQ([2.59c])
 AC_CANONICAL_SYSTEM
--- a/20
+++ b/20
@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.71 for cpuminer-opt 23.14.
+# Generated by GNU Autoconf 2.71 for cpuminer-opt 23.16.
 #
 #
 # Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation,
@@ -608,8 +608,8 @@ MAKEFLAGS=
 # Identity of this package.
 PACKAGE_NAME='cpuminer-opt'
 PACKAGE_TARNAME='cpuminer-opt'
-PACKAGE_VERSION='23.14'
-PACKAGE_STRING='cpuminer-opt 23.14'
+PACKAGE_VERSION='23.16'
+PACKAGE_STRING='cpuminer-opt 23.16'
 PACKAGE_BUGREPORT=''
 PACKAGE_URL=''

@@ -1360,7 +1360,7 @@ if test "$ac_init_help" = "long"; then
  # Omit some internal or obsolete options to make the list less imposing.
  # This message is too long to be a string in the A/UX 3.1 sh.
  cat <<_ACEOF
-\`configure' configures cpuminer-opt 23.14 to adapt to many kinds of systems.
+\`configure' configures cpuminer-opt 23.16 to adapt to many kinds of systems.

 Usage: $0 [OPTION]... [VAR=VALUE]...

@@ -1432,7 +1432,7 @@ fi

 if test -n "$ac_init_help"; then
  case $ac_init_help in
-     short | recursive ) echo "Configuration of cpuminer-opt 23.14:";;
+     short | recursive ) echo "Configuration of cpuminer-opt 23.16:";;
   esac
  cat <<\_ACEOF

@@ -1538,7 +1538,7 @@ fi
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
  cat <<\_ACEOF
-cpuminer-opt configure 23.14
+cpuminer-opt configure 23.16
 generated by GNU Autoconf 2.71

 Copyright (C) 2021 Free Software Foundation, Inc.
@@ -1985,7 +1985,7 @@ cat >config.log <<_ACEOF
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.

-It was created by cpuminer-opt $as_me 23.14, which was
+It was created by cpuminer-opt $as_me 23.16, which was
 generated by GNU Autoconf 2.71.  Invocation command line was

  $ $0$ac_configure_args_raw
@@ -3593,7 +3593,7 @@ fi

 # Define the identity of the package.
 PACKAGE='cpuminer-opt'
- VERSION='23.14'
+ VERSION='23.16'


 printf "%s\n" "#define PACKAGE \"$PACKAGE\"" >>confdefs.h
@@ -7508,7 +7508,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by cpuminer-opt $as_me 23.14, which was
+This file was extended by cpuminer-opt $as_me 23.16, which was
 generated by GNU Autoconf 2.71.  Invocation command line was

  CONFIG_FILES    = $CONFIG_FILES
@@ -7576,7 +7576,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\
 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 ac_cs_config='$ac_cs_config_escaped'
 ac_cs_version="\\
-cpuminer-opt config.status 23.14
+cpuminer-opt config.status 23.16
 configured by $0, generated by GNU Autoconf 2.71,
  with options \\"\$ac_cs_config\\"

--- a/cpu-miner.c
+++ b/cpu-miner.c
@@ -1294,7 +1294,7 @@ static int share_result( int result, struct work *work,
   const char *bell = !result && opt_bell ? &ASCII_BELL : "";
   applog( LOG_INFO, "%s%d %s%s %s%s %s%s %s%s%s, %.3f sec (%dms)",
           bell, my_stats.share_count, acol, ares, scol, sres, rcol, rres,
-           bcol, bres, CL_N, share_time, latency );
+           bcol, bres, use_colors ? CL_N : "", share_time, latency );
   if ( unlikely( !( opt_quiet || result || stale ) ) )
   {
      applog2( LOG_INFO, "%sReject reason: %s", bell, reason ? reason : "" );
--- a/dummy.cpp
+++ b/dummy.cpp
@@ -0,0 +1,6 @@
+// This file exists to force the use of g++ as the linker, which in turn
+// links the math library needed by code that includes math.h. gcc will not
+// automatically link math. Without this file linking will fail for m7m.c.
+// Linking math manually and letting gcc do the linking works on Linux,
+// but on Windows the result segfaults. Until that is solved this file must
+// continue to exist.
--- a/miner.h
+++ b/miner.h
@@ -185,13 +185,13 @@ static inline bool is_windows(void)
 */
 #endif

-static inline uint32_t swab32(uint32_t v)
+static inline uint32_t swab32(uint32_t x)
 {
 #ifdef WANT_BUILTIN_BSWAP
-   return __builtin_bswap32(v);
+   return __builtin_bswap32(x);
 #else
-   return ( (x << 24) & 0xff000000u ) | ( (x <<  8) & 0x00ff0000u )
-        | ( (x >>  8) & 0x0000ff00u ) | ( (x >> 24) & 0x000000ffu )
+   return ( ( (x) << 24 ) & 0xff000000u ) | ( ( (x) <<  8 ) & 0x00ff0000u )
+        | ( ( (x) >>  8 ) & 0x0000ff00u ) | ( ( (x) >> 24 ) & 0x000000ffu )


 //   return bswap_32(v);
--- a/simd-utils/intrlv.h
+++ b/simd-utils/intrlv.h
--- a/simd-utils/simd-128.h
+++ b/simd-utils/simd-128.h
@@ -141,7 +141,7 @@
 #define v128_aesdeclast                _mm_aesdeclast_si128
 #define v128_aesdeclast_nokey(v)       _mm_aesdeclast_si128( v, v128_zero )

-// Used instead if casting.
+// Used instead of casting.
 typedef union
 {
   v128_t   v128;
--- a/simd-utils/simd-256.h
+++ b/simd-utils/simd-256.h
@@ -429,6 +429,19 @@ static inline __m256i mm256_not( const __m256i v )

 #endif     // AVX512 else AVX2

+#if defined(__AVX2__)
+
+// 128 bit version of unpack
+#define v256_unpacklo128( v1, v0 )   _mm256_permute2x128_si256( v1, v0, 0x20 )
+#define v256_unpackhi128( v1, v0 )   _mm256_permute2x128_si256( v1, v0, 0x31 )
+
+#else
+
+#define v256_unpacklo128( v1, v0 )   _mm256_permute2f128_si256( v1, v0, 0x20 )
+#define v256_unpackhi128( v1, v0 )   _mm256_permute2f128_si256( v1, v0, 0x31 )
+
+#endif
+
 //
 // Cross lane shuffles
 //
--- a/simd-utils/simd-int.h
+++ b/simd-utils/simd-int.h
@@ -164,11 +164,10 @@ static inline uint32_t ror32( uint32_t a, const int c )
 // obsolete test
 // Compiler check for __int128 support
 // Configure also has a test for int128.
-#if ( __GNUC__ > 4 ) || ( ( __GNUC__ == 4 ) && ( __GNUC_MINOR__ >= 8 ) )
+//#if ( __GNUC__ > 4 ) || ( ( __GNUC__ == 4 ) && ( __GNUC_MINOR__ >= 8 ) )
  #define GCC_INT128 1
-#endif
+//#endif

-// obsolte test
 #if !defined(GCC_INT128)
  #warning "__int128 not supported, requires GCC-4.8 or newer."
 #endif
--- a/sysinfos.c
+++ b/sysinfos.c
@@ -14,6 +14,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include "miner.h"
+#include "simd-utils.h"

 #if defined(__aarch64__) && !defined(__APPLE__)
 // for arm's "cpuid"
@@ -223,8 +224,8 @@ static inline int cpu_fanpercent()
 #define AVX512_F_Flag            (1<<16)
 #define AVX512_DQ_Flag           (1<<17)
 #define AVX512_IFMA_Flag         (1<<21)
-#define AVX512_PF_Flag           (1<<26)
-#define AVX512_ER_Flag           (1<<27)
+#define AVX512_PF_Flag           (1<<26)   // obsolete
+#define AVX512_ER_Flag           (1<<27)   // obsolete
 #define AVX512_CD_Flag           (1<<28)
 #define SHA_Flag                 (1<<29)
 #define AVX512_BW_Flag           (1<<30)
@@ -237,8 +238,8 @@ static inline int cpu_fanpercent()
 #define AVX512_BITALG_Flag       (1<<12)
 #define AVX512_VPOPCNTDQ_Flag    (1<<14)
 // EDX
-#define AVX512_4VNNIW_Flag       (1<< 2)
-#define AVX512_4FMAPS_Flag       (1<< 3)
+#define AVX512_4VNNIW_Flag       (1<< 2)   // obsolete
+#define AVX512_4FMAPS_Flag       (1<< 3)   // obsolete
 #define AVX512_VP2INTERSECT_Flag (1<< 8)
 #define AMX_BF16_Flag            (1<<22)
 #define AVX512_FP16_Flag         (1<<23)
@@ -557,10 +558,15 @@ static inline bool has_aes_ni()
 #elif defined(__aarch64__) && !defined(__APPLE__)
   if ( has_neon() )
   {
-      unsigned int cpu_info[4] = { 0 };
+#if defined(KERNEL_HWCAP_AES)
+      return true;
+#else
+      return false;
+#endif
+/*      unsigned int cpu_info[4] = { 0 };
      cpuid( 0, 0, cpu_info );
      return cpu_info[0] & HWCAP_AES;
-   }
+*/   }
   return false;
 #else
   return false;
@@ -602,10 +608,15 @@ static inline bool has_sha()
 #elif defined(__aarch64__) && !defined(__APPLE__)
    if ( has_neon() )
    {
-       unsigned int cpu_info[4] = { 0 };
+#if defined(KERNEL_HWCAP_SHA2)
+       return true;
+#else
+       return false;
+#endif
+/*       unsigned int cpu_info[4] = { 0 };
       cpuid( 0, 0, cpu_info );
       return cpu_info[0] & HWCAP_SHA2;
-    }
+*/    }
    return false;
 #else
    return false;
--- a/util.c
+++ b/util.c
@@ -2075,11 +2075,7 @@ static bool stratum_notify(struct stratum_ctx *sctx, json_t *params)
         const char *s = json_string_value( json_array_get( merkle_arr, i ) );
         if ( !s || strlen(s) != 64 )
         {
-            for ( int j = sctx->job.merkle_buf_size; j > 0; j-- )
-               free( sctx->job.merkle[i] );
-            free( sctx->job.merkle );
-            sctx->job.merkle_count =
-            sctx->job.merkle_buf_size = 0;
+            sctx->job.merkle_count = 0;
            pthread_mutex_unlock( &sctx->work_lock );
            applog( LOG_ERR, "Stratum notify: invalid Merkle branch" );
            goto out;