This commit is contained in:
Jay D Dee
2024-04-16 21:31:35 -04:00
parent 9d3a46c355
commit 4f930574cc
33 changed files with 757 additions and 609 deletions

View File

@@ -16,6 +16,7 @@ bin_PROGRAMS = cpuminer
dist_man_MANS = cpuminer.1
cpuminer_SOURCES = \
dummy.cpp \
cpu-miner.c \
util.c \
api.c \
@@ -113,7 +114,6 @@ cpuminer_SOURCES = \
algo/lyra2/phi2-4way.c \
algo/lyra2/phi2.c \
algo/m7m/m7m.c \
algo/m7m/magimath.cpp \
algo/nist5/nist5-gate.c \
algo/nist5/nist5-4way.c \
algo/nist5/nist5.c \
@@ -289,7 +289,7 @@ if HAVE_WINDOWS
endif
cpuminer_LDFLAGS = @LDFLAGS@
cpuminer_LDADD = @LIBCURL@ @JANSSON_LIBS@ @PTHREAD_LIBS@ @WS2_LIBS@ -lgmp
cpuminer_LDADD = @LIBCURL@ @JANSSON_LIBS@ @PTHREAD_LIBS@ @WS2_LIBS@ -lgmp
cpuminer_CPPFLAGS = @LIBCURL_CPPFLAGS@ $(ALL_INCLUDES)
cpuminer_CFLAGS = -Wno-pointer-sign -Wno-pointer-to-int-cast $(disable_flags)

View File

@@ -75,6 +75,15 @@ If not what makes it happen or not happen?
Change Log
----------
v24.1
#414: fix bug in merkle error handling.
#416: change $nproc to $(nproc) in build scripts.
#420: change some inline function definitions to static inline.
#413: Fix formatting error for share result log when using no-color.
Faster 2 way interleaving.
Cleanup sha256 architecture targetting.
v23.15
Fixed x11gost (sib) algo for all architectures, broken in v3.23.4.

View File

@@ -107,16 +107,16 @@ typedef uint32_t set_t;
// AVX10_256 is compatible with AVX2 + VAES
// return set containing all elements from sets a & b
inline set_t set_union ( set_t a, set_t b ) { return a | b; }
static inline set_t set_union ( set_t a, set_t b ) { return a | b; }
// return set contained common elements from sets a & b
inline set_t set_intsec ( set_t a, set_t b) { return a & b; }
static inline set_t set_intsec ( set_t a, set_t b) { return a & b; }
// all elements in set a are included in set b
inline bool set_incl ( set_t a, set_t b ) { return (a & b) == a; }
static inline bool set_incl ( set_t a, set_t b ) { return (a & b) == a; }
// no elements in set a are included in set b
inline bool set_excl ( set_t a, set_t b ) { return (a & b) == 0; }
static inline bool set_excl ( set_t a, set_t b ) { return (a & b) == 0; }
typedef struct
{

View File

@@ -21,7 +21,7 @@
#define EPS1 DBL_EPSILON
#define EPS2 3.0e-11
inline double exp_n( double xt )
static inline double exp_n( double xt )
{
if ( xt < -700.0 )
return 0;
@@ -33,7 +33,7 @@ inline double exp_n( double xt )
return exp( xt );
}
inline double exp_n2( double x1, double x2 )
static inline double exp_n2( double x1, double x2 )
{
double p1 = -700., p2 = -37., p3 = -0.8e-8, p4 = 0.8e-8,
p5 = 37., p6 = 700.;

View File

@@ -1,75 +0,0 @@
// Copyright (c) 2014 The Magi developers
// Distributed under the MIT/X11 software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
#include <iostream>
#include <cfloat>
#include <limits>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include "magimath.h"
#define EPS1 (std::numeric_limits<double>::epsilon())
#define EPS2 3.0e-11
static void gauleg(double x1, double x2, double x[], double w[], const int n)
{
int m,j,i;
double z1, z, xm, xl, pp, p3, p2, p1;
m=(n+1)/2;
xm=0.5*(x2+x1);
xl=0.5*(x2-x1);
for (i=1;i<=m;i++) {
z=cos(3.141592654*(i-0.25)/(n+0.5));
do {
p1=1.0;
p2=0.0;
for (j=1;j<=n;j++) {
p3=p2;
p2=p1;
p1=((2.0*j-1.0)*z*p2-(j-1.0)*p3)/j;
}
pp=n*(z*p1-p2)/(z*z-1.0);
z1=z;
z=z1-p1/pp;
} while (fabs(z-z1) > EPS2);
x[i]=xm-xl*z;
x[n+1-i]=xm+xl*z;
w[i]=2.0*xl/((1.0-z*z)*pp*pp);
w[n+1-i]=w[i];
}
}
static double GaussianQuad_N(double func(const double), const double a2, const double b2, const int NptGQ)
{
double s=0.0;
#ifdef _MSC_VER
#define SW_DIVS 23
double x[SW_DIVS+1], w[SW_DIVS+1];
#else
double x[NptGQ+1], w[NptGQ+1];
#endif
gauleg(a2, b2, x, w, NptGQ);
for (int j=1; j<=NptGQ; j++) {
s += w[j]*func(x[j]);
}
return s;
}
static double swit_(double wvnmb)
{
return pow( (5.55243*(exp_n(-0.3*wvnmb/15.762) - exp_n(-0.6*wvnmb/15.762)))*wvnmb, 0.5)
/ 1034.66 * pow(sin(wvnmb/65.), 2.);
}
uint32_t sw_(int nnounce, int divs)
{
double wmax = ((sqrt((double)(nnounce))*(1.+EPS1))/450+100);
return ((uint32_t)(GaussianQuad_N(swit_, 0., wmax, divs)*(1.+EPS1)*1.e6));
}

View File

@@ -1,54 +0,0 @@
// Copyright (c) 2014 The Magi developers
// Distributed under the MIT/X11 software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
#ifndef MAGI_MATH_H
#define MAGI_MATH_H
#include <math.h>
#ifdef __cplusplus
extern "C" {
#endif
uint32_t sw_(int nnounce, int divs);
#ifdef __cplusplus
}
#endif
inline double exp_n(double xt)
{
double p1 = -700.0, p3 = -0.8e-8, p4 = 0.8e-8, p6 = 700.0;
if(xt < p1)
return 0;
else if(xt > p6)
return 1e200;
else if(xt > p3 && xt < p4)
return (1.0 + xt);
else
return exp(xt);
}
// 1 / (1 + exp(x1-x2))
inline double exp_n2(double x1, double x2)
{
double p1 = -700., p2 = -37., p3 = -0.8e-8, p4 = 0.8e-8, p5 = 37., p6 = 700.;
double xt = x1 - x2;
if (xt < p1+1.e-200)
return 1.;
else if (xt > p1 && xt < p2 + 1.e-200)
return ( 1. - exp(xt) );
else if (xt > p2 && xt < p3 + 1.e-200)
return ( 1. / (1. + exp(xt)) );
else if (xt > p3 && xt < p4)
return ( 1. / (2. + xt) );
else if (xt > p4 - 1.e-200 && xt < p5)
return ( exp(-xt) / (1. + exp(-xt)) );
else if (xt > p5 - 1.e-200 && xt < p6)
return ( exp(-xt) );
else //if (xt > p6 - 1.e-200)
return 0.;
}
#endif

View File

@@ -205,7 +205,7 @@ void sha1_x86_sha_transform_be( uint32_t *state_out, const void *input,
#endif
#if defined(__aarch64__) && defined(__ARM_FEATURE_SHA2)
#if defined(__ARM_NEON) && defined(__ARM_FEATURE_SHA2)
#define sha1_neon_rounds( state_out, data, state_in ) \
{ \

View File

@@ -1,6 +1,6 @@
#include "sha256-hash.h"
#if ( defined(__x86_64__) && defined(__SHA__) ) || defined(__ARM_NEON) && defined(__ARM_FEATURE_SHA2)
#if ( defined(__x86_64__) && defined(__SHA__) ) || ( defined(__ARM_NEON) && defined(__ARM_FEATURE_SHA2) )
static const uint32_t SHA256_IV[8] =
{
@@ -189,7 +189,7 @@ static const uint32_t SHA256_IV[8] =
_mm_store_si128( (__m128i*) &state_out[4], STATE1 ); \
}
void sha256_opt_transform_le( uint32_t *state_out, const void *input,
void sha256_x86_sha_transform_le( uint32_t *state_out, const void *input,
const uint32_t *state_in )
{
#define load_msg( m, i ) casti_v128( m, i )
@@ -197,7 +197,7 @@ void sha256_opt_transform_le( uint32_t *state_out, const void *input,
#undef load_msg
}
void sha256_opt_transform_be( uint32_t *state_out, const void *input,
void sha256_x86_sha_transform_be( uint32_t *state_out, const void *input,
const uint32_t *state_in )
{
#define load_msg( m, i ) v128_bswap32( casti_v128( m, i ) )
@@ -517,7 +517,7 @@ void sha256_opt_transform_be( uint32_t *state_out, const void *input,
_mm_store_si128( (__m128i*) &out_Y[4], STATE1_Y ); \
}
void sha256_ni2x_transform_le( uint32_t *out_X, uint32_t*out_Y,
void sha256_x86_x2sha_transform_le( uint32_t *out_X, uint32_t*out_Y,
const void *msg_X, const void *msg_Y,
const uint32_t *in_X, const uint32_t *in_Y )
{
@@ -526,7 +526,7 @@ void sha256_ni2x_transform_le( uint32_t *out_X, uint32_t*out_Y,
#undef load_msg
}
void sha256_ni2x_transform_be( uint32_t *out_X, uint32_t*out_Y,
void sha256_x86_x2sha_transform_be( uint32_t *out_X, uint32_t*out_Y,
const void *msg_X, const void *msg_Y,
const uint32_t *in_X, const uint32_t *in_Y )
{
@@ -541,7 +541,7 @@ void sha256_ni2x_transform_be( uint32_t *out_X, uint32_t*out_Y,
// The goal is to avoid any redundant processing in final. Prehash is almost
// 4 rounds total, only missing the final addition of the nonce.
// Nonce must be set to zero for prehash.
void sha256_ni_prehash_3rounds( uint32_t *ostate, const void *msg,
void sha256_x86_sha_prehash_3rounds( uint32_t *ostate, const void *msg,
uint32_t *sstate, const uint32_t *istate )
{
__m128i STATE0, STATE1, MSG, TMP;
@@ -569,7 +569,7 @@ void sha256_ni_prehash_3rounds( uint32_t *ostate, const void *msg,
casti_m128i( ostate, 1 ) = STATE1;
}
void sha256_ni2x_final_rounds( uint32_t *out_X, uint32_t *out_Y,
void sha256_x86_x2sha_final_rounds( uint32_t *out_X, uint32_t *out_Y,
const void *msg_X, const void *msg_Y,
const uint32_t *state_mid_X, const uint32_t *state_mid_Y,
const uint32_t *state_save_X, const uint32_t *state_save_Y )

View File

@@ -5,27 +5,21 @@
#include "simd-utils.h"
#include "cpuminer-config.h"
// generic interface
static const uint32_t SHA256_IV[8];
#if defined(__x86_64__) && defined(__SHA__)
typedef struct
{
unsigned char buf[64]; /* first field, for alignment */
unsigned char buf[64];
uint32_t state[8];
uint64_t count;
} sha256_context __attribute__((aligned(64)));
static const uint32_t SHA256_IV[8];
void sha256_full( void *hash, const void *data, size_t len );
void sha256_update( sha256_context *ctx, const void *data, size_t len );
void sha256_final( sha256_context *ctx, void *hash );
void sha256_ctx_init( sha256_context *ctx );
void sha256_transform_le( uint32_t *state_out, const uint32_t *data,
const uint32_t *state_in );
void sha256_transform_be( uint32_t *state_out, const uint32_t *data,
const uint32_t *state_in );
#if defined(__x86_64__) && defined(__SHA__)
void sha256_x86_sha_transform_le( uint32_t *state_out, const void *input,
const uint32_t *state_in );
@@ -50,14 +44,6 @@ void sha256_x86_x2sha_final_rounds( uint32_t *state_out_X, uint32_t *state_out_Y
const uint32_t *state_mid_X, const uint32_t *state_mid_Y,
const uint32_t *state_save_X, const uint32_t *state_save_Y );
// Temporary during name transition
#define sha256_opt_transform_le sha256_x86_sha_transform_le
#define sha256_opt_transform_be sha256_x86_sha_transform_be
#define sha256_ni2x_transform_le sha256_x86_x2sha_transform_le
#define sha256_ni2x_transform_be sha256_x86_x2sha_transform_be
#define sha256_ni_prehash_3rounds sha256_x86_sha_prehash_3rounds
#define sha256_ni2x_final_rounds sha256_x86_x2sha_final_rounds
// generic API
#define sha256_transform_le sha256_x86_sha_transform_le
#define sha256_transform_be sha256_x86_sha_transform_be
@@ -68,6 +54,20 @@ void sha256_x86_x2sha_final_rounds( uint32_t *state_out_X, uint32_t *state_out_Y
#elif defined(__ARM_NEON) && defined(__ARM_FEATURE_SHA2)
// SHA-256 AArch64 with NEON & SHA2
typedef struct
{
unsigned char buf[64];
uint32_t state[8];
uint64_t count;
} sha256_context __attribute__((aligned(64)));
void sha256_full( void *hash, const void *data, size_t len );
void sha256_update( sha256_context *ctx, const void *data, size_t len );
void sha256_final( sha256_context *ctx, void *hash );
void sha256_ctx_init( sha256_context *ctx );
void sha256_neon_sha_transform_be( uint32_t *state_out, const void *input,
const uint32_t *state_in );
void sha256_neon_sha_transform_le( uint32_t *state_out, const void *input,
@@ -89,14 +89,6 @@ void sha256_neon_x2sha_final_rounds( uint32_t *state_out_X,
const uint32_t *state_mid_X, const uint32_t *state_mid_Y,
const uint32_t *state_save_X, const uint32_t *state_save_Y );
// Temporary during name transition
#define sha256_transform_le sha256_neon_sha_transform_le
#define sha256_transform_be sha256_neon_sha_transform_be
#define sha256_2x_transform_le sha256_neon_x2sha_transform_le
#define sha256_2x_transform_be sha256_neon_x2sha_transform_be
#define sha256_prehash_3rounds sha256_neon_sha_prehash_3rounds
#define sha256_2x_final_rounds sha256_neon_x2sha_final_rounds
// generic API
#define sha256_transform_le sha256_neon_sha_transform_le
#define sha256_transform_be sha256_neon_sha_transform_be
@@ -106,9 +98,11 @@ void sha256_neon_x2sha_final_rounds( uint32_t *state_out_X,
#define sha256_2x_final_rounds sha256_neon_x2sha_final_rounds
#else
// without HW acceleration...
#include "sph_sha2.h"
#define sha256_context sph_sha256_context
#define sha256_full sph_sha256_full
#define sha256_ctx_init sph_sha256_init
#define sha256_update sph_sha256
@@ -117,12 +111,11 @@ void sha256_neon_x2sha_final_rounds( uint32_t *state_out_X,
#define sha256_transform_be sph_sha256_transform_be
#define sha256_prehash_3rounds sph_sha256_prehash_3rounds
#endif
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
// SHA-256 16 way
// SHA-256 16 way x86_64
typedef struct
{
@@ -147,7 +140,7 @@ void sha256_16x32_final_rounds( __m512i *state_out, const __m512i *data,
int sha256_16x32_transform_le_short( __m512i *state_out, const __m512i *data,
const __m512i *state_in, const uint32_t *target );
#define sha256_16way_context sha256_16x32_context
#define sha256_16way_context sha256_16x32_context
#define sha256_16way_init sha256_16x32_init
#define sha256_16way_update sha256_16x32_update
#define sha256_16way_close sha256_16x32_close
@@ -162,7 +155,7 @@ int sha256_16x32_transform_le_short( __m512i *state_out, const __m512i *data,
#if defined (__AVX2__)
// SHA-256 8 way
// SHA-256 8 way x86_64
typedef struct
{
@@ -201,7 +194,7 @@ int sha256_8x32_transform_le_short( __m256i *state_out, const __m256i *data,
#endif // AVX2
// SHA-256 4 way
// SHA-256 4 way x86_64 with SSE2 or AArch64 with NEON
typedef struct
{

View File

@@ -16,14 +16,14 @@ bool register_timetravel_algo( algo_gate_t* gate )
return true;
};
inline void tt_swap( int *a, int *b )
static inline void tt_swap( int *a, int *b )
{
int c = *a;
*a = *b;
*b = c;
}
inline void reverse( int *pbegin, int *pend )
static inline void reverse( int *pbegin, int *pend )
{
while ( (pbegin != pend) && (pbegin != --pend) )
{

View File

@@ -16,14 +16,14 @@ bool register_timetravel10_algo( algo_gate_t* gate )
return true;
};
inline void tt10_swap( int *a, int *b )
static inline void tt10_swap( int *a, int *b )
{
int c = *a;
*a = *b;
*b = c;
}
inline void reverse( int *pbegin, int *pend )
static inline void reverse( int *pbegin, int *pend )
{
while ( (pbegin != pend) && (pbegin != --pend) )
{

View File

@@ -730,7 +730,7 @@ typedef union _x16rv2_4way_context_overlay x16rv2_4way_context_overlay;
static __thread x16rv2_4way_context_overlay x16rv2_ctx;
// Pad the 24 bytes tiger hash to 64 bytes
inline void padtiger512( uint32_t* hash )
static inline void padtiger512( uint32_t* hash )
{
for ( int i = 6; i < 16; i++ ) hash[i] = 0;
}

View File

@@ -33,7 +33,7 @@ union _x16rv2_context_overlay
typedef union _x16rv2_context_overlay x16rv2_context_overlay;
// Pad the 24 bytes tiger hash to 64 bytes
inline void padtiger512(uint32_t* hash) {
static inline void padtiger512(uint32_t* hash) {
for (int i = (24/4); i < (64/4); i++) hash[i] = 0;
}

View File

@@ -9,6 +9,6 @@ rm -f config.status
CFLAGS="-O3 -march=native -Wall -flax-vector-conversions" ./configure --with-curl
make -j $nproc
make -j $(nproc)
strip -s cpuminer

View File

@@ -10,33 +10,33 @@ make distclean || echo clean
rm -f config.status
./autogen.sh || echo done
CFLAGS="-O3 -march=armv8-a+crypto+sha2+aes -Wall -flax-vector-conversions" ./configure --with-curl
make -j $nproc
make -j $(nproc)
strip -s cpuminer
mv cpuminer cpuminer-armv8-aes-sha2
make clean || echo clean
rm -f config.status
CFLAGS="-O3 -march=armv8-a+crypto+sha2 -Wall -flax-vector-conversions" ./configure --with-curl
make -j $nproc
make -j $(nproc)
strip -s cpuminer
mv cpuminer cpuminer-armv8-sha2
make clean || echo clean
rm -f config.status
CFLAGS="-O3 -march=armv8-a+crypto+aes -Wall -flax-vector-conversions" ./configure --with-curl
make -j $nproc
make -j $(nproc)
strip -s cpuminer
mv cpuminer cpuminer-armv8-aes
make clean || echo clean
rm -f config.status
CFLAGS="-O3 -march=armv8-a -Wall -flax-vector-conversions" ./configure --with-curl
make -j $nproc
make -j $(nproc)
strip -s cpuminer
mv cpuminer cpuminer-armv8
make clean || echo clean
rm -f config.status
CFLAGS="-O3 -march=native -Wall -flax-vector-conversions" ./configure --with-curl
make -j $nproc
make -j $(nproc)
strip -s cpuminer

View File

@@ -13,7 +13,7 @@ rm -f config.status
CFLAGS="-O3 -march=icelake-client -Wall" ./configure --with-curl
# Rocketlake needs gcc-11
#CFLAGS="-O3 -march=rocketlake -Wall" ./configure --with-curl
make -j $nproc
make -j $(nproc)
strip -s cpuminer
mv cpuminer cpuminer-avx512-sha-vaes
@@ -34,7 +34,7 @@ rm -f config.status
# Inclomplete list of Zen4 AVX512 extensions but includes all extensions used by cpuminer.
CFLAGS="-O3 -march=znver3 -mavx512f -mavx512cd -mavx512dq -mavx512bw -mavx512vl -mavx512vbmi -mavx512vbmi2 -mavx512bitalg -mavx512vpopcntdq -Wall" ./configure --with-curl
#CFLAGS="-O3 -march=znver2 -mvaes -mavx512f -mavx512dq -mavx512bw -mavx512vl -mavx512vbmi -Wall" ./configure --with-curl
make -j $nproc
make -j $(nproc)
strip -s cpuminer
mv cpuminer cpuminer-zen4
@@ -43,7 +43,7 @@ make clean || echo clean
rm -f config.status
#CFLAGS="-O3 -march=znver2 -mvaes" ./configure --with-curl
CFLAGS="-O3 -march=znver3 -fno-common " ./configure --with-curl
make -j $nproc
make -j $(nproc)
strip -s cpuminer
mv cpuminer cpuminer-zen3
@@ -51,7 +51,7 @@ mv cpuminer cpuminer-zen3
make clean || echo clean
rm -f config.status
CFLAGS="-O3 -march=skylake-avx512 -maes -Wall" ./configure --with-curl
make -j $nproc
make -j $(nproc)
strip -s cpuminer
mv cpuminer cpuminer-avx512
@@ -60,7 +60,7 @@ make clean || echo done
rm -f config.status
# vaes doesn't include aes
CFLAGS="-O3 -maes -mavx2 -msha -mvaes -Wall" ./configure --with-curl
make -j $nproc
make -j $(nproc)
strip -s cpuminer
mv cpuminer cpuminer-avx2-sha-vaes
@@ -69,7 +69,7 @@ make clean || echo done
rm -f config.status
#CFLAGS="-O3 -march=znver1 -maes -Wall" ./configure --with-curl
CFLAGS="-O3 -maes -mavx2 -msha -Wall" ./configure --with-curl
make -j $nproc
make -j $(nproc)
strip -s cpuminer
mv cpuminer cpuminer-avx2-sha
@@ -78,7 +78,7 @@ make clean || echo clean
rm -f config.status
# GCC 9 doesn't include AES with core-avx2
CFLAGS="-O3 -march=core-avx2 -maes -Wall" ./configure --with-curl
make -j $nproc
make -j $(nproc)
strip -s cpuminer
mv cpuminer cpuminer-avx2
@@ -86,7 +86,7 @@ mv cpuminer cpuminer-avx2
make clean || echo clean
rm -f config.status
CFLAGS="-O3 -march=corei7-avx -maes -Wall" ./configure --with-curl
make -j $nproc
make -j $(nproc)
strip -s cpuminer
mv cpuminer cpuminer-avx
@@ -94,7 +94,7 @@ mv cpuminer cpuminer-avx
make clean || echo clean
rm -f config.status
CFLAGS="-O3 -march=westmere -maes -Wall" ./configure --with-curl
make -j $nproc
make -j $(nproc)
strip -s cpuminer
mv cpuminer cpuminer-aes-sse42
@@ -102,7 +102,7 @@ mv cpuminer cpuminer-aes-sse42
make clean || echo clean
rm -f config.status
CFLAGS="-O3 -march=corei7 -Wall" ./configure --with-curl
make -j $nproc
make -j $(nproc)
strip -s cpuminer
mv cpuminer cpuminer-sse42
@@ -110,7 +110,7 @@ mv cpuminer cpuminer-sse42
make clean || echo clean
rm -f config.status
CFLAGS="-O3 -march=core2 -Wall" ./configure --with-curl
make -j $nproc
make -j $(nproc)
strip -s cpuminer
mv cpuminer cpuminer-ssse3
@@ -118,7 +118,7 @@ mv cpuminer cpuminer-ssse3
make clean || echo clean
rm -f config.status
CFLAGS="-O3 -msse2 -Wall" ./configure --with-curl
make -j $nproc
make -j $(nproc)
strip -s cpuminer
mv cpuminer cpuminer-sse2
@@ -126,7 +126,7 @@ mv cpuminer cpuminer-sse2
make clean || echo clean
rm -f config.status
CFLAGS="-O3 -march=x86-64 -Wall" ./configure --with-curl
make -j $nproc
make -j $(nproc)
strip -s cpuminer
mv cpuminer cpuminer-x64
@@ -134,6 +134,6 @@ mv cpuminer cpuminer-x64
make clean || echo done
rm -f config.status
CFLAGS="-O3 -march=native -Wall" ./configure --with-curl
make -j $nproc
make -j $(nproc)
strip -s cpuminer

View File

@@ -10,6 +10,6 @@ rm -f config.status
CFLAGS="-O2 -march=armv8-a+crypto+sha2+aes -Wall -flax-vector-conversions" ./configure --with-curl --host=aarch64-cortexa76-elf --build=x86_64-pc-linux-gnu --target=aarch64-cortexa76-elf
#CFLAGS="-O2 -march=armv8-a+crypto+sha2+aes -Wall -flax-vector-conversions" ./configure --with-curl
make -j $nproc
make -j $(nproc)
strip -s cpuminer

View File

@@ -22,6 +22,6 @@ rm -f config.status
CFLAGS="-O3 -march=haswell -maes -Wall" ./configure --with-curl
#CFLAGS="-O3 -march=native -Wall" CXXFLAGS="$CFLAGS -std=gnu++11" ./configure --with-curl
make -j $nproc
make -j $(nproc)
strip -s cpuminer

View File

@@ -6,5 +6,5 @@ make distclean || echo clean
rm -f config.status
./autogen.sh || echo done
CFLAGS="-O3 -march=native -Wall -D_WIN32_WINNT=0x0601" ./configure --with-curl
make -j 4
make -j $(nproc)
strip -s cpuminer

View File

@@ -15,6 +15,6 @@ rm -f config.status
#CFLAGS="-O3 -march=native -Wall" ./configure --with-curl --with-crypto=$HOME/usr
CFLAGS="-O3 -march=native -Wall" ./configure --with-curl
make -j $nproc
make -j $(nproc)
strip -s cpuminer

View File

@@ -2,7 +2,7 @@
#
# make clean and rm all the targetted executables.
rm cpuminer-avx512-sha-vaes cpuminer-avx512 cpuminer-avx2 cpuminer-avx cpuminer-aes-sse42 cpuminer-sse2 cpuminer-avx2-sha cpuminer-sse42 cpuminer-ssse3 cpuminer-avx2-sha-vaes cpuminer-zen3 cpuminer-zen4 cpuminer-x64 > /dev/null
rm cpuminer-avx512-sha-vaes cpuminer-avx512 cpuminer-avx2 cpuminer-avx cpuminer-aes-sse42 cpuminer-sse2 cpuminer-avx2-sha cpuminer-sse42 cpuminer-ssse3 cpuminer-avx2-sha-vaes cpuminer-zen3 cpuminer-zen4 cpuminer-x64 cpuminer-armv8 cpuminer-armv8-aes cpuminer-armv8-aes-sha2 cpuminer-armv8-sha2 > /dev/null
rm cpuminer-avx512-sha-vaes.exe cpuminer-avx512-sha.exe cpuminer-avx512.exe cpuminer-avx2.exe cpuminer-avx.exe cpuminer-aes-sse42.exe cpuminer-sse2.exe cpuminer-avx2-sha.exe cpuminer-sse42.exe cpuminer-ssse3.exe cpuminer-avx2-sha-vaes.exe cpuminer-zen3.exe cpuminer-zen4.exe cpuminer-x64.exe > /dev/null

20
configure vendored
View File

@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.71 for cpuminer-opt 23.15.
# Generated by GNU Autoconf 2.71 for cpuminer-opt 24.1.
#
#
# Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation,
@@ -608,8 +608,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='cpuminer-opt'
PACKAGE_TARNAME='cpuminer-opt'
PACKAGE_VERSION='23.15'
PACKAGE_STRING='cpuminer-opt 23.15'
PACKAGE_VERSION='24.1'
PACKAGE_STRING='cpuminer-opt 24.1'
PACKAGE_BUGREPORT=''
PACKAGE_URL=''
@@ -1360,7 +1360,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
\`configure' configures cpuminer-opt 23.15 to adapt to many kinds of systems.
\`configure' configures cpuminer-opt 24.1 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1432,7 +1432,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
short | recursive ) echo "Configuration of cpuminer-opt 23.15:";;
short | recursive ) echo "Configuration of cpuminer-opt 24.1:";;
esac
cat <<\_ACEOF
@@ -1538,7 +1538,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
cpuminer-opt configure 23.15
cpuminer-opt configure 24.1
generated by GNU Autoconf 2.71
Copyright (C) 2021 Free Software Foundation, Inc.
@@ -1985,7 +1985,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
It was created by cpuminer-opt $as_me 23.15, which was
It was created by cpuminer-opt $as_me 24.1, which was
generated by GNU Autoconf 2.71. Invocation command line was
$ $0$ac_configure_args_raw
@@ -3593,7 +3593,7 @@ fi
# Define the identity of the package.
PACKAGE='cpuminer-opt'
VERSION='23.15'
VERSION='24.1'
printf "%s\n" "#define PACKAGE \"$PACKAGE\"" >>confdefs.h
@@ -7508,7 +7508,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
This file was extended by cpuminer-opt $as_me 23.15, which was
This file was extended by cpuminer-opt $as_me 24.1, which was
generated by GNU Autoconf 2.71. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -7576,7 +7576,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config='$ac_cs_config_escaped'
ac_cs_version="\\
cpuminer-opt config.status 23.15
cpuminer-opt config.status 24.1
configured by $0, generated by GNU Autoconf 2.71,
with options \\"\$ac_cs_config\\"

View File

@@ -1,4 +1,4 @@
AC_INIT([cpuminer-opt], [23.15])
AC_INIT([cpuminer-opt], [24.1])
AC_PREREQ([2.59c])
AC_CANONICAL_SYSTEM

View File

@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.71 for cpuminer-opt 23.14.
# Generated by GNU Autoconf 2.71 for cpuminer-opt 23.16.
#
#
# Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation,
@@ -608,8 +608,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='cpuminer-opt'
PACKAGE_TARNAME='cpuminer-opt'
PACKAGE_VERSION='23.14'
PACKAGE_STRING='cpuminer-opt 23.14'
PACKAGE_VERSION='23.16'
PACKAGE_STRING='cpuminer-opt 23.16'
PACKAGE_BUGREPORT=''
PACKAGE_URL=''
@@ -1360,7 +1360,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
\`configure' configures cpuminer-opt 23.14 to adapt to many kinds of systems.
\`configure' configures cpuminer-opt 23.16 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1432,7 +1432,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
short | recursive ) echo "Configuration of cpuminer-opt 23.14:";;
short | recursive ) echo "Configuration of cpuminer-opt 23.16:";;
esac
cat <<\_ACEOF
@@ -1538,7 +1538,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
cpuminer-opt configure 23.14
cpuminer-opt configure 23.16
generated by GNU Autoconf 2.71
Copyright (C) 2021 Free Software Foundation, Inc.
@@ -1985,7 +1985,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
It was created by cpuminer-opt $as_me 23.14, which was
It was created by cpuminer-opt $as_me 23.16, which was
generated by GNU Autoconf 2.71. Invocation command line was
$ $0$ac_configure_args_raw
@@ -3593,7 +3593,7 @@ fi
# Define the identity of the package.
PACKAGE='cpuminer-opt'
VERSION='23.14'
VERSION='23.16'
printf "%s\n" "#define PACKAGE \"$PACKAGE\"" >>confdefs.h
@@ -7508,7 +7508,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
This file was extended by cpuminer-opt $as_me 23.14, which was
This file was extended by cpuminer-opt $as_me 23.16, which was
generated by GNU Autoconf 2.71. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -7576,7 +7576,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config='$ac_cs_config_escaped'
ac_cs_version="\\
cpuminer-opt config.status 23.14
cpuminer-opt config.status 23.16
configured by $0, generated by GNU Autoconf 2.71,
with options \\"\$ac_cs_config\\"

View File

@@ -1294,7 +1294,7 @@ static int share_result( int result, struct work *work,
const char *bell = !result && opt_bell ? &ASCII_BELL : "";
applog( LOG_INFO, "%s%d %s%s %s%s %s%s %s%s%s, %.3f sec (%dms)",
bell, my_stats.share_count, acol, ares, scol, sres, rcol, rres,
bcol, bres, CL_N, share_time, latency );
bcol, bres, use_colors ? CL_N : "", share_time, latency );
if ( unlikely( !( opt_quiet || result || stale ) ) )
{
applog2( LOG_INFO, "%sReject reason: %s", bell, reason ? reason : "" );

6
dummy.cpp Normal file
View File

@@ -0,0 +1,6 @@
// This file exists to force the use of g++ as the linker which in turn
// links the math library with the inclusion of math.h. gcc will not
// automatically link math. Without this file linking will fail for m7m.c.
// Linking math manually, allowing gcc to do the linking work on Linux
// but on Windows it segfaults. Until that is solved this file must continue
// to exist.

View File

@@ -185,13 +185,13 @@ static inline bool is_windows(void)
*/
#endif
static inline uint32_t swab32(uint32_t v)
static inline uint32_t swab32(uint32_t x)
{
#ifdef WANT_BUILTIN_BSWAP
return __builtin_bswap32(v);
return __builtin_bswap32(x);
#else
return ( (x << 24) & 0xff000000u ) | ( (x << 8) & 0x00ff0000u )
| ( (x >> 8) & 0x0000ff00u ) | ( (x >> 24) & 0x000000ffu )
return ( ( (x) << 24 ) & 0xff000000u ) | ( ( (x) << 8 ) & 0x00ff0000u )
| ( ( (x) >> 8 ) & 0x0000ff00u ) | ( ( (x) >> 24 ) & 0x000000ffu )
// return bswap_32(v);

File diff suppressed because it is too large Load Diff

View File

@@ -141,7 +141,7 @@
#define v128_aesdeclast _mm_aesdeclast_si128
#define v128_aesdeclast_nokey(v) _mm_aesdeclast_si128( v, v128_zero )
// Used instead if casting.
// Used instead of casting.
typedef union
{
v128_t v128;

View File

@@ -429,6 +429,19 @@ static inline __m256i mm256_not( const __m256i v )
#endif // AVX512 else AVX2
#if defined(__AVX2__)
// 128 bit version of unpack
#define v256_unpacklo128( v1, v0 ) _mm256_permute2x128_si256( v1, v0, 0x20 )
#define v256_unpackhi128( v1, v0 ) _mm256_permute2x128_si256( v1, v0, 0x31 )
#else
#define v256_unpacklo128( v1, v0 ) _mm256_permute2f128_si256( v1, v0, 0x20 )
#define v256_unpackhi128( v1, v0 ) _mm256_permute2f128_si256( v1, v0, 0x31 )
#endif
//
// Cross lane shuffles
//

View File

@@ -164,11 +164,10 @@ static inline uint32_t ror32( uint32_t a, const int c )
// obsolete test
// Compiler check for __int128 support
// Configure also has a test for int128.
#if ( __GNUC__ > 4 ) || ( ( __GNUC__ == 4 ) && ( __GNUC_MINOR__ >= 8 ) )
//#if ( __GNUC__ > 4 ) || ( ( __GNUC__ == 4 ) && ( __GNUC_MINOR__ >= 8 ) )
#define GCC_INT128 1
#endif
//#endif
// obsolte test
#if !defined(GCC_INT128)
#warning "__int128 not supported, requires GCC-4.8 or newer."
#endif

View File

@@ -14,6 +14,7 @@
#include <stdlib.h>
#include <string.h>
#include "miner.h"
#include "simd-utils.h"
#if defined(__aarch64__) && !defined(__APPLE__)
// for arm's "cpuid"
@@ -223,8 +224,8 @@ static inline int cpu_fanpercent()
#define AVX512_F_Flag (1<<16)
#define AVX512_DQ_Flag (1<<17)
#define AVX512_IFMA_Flag (1<<21)
#define AVX512_PF_Flag (1<<26)
#define AVX512_ER_Flag (1<<27)
#define AVX512_PF_Flag (1<<26) // obsolete
#define AVX512_ER_Flag (1<<27) // obsolete
#define AVX512_CD_Flag (1<<28)
#define SHA_Flag (1<<29)
#define AVX512_BW_Flag (1<<30)
@@ -237,8 +238,8 @@ static inline int cpu_fanpercent()
#define AVX512_BITALG_Flag (1<<12)
#define AVX512_VPOPCNTDQ_Flag (1<<14)
// EDX
#define AVX512_4VNNIW_Flag (1<< 2)
#define AVX512_4FMAPS_Flag (1<< 3)
#define AVX512_4VNNIW_Flag (1<< 2) // obsolete
#define AVX512_4FMAPS_Flag (1<< 3) // obsolete
#define AVX512_VP2INTERSECT_Flag (1<< 8)
#define AMX_BF16_Flag (1<<22)
#define AVX512_FP16_Flag (1<<23)
@@ -557,10 +558,15 @@ static inline bool has_aes_ni()
#elif defined(__aarch64__) && !defined(__APPLE__)
if ( has_neon() )
{
unsigned int cpu_info[4] = { 0 };
#if defined(KERNEL_HWCAP_AES)
return true;
#else
return false;
#endif
/* unsigned int cpu_info[4] = { 0 };
cpuid( 0, 0, cpu_info );
return cpu_info[0] & HWCAP_AES;
}
*/ }
return false;
#else
return false;
@@ -602,10 +608,15 @@ static inline bool has_sha()
#elif defined(__aarch64__) && !defined(__APPLE__)
if ( has_neon() )
{
unsigned int cpu_info[4] = { 0 };
#if defined(KERNEL_HWCAP_SHA2)
return true;
#else
return false;
#endif
/* unsigned int cpu_info[4] = { 0 };
cpuid( 0, 0, cpu_info );
return cpu_info[0] & HWCAP_SHA2;
}
*/ }
return false;
#else
return false;

6
util.c
View File

@@ -2075,11 +2075,7 @@ static bool stratum_notify(struct stratum_ctx *sctx, json_t *params)
const char *s = json_string_value( json_array_get( merkle_arr, i ) );
if ( !s || strlen(s) != 64 )
{
for ( int j = sctx->job.merkle_buf_size; j > 0; j-- )
free( sctx->job.merkle[i] );
free( sctx->job.merkle );
sctx->job.merkle_count =
sctx->job.merkle_buf_size = 0;
sctx->job.merkle_count = 0;
pthread_mutex_unlock( &sctx->work_lock );
applog( LOG_ERR, "Stratum notify: invalid Merkle branch" );
goto out;