This commit is contained in:
Jay D Dee
2026-01-13 19:17:47 -05:00
parent 8f2f9ec3e9
commit b34565bfac
13 changed files with 117 additions and 58 deletions

View File

@@ -32,8 +32,6 @@ Requirements
32 bit CPUs are not supported. 32 bit CPUs are not supported.
Older CPUs are supported by open source cpuminer-multi by TPruvot but at reduced performance.
Mining on mobile devices that meet the requirements is not recommended due to the risk of Mining on mobile devices that meet the requirements is not recommended due to the risk of
overheating and damaging the battery. Mining has unlimited demand, it will push any device overheating and damaging the battery. Mining has unlimited demand, it will push any device
to or beyond its limits. There is also a fire risk with overheated lithium batteries. to or beyond its limits. There is also a fire risk with overheated lithium batteries.
@@ -75,6 +73,10 @@ If not what makes it happen or not happen?
Change Log Change Log
---------- ----------
v26.1
Fixed segfault in scrypt algo on some older CPUs.
v25.7 v25.7
Fixed a bug calculating TTF longer than 1 year. Fixed a bug calculating TTF longer than 1 year.

View File

@@ -1137,7 +1137,7 @@ do { \
} \ } \
} while (0) } while (0)
// v3 ternary logic, 8 instructions, 2 local vars // v4 ternary logic, 8 instructions, 2 local vars
#define SBOX( a, b, c, d ) \ #define SBOX( a, b, c, d ) \
{ \ { \
__m256i tb, td; \ __m256i tb, td; \

View File

@@ -37,8 +37,8 @@
#if defined(SIMD512) #if defined(SIMD512)
#define SCRYPT_THROUGHPUT 16 #define SCRYPT_THROUGHPUT 16
#elif defined(__SHA__) || defined(__ARM_FEATURE_SHA2) //#elif defined(__SHA__) || defined(__ARM_FEATURE_SHA2)
#define SCRYPT_THROUGHPUT 2 // #define SCRYPT_THROUGHPUT 2
#elif defined(__AVX2__) #elif defined(__AVX2__)
#define SCRYPT_THROUGHPUT 8 #define SCRYPT_THROUGHPUT 8
#elif defined(__SSE2__) || defined(__ARM_NEON) #elif defined(__SSE2__) || defined(__ARM_NEON)
@@ -162,7 +162,7 @@ static inline void PBKDF2_SHA256_128_32(uint32_t *tstate, uint32_t *ostate,
} }
#endif // throughput 1 #endif // throughput 1
//
#if defined(__SHA__) || defined(__ARM_FEATURE_SHA2) #if defined(__SHA__) || defined(__ARM_FEATURE_SHA2)
static inline void HMAC_SHA256_80_init_SHA_2BUF( const uint32_t *key0, static inline void HMAC_SHA256_80_init_SHA_2BUF( const uint32_t *key0,
@@ -1230,7 +1230,8 @@ static int scrypt_N_1_1_256_sha_2buf( const uint32_t *input,
#if ( SCRYPT_THROUGHPUT == 4 ) #if ( SCRYPT_THROUGHPUT == 4 )
#if defined(__SHA__) #if 0
//#if defined(__SHA__) || defined(__ARM_FEATURE_SHA2)
static int scrypt_N_1_1_256_4way_sha( const uint32_t *input, uint32_t *output, static int scrypt_N_1_1_256_4way_sha( const uint32_t *input, uint32_t *output,
uint32_t *midstate, int N, int thrid ) uint32_t *midstate, int N, int thrid )
@@ -1244,6 +1245,15 @@ static int scrypt_N_1_1_256_4way_sha( const uint32_t *input, uint32_t *output,
memcpy( tstate+16, midstate, 32 ); memcpy( tstate+16, midstate, 32 );
memcpy( tstate+24, midstate, 32 ); memcpy( tstate+24, midstate, 32 );
HMAC_SHA256_80_init_SHA_2BUF( input, input+20, tstate, tstate+8,
ostate, ostate+8 );
PBKDF2_SHA256_80_128_SHA_2BUF( tstate, tstate+8, ostate, ostate+8,
input, input+20, W, W+32 );
HMAC_SHA256_80_init_SHA_2BUF( input+40, input+60, tstate+16, tstate+24,
ostate+16, ostate+24 );
PBKDF2_SHA256_80_128_SHA_2BUF( tstate+16, tstate+24, ostate+16, ostate+24,
input+40, input+60, W+64, W+96 );
/*
HMAC_SHA256_80_init( input, tstate, ostate ); HMAC_SHA256_80_init( input, tstate, ostate );
PBKDF2_SHA256_80_128( tstate, ostate, input, W ); PBKDF2_SHA256_80_128( tstate, ostate, input, W );
HMAC_SHA256_80_init( input +20, tstate+ 8, ostate+ 8 ); HMAC_SHA256_80_init( input +20, tstate+ 8, ostate+ 8 );
@@ -1252,7 +1262,7 @@ static int scrypt_N_1_1_256_4way_sha( const uint32_t *input, uint32_t *output,
PBKDF2_SHA256_80_128( tstate+16, ostate+16, input +40, W+64 ); PBKDF2_SHA256_80_128( tstate+16, ostate+16, input +40, W+64 );
HMAC_SHA256_80_init( input +60, tstate+24, ostate+24 ); HMAC_SHA256_80_init( input +60, tstate+24, ostate+24 );
PBKDF2_SHA256_80_128( tstate+24, ostate+24, input +60, W+96 ); PBKDF2_SHA256_80_128( tstate+24, ostate+24, input +60, W+96 );
*/
/* /*
// Working Linear single threaded SIMD // Working Linear single threaded SIMD
scrypt_core_simd128( W, V, N ); scrypt_core_simd128( W, V, N );
@@ -1278,11 +1288,16 @@ static int scrypt_N_1_1_256_4way_sha( const uint32_t *input, uint32_t *output,
if ( work_restart[thrid].restart ) return 0; if ( work_restart[thrid].restart ) return 0;
PBKDF2_SHA256_128_32_SHA_2BUF( tstate, tstate+ 8, ostate, ostate+ 8,
W, W+32, output, output+ 8 );
PBKDF2_SHA256_128_32_SHA_2BUF( tstate+16, tstate+24, ostate+16, ostate+24,
W+64, W+96, output+16, output+24 );
/*
PBKDF2_SHA256_128_32( tstate, ostate, W, output ); PBKDF2_SHA256_128_32( tstate, ostate, W, output );
PBKDF2_SHA256_128_32( tstate+ 8, ostate+ 8, W+32, output+ 8 ); PBKDF2_SHA256_128_32( tstate+ 8, ostate+ 8, W+32, output+ 8 );
PBKDF2_SHA256_128_32( tstate+16, ostate+16, W+64, output+16 ); PBKDF2_SHA256_128_32( tstate+16, ostate+16, W+64, output+16 );
PBKDF2_SHA256_128_32( tstate+24, ostate+24, W+96, output+24 ); PBKDF2_SHA256_128_32( tstate+24, ostate+24, W+96, output+24 );
*/
return 1; return 1;
} }
@@ -1390,13 +1405,13 @@ extern int scanhash_scrypt( struct work *work, uint32_t max_nonce,
rc = scrypt_N_1_1_256_8way( data, hash, midstate, opt_param_n, rc = scrypt_N_1_1_256_8way( data, hash, midstate, opt_param_n,
thr_id ); thr_id );
#elif ( SCRYPT_THROUGHPUT == 4 ) #elif ( SCRYPT_THROUGHPUT == 4 )
#if defined(__SHA__) || defined(__ARM_FEATURE_SHA2) // #if defined(__SHA__) || defined(__ARM_FEATURE_SHA2)
rc = scrypt_N_1_1_256_4way_sha( data, hash, midstate, opt_param_n, // rc = scrypt_N_1_1_256_4way_sha( data, hash, midstate, opt_param_n,
thr_id ); // thr_id );
#else // #else
rc = scrypt_N_1_1_256_4way( data, hash, midstate, opt_param_n, rc = scrypt_N_1_1_256_4way( data, hash, midstate, opt_param_n,
thr_id ); thr_id );
#endif // #endif
#elif ( SCRYPT_THROUGHPUT == 2 ) && ( defined(__SHA__) || defined(__ARM_FEATURE_SHA2) ) #elif ( SCRYPT_THROUGHPUT == 2 ) && ( defined(__SHA__) || defined(__ARM_FEATURE_SHA2) )
rc = scrypt_N_1_1_256_sha_2buf( data, hash, midstate, opt_param_n, rc = scrypt_N_1_1_256_sha_2buf( data, hash, midstate, opt_param_n,
thr_id ); thr_id );
@@ -1444,11 +1459,6 @@ bool scrypt_miner_thread_init( int thr_id )
bool register_scrypt_algo( algo_gate_t* gate ) bool register_scrypt_algo( algo_gate_t* gate )
{ {
#if defined(__SHA__) || defined(__ARM_FEATURE_SHA2)
gate->optimizations = SSE2_OPT | SSE42_OPT | AVX_OPT | SHA256_OPT | NEON_OPT;
#else
gate->optimizations = SSE2_OPT | SSE42_OPT | AVX_OPT | AVX2_OPT | AVX512_OPT | NEON_OPT;
#endif
gate->miner_thread_init =(void*)&scrypt_miner_thread_init; gate->miner_thread_init =(void*)&scrypt_miner_thread_init;
gate->scanhash = (void*)&scanhash_scrypt; gate->scanhash = (void*)&scanhash_scrypt;
opt_target_factor = 65536.0; opt_target_factor = 65536.0;
@@ -1469,7 +1479,7 @@ bool register_scrypt_algo( algo_gate_t* gate )
case 8: // AVX2 case 8: // AVX2
if ( opt_param_n > 0x4000 ) if ( opt_param_n > 0x4000 )
scratchbuf_size = opt_param_n * 3 * 128; // 3 buf scratchbuf_size = opt_param_n * 3 * 128; // 3 buf
else else
scratchbuf_size = opt_param_n * 2 * 128; // 2 way scratchbuf_size = opt_param_n * 2 * 128; // 2 way
break; break;
case 4: // SSE2, NEON case 4: // SSE2, NEON
@@ -1477,8 +1487,9 @@ bool register_scrypt_algo( algo_gate_t* gate )
scratchbuf_size = opt_param_n * 2 * 128; // 2 buf scratchbuf_size = opt_param_n * 2 * 128; // 2 buf
else else
scratchbuf_size = opt_param_n * 4 * 128; // 4 way scratchbuf_size = opt_param_n * 4 * 128; // 4 way
break;
default: default:
scratchbuf_size = opt_param_n; // 1 way scratchbuf_size = opt_param_n * 128; // 1 way
} }
char t_units[4] = {0}; char t_units[4] = {0};

View File

@@ -56,7 +56,7 @@ make clean || echo clean
rm -f config.status rm -f config.status
CFLAGS="-O3 -march=armv8.2-a+crypto -Wall -flax-vector-conversions" ./configure --with-curl CFLAGS="-O3 -march=armv8.2-a+crypto -Wall -flax-vector-conversions" ./configure --with-curl
make -j $(nproc) make -j $(nproc)
mv cpuminer cpuminer-armv8-crypto mv cpuminer cpuminer-armv8.2-crypto
make clean || echo clean make clean || echo clean
rm -f config.status rm -f config.status

28
configure vendored
View File

@@ -1,6 +1,6 @@
#! /bin/sh #! /bin/sh
# Guess values for system-dependent variables and create Makefiles. # Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.71 for cpuminer-opt 25.7. # Generated by GNU Autoconf 2.71 for cpuminer-opt 26.1.
# #
# #
# Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation, # Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation,
@@ -608,8 +608,8 @@ MAKEFLAGS=
# Identity of this package. # Identity of this package.
PACKAGE_NAME='cpuminer-opt' PACKAGE_NAME='cpuminer-opt'
PACKAGE_TARNAME='cpuminer-opt' PACKAGE_TARNAME='cpuminer-opt'
PACKAGE_VERSION='25.7' PACKAGE_VERSION='26.1'
PACKAGE_STRING='cpuminer-opt 25.7' PACKAGE_STRING='cpuminer-opt 26.1'
PACKAGE_BUGREPORT='' PACKAGE_BUGREPORT=''
PACKAGE_URL='' PACKAGE_URL=''
@@ -1359,7 +1359,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing. # Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh. # This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF cat <<_ACEOF
\`configure' configures cpuminer-opt 25.7 to adapt to many kinds of systems. \`configure' configures cpuminer-opt 26.1 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]... Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1431,7 +1431,7 @@ fi
if test -n "$ac_init_help"; then if test -n "$ac_init_help"; then
case $ac_init_help in case $ac_init_help in
short | recursive ) echo "Configuration of cpuminer-opt 25.7:";; short | recursive ) echo "Configuration of cpuminer-opt 26.1:";;
esac esac
cat <<\_ACEOF cat <<\_ACEOF
@@ -1536,7 +1536,7 @@ fi
test -n "$ac_init_help" && exit $ac_status test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then if $ac_init_version; then
cat <<\_ACEOF cat <<\_ACEOF
cpuminer-opt configure 25.7 cpuminer-opt configure 26.1
generated by GNU Autoconf 2.71 generated by GNU Autoconf 2.71
Copyright (C) 2021 Free Software Foundation, Inc. Copyright (C) 2021 Free Software Foundation, Inc.
@@ -1983,7 +1983,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake. running configure, to aid debugging if configure makes a mistake.
It was created by cpuminer-opt $as_me 25.7, which was It was created by cpuminer-opt $as_me 26.1, which was
generated by GNU Autoconf 2.71. Invocation command line was generated by GNU Autoconf 2.71. Invocation command line was
$ $0$ac_configure_args_raw $ $0$ac_configure_args_raw
@@ -3591,7 +3591,7 @@ fi
# Define the identity of the package. # Define the identity of the package.
PACKAGE='cpuminer-opt' PACKAGE='cpuminer-opt'
VERSION='25.7' VERSION='26.1'
printf "%s\n" "#define PACKAGE \"$PACKAGE\"" >>confdefs.h printf "%s\n" "#define PACKAGE \"$PACKAGE\"" >>confdefs.h
@@ -5808,11 +5808,11 @@ if test x$ac_prog_cxx_stdcxx = xno
then : then :
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CXX option to enable C++11 features" >&5 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CXX option to enable C++11 features" >&5
printf %s "checking for $CXX option to enable C++11 features... " >&6; } printf %s "checking for $CXX option to enable C++11 features... " >&6; }
if test ${ac_cv_prog_cxx_cxx11+y} if test ${ac_cv_prog_cxx_11+y}
then : then :
printf %s "(cached) " >&6 printf %s "(cached) " >&6
else $as_nop else $as_nop
ac_cv_prog_cxx_cxx11=no ac_cv_prog_cxx_11=no
ac_save_CXX=$CXX ac_save_CXX=$CXX
cat confdefs.h - <<_ACEOF >conftest.$ac_ext cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */ /* end confdefs.h. */
@@ -5854,11 +5854,11 @@ if test x$ac_prog_cxx_stdcxx = xno
then : then :
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CXX option to enable C++98 features" >&5 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CXX option to enable C++98 features" >&5
printf %s "checking for $CXX option to enable C++98 features... " >&6; } printf %s "checking for $CXX option to enable C++98 features... " >&6; }
if test ${ac_cv_prog_cxx_cxx98+y} if test ${ac_cv_prog_cxx_98+y}
then : then :
printf %s "(cached) " >&6 printf %s "(cached) " >&6
else $as_nop else $as_nop
ac_cv_prog_cxx_cxx98=no ac_cv_prog_cxx_98=no
ac_save_CXX=$CXX ac_save_CXX=$CXX
cat confdefs.h - <<_ACEOF >conftest.$ac_ext cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */ /* end confdefs.h. */
@@ -7435,7 +7435,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their # report actual input values of CONFIG_FILES etc. instead of their
# values after options handling. # values after options handling.
ac_log=" ac_log="
This file was extended by cpuminer-opt $as_me 25.7, which was This file was extended by cpuminer-opt $as_me 26.1, which was
generated by GNU Autoconf 2.71. Invocation command line was generated by GNU Autoconf 2.71. Invocation command line was
CONFIG_FILES = $CONFIG_FILES CONFIG_FILES = $CONFIG_FILES
@@ -7503,7 +7503,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config='$ac_cs_config_escaped' ac_cs_config='$ac_cs_config_escaped'
ac_cs_version="\\ ac_cs_version="\\
cpuminer-opt config.status 25.7 cpuminer-opt config.status 26.1
configured by $0, generated by GNU Autoconf 2.71, configured by $0, generated by GNU Autoconf 2.71,
with options \\"\$ac_cs_config\\" with options \\"\$ac_cs_config\\"

View File

@@ -1,4 +1,4 @@
AC_INIT([cpuminer-opt], [25.7]) AC_INIT([cpuminer-opt], [26.1])
AC_PREREQ([2.59c]) AC_PREREQ([2.59c])
AC_CANONICAL_SYSTEM AC_CANONICAL_SYSTEM

View File

@@ -1,6 +1,6 @@
#! /bin/sh #! /bin/sh
# Guess values for system-dependent variables and create Makefiles. # Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.72 for cpuminer-opt 25.7. # Generated by GNU Autoconf 2.72 for cpuminer-opt 26.1.
# #
# #
# Copyright (C) 1992-1996, 1998-2017, 2020-2023 Free Software Foundation, # Copyright (C) 1992-1996, 1998-2017, 2020-2023 Free Software Foundation,
@@ -601,8 +601,8 @@ MAKEFLAGS=
# Identity of this package. # Identity of this package.
PACKAGE_NAME='cpuminer-opt' PACKAGE_NAME='cpuminer-opt'
PACKAGE_TARNAME='cpuminer-opt' PACKAGE_TARNAME='cpuminer-opt'
PACKAGE_VERSION='25.7' PACKAGE_VERSION='26.1'
PACKAGE_STRING='cpuminer-opt 25.7' PACKAGE_STRING='cpuminer-opt 26.1'
PACKAGE_BUGREPORT='' PACKAGE_BUGREPORT=''
PACKAGE_URL='' PACKAGE_URL=''
@@ -1352,7 +1352,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing. # Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh. # This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF cat <<_ACEOF
'configure' configures cpuminer-opt 25.7 to adapt to many kinds of systems. 'configure' configures cpuminer-opt 26.1 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]... Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1424,7 +1424,7 @@ fi
if test -n "$ac_init_help"; then if test -n "$ac_init_help"; then
case $ac_init_help in case $ac_init_help in
short | recursive ) echo "Configuration of cpuminer-opt 25.7:";; short | recursive ) echo "Configuration of cpuminer-opt 26.1:";;
esac esac
cat <<\_ACEOF cat <<\_ACEOF
@@ -1528,7 +1528,7 @@ fi
test -n "$ac_init_help" && exit $ac_status test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then if $ac_init_version; then
cat <<\_ACEOF cat <<\_ACEOF
cpuminer-opt configure 25.7 cpuminer-opt configure 26.1
generated by GNU Autoconf 2.72 generated by GNU Autoconf 2.72
Copyright (C) 2023 Free Software Foundation, Inc. Copyright (C) 2023 Free Software Foundation, Inc.
@@ -1949,7 +1949,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake. running configure, to aid debugging if configure makes a mistake.
It was created by cpuminer-opt $as_me 25.7, which was It was created by cpuminer-opt $as_me 26.1, which was
generated by GNU Autoconf 2.72. Invocation command line was generated by GNU Autoconf 2.72. Invocation command line was
$ $0$ac_configure_args_raw $ $0$ac_configure_args_raw
@@ -3768,7 +3768,7 @@ fi
# Define the identity of the package. # Define the identity of the package.
PACKAGE='cpuminer-opt' PACKAGE='cpuminer-opt'
VERSION='25.7' VERSION='26.1'
printf "%s\n" "#define PACKAGE \"$PACKAGE\"" >>confdefs.h printf "%s\n" "#define PACKAGE \"$PACKAGE\"" >>confdefs.h
@@ -7581,7 +7581,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their # report actual input values of CONFIG_FILES etc. instead of their
# values after options handling. # values after options handling.
ac_log=" ac_log="
This file was extended by cpuminer-opt $as_me 25.7, which was This file was extended by cpuminer-opt $as_me 26.1, which was
generated by GNU Autoconf 2.72. Invocation command line was generated by GNU Autoconf 2.72. Invocation command line was
CONFIG_FILES = $CONFIG_FILES CONFIG_FILES = $CONFIG_FILES
@@ -7649,7 +7649,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config='$ac_cs_config_escaped' ac_cs_config='$ac_cs_config_escaped'
ac_cs_version="\\ ac_cs_version="\\
cpuminer-opt config.status 25.7 cpuminer-opt config.status 26.1
configured by $0, generated by GNU Autoconf 2.72, configured by $0, generated by GNU Autoconf 2.72,
with options \\"\$ac_cs_config\\" with options \\"\$ac_cs_config\\"

View File

@@ -1232,7 +1232,7 @@ static int share_result( int result, struct work *work,
sprintf( ares, "A%d", accepted_share_count ); sprintf( ares, "A%d", accepted_share_count );
sprintf( bres, "B%d", solved_block_count ); sprintf( bres, "B%d", solved_block_count );
if ( reason ) if ( reason )
stale = strstr( reason, "job" ); stale = strstr( reason, "job" ) || strstr( reason, "Job" );
else if ( work ) else if ( work )
stale = work->data[ algo_gate.ntime_index ] stale = work->data[ algo_gate.ntime_index ]
!= g_work.data[ algo_gate.ntime_index ]; != g_work.data[ algo_gate.ntime_index ];

View File

@@ -642,6 +642,15 @@ static inline void v128_memcpy( v128_t *dst, const v128_t *src, const int n )
#define v128_shuflr32(v) _mm_shuffle_epi32( v, 0x39 ) #define v128_shuflr32(v) _mm_shuffle_epi32( v, 0x39 )
#define v128_shufll32(v) _mm_shuffle_epi32( v, 0x93 ) #define v128_shufll32(v) _mm_shuffle_epi32( v, 0x93 )
/* Zen6 AMD only
// Reverse bits in bytes
#if defined(__AVX512VL__) && defined(__AVX512BMM__)
#define v128_bitrev8 _mm_vbitrevb_epi8
#endif
*/
// Endian byte swap. // Endian byte swap.
#if defined(__SSSE3__) #if defined(__SSSE3__)

View File

@@ -409,6 +409,15 @@ static inline __m256i mm256_shuflr128_x8( const __m256i v, const int c )
{ return _mm256_alignr_epi8( v, v, c ); } { return _mm256_alignr_epi8( v, v, c ); }
*/ */
/* Zen6 AMD only
// Reverse bits in bytes
#if defined(__AVX512VL__) && defined(__AVX512BMM__)
#define mm256_bitrev8 _mm256_vbitrevb_epi8
#endif
*/
// Reverse byte order in elements, endian bswap. // Reverse byte order in elements, endian bswap.
#define mm256_bswap_64( v ) _mm256_shuffle_epi8( v, V256_BSWAP64 ) #define mm256_bswap_64( v ) _mm256_shuffle_epi8( v, V256_BSWAP64 )

View File

@@ -257,6 +257,15 @@ static inline void memcpy_512( __m512i *dst, const __m512i *src, const int n )
#endif #endif
*/ */
/* Zen6 AMD only
// Reverse bits in bytes
#if defined(__AVX512BMM__)
#define mm512_bitrev8 _mm512_vbitrevb_epi8
#endif
*/
// //
// Reverse byte order of packed elements, vectorized endian conversion. // Reverse byte order of packed elements, vectorized endian conversion.

View File

@@ -431,8 +431,10 @@ static inline uint32x4_t v128_shuflr32( uint32x4_t v )
static inline uint32x4_t v128_shufll32( uint32x4_t v ) static inline uint32x4_t v128_shufll32( uint32x4_t v )
{ return vextq_u32( v, v, 3 ); } { return vextq_u32( v, v, 3 ); }
// reverse bits in bytes, nothing like it in x86_64 /* not used
// reverse bits in bytes, nothing like it in x86_64 until Zen6
#define v128_bitrev8 vrbitq_u8 #define v128_bitrev8 vrbitq_u8
*/
// reverse byte order // reverse byte order
#define v128_bswap16(v) (uint16x8_t)vrev16q_u8( (uint8x16_t)(v) ) #define v128_bswap16(v) (uint16x8_t)vrev16q_u8( (uint8x16_t)(v) )

View File

@@ -16,8 +16,8 @@
#include "miner.h" #include "miner.h"
#include "simd-utils.h" #include "simd-utils.h"
// Missing on MinGW, MacOS // hwcap.h missing on MinGW, MacOS
#if defined(__aarch64__) && !defined(WIN32) && !defined(__APPLE__) #if defined(__aarch64__) && !(defined(WIN32) || defined(__APPLE__))
#define ARM_AUXV #define ARM_AUXV
#endif #endif
@@ -191,6 +191,7 @@ static inline int cpu_fanpercent()
#define CPU_INFO (1) #define CPU_INFO (1)
#define CACHE_TLB_DESCRIPTOR (2) #define CACHE_TLB_DESCRIPTOR (2)
#define EXTENDED_FEATURES (7) #define EXTENDED_FEATURES (7)
#define EXTENDED_FEATURE_ID (0x21)
#define AVX10_FEATURES (0x24) #define AVX10_FEATURES (0x24)
#define HIGHEST_EXT_FUNCTION (0x80000000) #define HIGHEST_EXT_FUNCTION (0x80000000)
#define EXTENDED_CPU_INFO (0x80000001) #define EXTENDED_CPU_INFO (0x80000001)
@@ -254,8 +255,8 @@ static inline int cpu_fanpercent()
#define AVX512_BF16_Flag (1<< 5) #define AVX512_BF16_Flag (1<< 5)
#define AMX_FP16_Flag (1<<21) #define AMX_FP16_Flag (1<<21)
#define AVX_IFMA_Flag (1<<23) #define AVX_IFMA_Flag (1<<23)
#define MOVRS_Flag (1<<31) // Both names are referenced in docs
#define AVX10_MOVRS_Flag (1<<31) #define AVX10_MOVRS_Flag (1<<31)
#define MOVRS_Flag (1<<31) // Both names are referenced in docs
// EDX // EDX
#define AVX_VNNI_INT8_Flag (1<< 4) #define AVX_VNNI_INT8_Flag (1<< 4)
#define AVX_NE_CONVERT_Flag (1<< 5) #define AVX_NE_CONVERT_Flag (1<< 5)
@@ -264,6 +265,10 @@ static inline int cpu_fanpercent()
#define AVX10_Flag (1<<19) #define AVX10_Flag (1<<19)
#define APX_F_Flag (1<<21) #define APX_F_Flag (1<<21)
// EXTENDED_FEATURE_ID: EAX=0x21, ECX=0
// EAX
#define AVX512_BMM_Flag (1<<23) // Zen6 AMD only
// AVX10_FEATURES: EAX=0x24, ECX=0 // AVX10_FEATURES: EAX=0x24, ECX=0
// EBX // EBX
#define AVX10_VERSION_mask 0xff // bits [7:0] #define AVX10_VERSION_mask 0xff // bits [7:0]
@@ -735,7 +740,7 @@ static inline bool has_avx512()
#endif #endif
} }
static inline bool has_vbmi() static inline bool has_avx512vbmi()
{ {
#if defined(__x86_64__) #if defined(__x86_64__)
unsigned int cpu_info[4] = { 0 }; unsigned int cpu_info[4] = { 0 };
@@ -746,7 +751,7 @@ static inline bool has_vbmi()
#endif #endif
} }
static inline bool has_vbmi2() static inline bool has_avx512vbmi2()
{ {
#if defined(__x86_64__) #if defined(__x86_64__)
unsigned int cpu_info[4] = { 0 }; unsigned int cpu_info[4] = { 0 };
@@ -757,6 +762,18 @@ static inline bool has_vbmi2()
#endif #endif
} }
// Runtime detection of the AVX512_BMM instruction set extension
// (Zen6 AMD only).
//
// Executes CPUID with EAX = EXTENDED_FEATURE_ID, ECX = 0 and tests
// AVX512_BMM_Flag in the EAX value that comes back. Builds that do not
// target x86_64 never execute CPUID and always report false.
//
// NOTE(review): the leaf is queried without first checking that the CPU
// implements it - confirm the result is reliable on CPUs that do not.
// NOTE(review): AMD appears to document its extended feature identification
// leaves in the 0x8000_00xx range - confirm the value of EXTENDED_FEATURE_ID
// is the intended leaf number.
static inline bool has_avx512bmm()
{
#if !defined(__x86_64__)
   return false;
#else
   unsigned int regs[4] = { 0 };   // indexed by the *_Reg constants
   cpuid( EXTENDED_FEATURE_ID, 0, regs );
   const unsigned int eax = regs[ EAX_Reg ];
   return eax & AVX512_BMM_Flag;
#endif
}
static inline bool has_amx() static inline bool has_amx()
{ {
#if defined(__x86_64__) #if defined(__x86_64__)
@@ -817,10 +834,9 @@ static inline bool has_sveaes()
static inline bool has_sha256() static inline bool has_sha256()
{ {
#if defined(__x86_64__) #if defined(__x86_64__)
unsigned int cpu_info[4] = { 0 }; unsigned int cpu_info[4] = { 0 };
cpuid( EXTENDED_FEATURES, 0, cpu_info ); cpuid( EXTENDED_FEATURES, 0, cpu_info );
return cpu_info[ EBX_Reg ] & SHA_Flag; return cpu_info[ EBX_Reg ] & SHA_Flag;
return false;
#elif defined(__aarch64__) && defined(HWCAP_SHA2) #elif defined(__aarch64__) && defined(HWCAP_SHA2)
// NEON SHA256 // NEON SHA256
unsigned int cpu_info[4] = { 0 }; unsigned int cpu_info[4] = { 0 };
@@ -851,6 +867,7 @@ static inline bool has_sha512()
#endif #endif
} }
// ARM64 only
static inline bool has_sha3() static inline bool has_sha3()
{ {
#if defined(__aarch64__) && defined(HWCAP_SHA3) #if defined(__aarch64__) && defined(HWCAP_SHA3)