mirror of
https://github.com/JayDDee/cpuminer-opt.git
synced 2025-09-17 23:44:27 +00:00
v24.5
This commit is contained in:
@@ -166,7 +166,6 @@ cpuminer_SOURCES = \
|
|||||||
algo/shavite/sph-shavite-aesni.c \
|
algo/shavite/sph-shavite-aesni.c \
|
||||||
algo/shavite/shavite-hash-2way.c \
|
algo/shavite/shavite-hash-2way.c \
|
||||||
algo/shavite/shavite-hash-4way.c \
|
algo/shavite/shavite-hash-4way.c \
|
||||||
algo/shavite/shavite.c \
|
|
||||||
algo/simd/nist.c \
|
algo/simd/nist.c \
|
||||||
algo/simd/vector.c \
|
algo/simd/vector.c \
|
||||||
algo/simd/sph_simd.c \
|
algo/simd/sph_simd.c \
|
||||||
|
@@ -75,6 +75,12 @@ If not what makes it happen or not happen?
|
|||||||
Change Log
|
Change Log
|
||||||
----------
|
----------
|
||||||
|
|
||||||
|
v24.5
|
||||||
|
|
||||||
|
Fix MinGW compile error after MSys2 upgrade to GCC-14.2.
|
||||||
|
#427: GBT: Improved handling of new work.
|
||||||
|
Removed shavite3 algo.
|
||||||
|
|
||||||
v24.4
|
v24.4
|
||||||
|
|
||||||
x86_64: fixed a bug in ornot macro for AVX2 which broke some algos in v24.2.
|
x86_64: fixed a bug in ornot macro for AVX2 which broke some algos in v24.2.
|
||||||
|
@@ -263,8 +263,8 @@ static void init_algo_gate( algo_gate_t* gate )
|
|||||||
gate->build_block_header = (void*)&std_build_block_header;
|
gate->build_block_header = (void*)&std_build_block_header;
|
||||||
gate->build_extraheader = (void*)&std_build_extraheader;
|
gate->build_extraheader = (void*)&std_build_extraheader;
|
||||||
gate->set_work_data_endian = (void*)&do_nothing;
|
gate->set_work_data_endian = (void*)&do_nothing;
|
||||||
gate->resync_threads = (void*)&do_nothing;
|
// gate->resync_threads = (void*)&do_nothing;
|
||||||
gate->do_this_thread = (void*)&return_true;
|
// gate->do_this_thread = (void*)&return_true;
|
||||||
gate->longpoll_rpc_call = (void*)&std_longpoll_rpc_call;
|
gate->longpoll_rpc_call = (void*)&std_longpoll_rpc_call;
|
||||||
gate->get_work_data_size = (void*)&std_get_work_data_size;
|
gate->get_work_data_size = (void*)&std_get_work_data_size;
|
||||||
gate->optimizations = EMPTY_SET;
|
gate->optimizations = EMPTY_SET;
|
||||||
@@ -340,7 +340,6 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
|
|||||||
case ALGO_SHA256T: rc = register_sha256t_algo ( gate ); break;
|
case ALGO_SHA256T: rc = register_sha256t_algo ( gate ); break;
|
||||||
case ALGO_SHA3D: rc = register_sha3d_algo ( gate ); break;
|
case ALGO_SHA3D: rc = register_sha3d_algo ( gate ); break;
|
||||||
case ALGO_SHA512256D: rc = register_sha512256d_algo ( gate ); break;
|
case ALGO_SHA512256D: rc = register_sha512256d_algo ( gate ); break;
|
||||||
case ALGO_SHAVITE3: rc = register_shavite_algo ( gate ); break;
|
|
||||||
case ALGO_SKEIN: rc = register_skein_algo ( gate ); break;
|
case ALGO_SKEIN: rc = register_skein_algo ( gate ); break;
|
||||||
case ALGO_SKEIN2: rc = register_skein2_algo ( gate ); break;
|
case ALGO_SKEIN2: rc = register_skein2_algo ( gate ); break;
|
||||||
case ALGO_SKUNK: rc = register_skunk_algo ( gate ); break;
|
case ALGO_SKUNK: rc = register_skunk_algo ( gate ); break;
|
||||||
|
@@ -165,10 +165,10 @@ char* ( *malloc_txs_request ) ( struct work* );
|
|||||||
void ( *set_work_data_endian ) ( struct work* );
|
void ( *set_work_data_endian ) ( struct work* );
|
||||||
|
|
||||||
// Diverge mining threads
|
// Diverge mining threads
|
||||||
bool ( *do_this_thread ) ( int );
|
//bool ( *do_this_thread ) ( int );
|
||||||
|
|
||||||
// After do_this_thread
|
// After do_this_thread
|
||||||
void ( *resync_threads ) ( int, struct work* );
|
//void ( *resync_threads ) ( int, struct work* );
|
||||||
|
|
||||||
json_t* ( *longpoll_rpc_call ) ( CURL*, int*, char* );
|
json_t* ( *longpoll_rpc_call ) ( CURL*, int*, char* );
|
||||||
|
|
||||||
|
2
api.c
2
api.c
@@ -531,7 +531,7 @@ static void api()
|
|||||||
time_t bindstart;
|
time_t bindstart;
|
||||||
struct sockaddr_in serv;
|
struct sockaddr_in serv;
|
||||||
struct sockaddr_in cli;
|
struct sockaddr_in cli;
|
||||||
socklen_t clisiz;
|
uint32_t clisiz;
|
||||||
bool addrok = false;
|
bool addrok = false;
|
||||||
long long counter;
|
long long counter;
|
||||||
char *result;
|
char *result;
|
||||||
|
20
configure
vendored
20
configure
vendored
@@ -1,6 +1,6 @@
|
|||||||
#! /bin/sh
|
#! /bin/sh
|
||||||
# Guess values for system-dependent variables and create Makefiles.
|
# Guess values for system-dependent variables and create Makefiles.
|
||||||
# Generated by GNU Autoconf 2.71 for cpuminer-opt 24.4.
|
# Generated by GNU Autoconf 2.71 for cpuminer-opt 24.5.
|
||||||
#
|
#
|
||||||
#
|
#
|
||||||
# Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation,
|
# Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation,
|
||||||
@@ -608,8 +608,8 @@ MAKEFLAGS=
|
|||||||
# Identity of this package.
|
# Identity of this package.
|
||||||
PACKAGE_NAME='cpuminer-opt'
|
PACKAGE_NAME='cpuminer-opt'
|
||||||
PACKAGE_TARNAME='cpuminer-opt'
|
PACKAGE_TARNAME='cpuminer-opt'
|
||||||
PACKAGE_VERSION='24.4'
|
PACKAGE_VERSION='24.5'
|
||||||
PACKAGE_STRING='cpuminer-opt 24.4'
|
PACKAGE_STRING='cpuminer-opt 24.5'
|
||||||
PACKAGE_BUGREPORT=''
|
PACKAGE_BUGREPORT=''
|
||||||
PACKAGE_URL=''
|
PACKAGE_URL=''
|
||||||
|
|
||||||
@@ -1360,7 +1360,7 @@ if test "$ac_init_help" = "long"; then
|
|||||||
# Omit some internal or obsolete options to make the list less imposing.
|
# Omit some internal or obsolete options to make the list less imposing.
|
||||||
# This message is too long to be a string in the A/UX 3.1 sh.
|
# This message is too long to be a string in the A/UX 3.1 sh.
|
||||||
cat <<_ACEOF
|
cat <<_ACEOF
|
||||||
\`configure' configures cpuminer-opt 24.4 to adapt to many kinds of systems.
|
\`configure' configures cpuminer-opt 24.5 to adapt to many kinds of systems.
|
||||||
|
|
||||||
Usage: $0 [OPTION]... [VAR=VALUE]...
|
Usage: $0 [OPTION]... [VAR=VALUE]...
|
||||||
|
|
||||||
@@ -1432,7 +1432,7 @@ fi
|
|||||||
|
|
||||||
if test -n "$ac_init_help"; then
|
if test -n "$ac_init_help"; then
|
||||||
case $ac_init_help in
|
case $ac_init_help in
|
||||||
short | recursive ) echo "Configuration of cpuminer-opt 24.4:";;
|
short | recursive ) echo "Configuration of cpuminer-opt 24.5:";;
|
||||||
esac
|
esac
|
||||||
cat <<\_ACEOF
|
cat <<\_ACEOF
|
||||||
|
|
||||||
@@ -1538,7 +1538,7 @@ fi
|
|||||||
test -n "$ac_init_help" && exit $ac_status
|
test -n "$ac_init_help" && exit $ac_status
|
||||||
if $ac_init_version; then
|
if $ac_init_version; then
|
||||||
cat <<\_ACEOF
|
cat <<\_ACEOF
|
||||||
cpuminer-opt configure 24.4
|
cpuminer-opt configure 24.5
|
||||||
generated by GNU Autoconf 2.71
|
generated by GNU Autoconf 2.71
|
||||||
|
|
||||||
Copyright (C) 2021 Free Software Foundation, Inc.
|
Copyright (C) 2021 Free Software Foundation, Inc.
|
||||||
@@ -1985,7 +1985,7 @@ cat >config.log <<_ACEOF
|
|||||||
This file contains any messages produced by compilers while
|
This file contains any messages produced by compilers while
|
||||||
running configure, to aid debugging if configure makes a mistake.
|
running configure, to aid debugging if configure makes a mistake.
|
||||||
|
|
||||||
It was created by cpuminer-opt $as_me 24.4, which was
|
It was created by cpuminer-opt $as_me 24.5, which was
|
||||||
generated by GNU Autoconf 2.71. Invocation command line was
|
generated by GNU Autoconf 2.71. Invocation command line was
|
||||||
|
|
||||||
$ $0$ac_configure_args_raw
|
$ $0$ac_configure_args_raw
|
||||||
@@ -3593,7 +3593,7 @@ fi
|
|||||||
|
|
||||||
# Define the identity of the package.
|
# Define the identity of the package.
|
||||||
PACKAGE='cpuminer-opt'
|
PACKAGE='cpuminer-opt'
|
||||||
VERSION='24.4'
|
VERSION='24.5'
|
||||||
|
|
||||||
|
|
||||||
printf "%s\n" "#define PACKAGE \"$PACKAGE\"" >>confdefs.h
|
printf "%s\n" "#define PACKAGE \"$PACKAGE\"" >>confdefs.h
|
||||||
@@ -7508,7 +7508,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
|
|||||||
# report actual input values of CONFIG_FILES etc. instead of their
|
# report actual input values of CONFIG_FILES etc. instead of their
|
||||||
# values after options handling.
|
# values after options handling.
|
||||||
ac_log="
|
ac_log="
|
||||||
This file was extended by cpuminer-opt $as_me 24.4, which was
|
This file was extended by cpuminer-opt $as_me 24.5, which was
|
||||||
generated by GNU Autoconf 2.71. Invocation command line was
|
generated by GNU Autoconf 2.71. Invocation command line was
|
||||||
|
|
||||||
CONFIG_FILES = $CONFIG_FILES
|
CONFIG_FILES = $CONFIG_FILES
|
||||||
@@ -7576,7 +7576,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\
|
|||||||
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
|
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
|
||||||
ac_cs_config='$ac_cs_config_escaped'
|
ac_cs_config='$ac_cs_config_escaped'
|
||||||
ac_cs_version="\\
|
ac_cs_version="\\
|
||||||
cpuminer-opt config.status 24.4
|
cpuminer-opt config.status 24.5
|
||||||
configured by $0, generated by GNU Autoconf 2.71,
|
configured by $0, generated by GNU Autoconf 2.71,
|
||||||
with options \\"\$ac_cs_config\\"
|
with options \\"\$ac_cs_config\\"
|
||||||
|
|
||||||
|
@@ -1,4 +1,4 @@
|
|||||||
AC_INIT([cpuminer-opt], [24.4])
|
AC_INIT([cpuminer-opt], [24.5])
|
||||||
|
|
||||||
AC_PREREQ([2.59c])
|
AC_PREREQ([2.59c])
|
||||||
AC_CANONICAL_SYSTEM
|
AC_CANONICAL_SYSTEM
|
||||||
|
4323
configure~
4323
configure~
File diff suppressed because it is too large
Load Diff
42
cpu-miner.c
42
cpu-miner.c
@@ -1592,12 +1592,12 @@ start:
|
|||||||
|
|
||||||
applog( LOG_BLUE, "New Block %d, Tx %d, Net Diff %.5g, Ntime %08x",
|
applog( LOG_BLUE, "New Block %d, Tx %d, Net Diff %.5g, Ntime %08x",
|
||||||
work->height, work->tx_count, net_diff,
|
work->height, work->tx_count, net_diff,
|
||||||
work->data[ algo_gate.ntime_index ] );
|
bswap_32( work->data[ algo_gate.ntime_index ] ) );
|
||||||
}
|
}
|
||||||
else if ( memcmp( &work->data[1], &g_work.data[1], 32 ) )
|
else if ( memcmp( work->data, g_work.data, algo_gate.work_cmp_size ) )
|
||||||
applog( LOG_BLUE, "New Work: Block %d, Tx %d, Net Diff %.5g, Ntime %08x",
|
applog( LOG_BLUE, "New Work: Block %d, Tx %d, Net Diff %.5g, Ntime %08x",
|
||||||
work->height, work->tx_count, net_diff,
|
work->height, work->tx_count, net_diff,
|
||||||
work->data[ algo_gate.ntime_index ] );
|
bswap_32( work->data[ algo_gate.ntime_index ] ) );
|
||||||
else
|
else
|
||||||
new_work = false;
|
new_work = false;
|
||||||
|
|
||||||
@@ -2139,7 +2139,7 @@ static void *miner_thread( void *userdata )
|
|||||||
// uint32_t end_nonce = opt_benchmark
|
// uint32_t end_nonce = opt_benchmark
|
||||||
// ? ( 0xffffffffU / opt_n_threads ) * (thr_id + 1) - 0x20
|
// ? ( 0xffffffffU / opt_n_threads ) * (thr_id + 1) - 0x20
|
||||||
// : 0;
|
// : 0;
|
||||||
uint32_t end_nonce = 0xffffffffU / opt_n_threads * (thr_id + 1) - 0x20;
|
uint32_t end_nonce = 0xffffffffU / opt_n_threads * (thr_id + 1) - opt_n_threads;
|
||||||
|
|
||||||
memset( &work, 0, sizeof(work) );
|
memset( &work, 0, sizeof(work) );
|
||||||
|
|
||||||
@@ -2206,8 +2206,6 @@ static void *miner_thread( void *userdata )
|
|||||||
// int64_t max64 = 1000;
|
// int64_t max64 = 1000;
|
||||||
int nonce_found = 0;
|
int nonce_found = 0;
|
||||||
|
|
||||||
// if ( likely( algo_gate.do_this_thread( thr_id ) ) )
|
|
||||||
// {
|
|
||||||
if ( have_stratum )
|
if ( have_stratum )
|
||||||
{
|
{
|
||||||
while ( unlikely( stratum_down ) )
|
while ( unlikely( stratum_down ) )
|
||||||
@@ -2221,8 +2219,8 @@ static void *miner_thread( void *userdata )
|
|||||||
{
|
{
|
||||||
if ( !thr_id )
|
if ( !thr_id )
|
||||||
{
|
{
|
||||||
applog( LOG_WARNING, "nonce range exhausted, extranonce not subscribed" );
|
applog( LOG_WARNING, "Nonce range exhausted, extranonce not subscribed." );
|
||||||
applog( LOG_WARNING, "waiting for new work...");
|
applog( LOG_WARNING, "Waiting for new work...");
|
||||||
}
|
}
|
||||||
while ( !work_restart[thr_id].restart )
|
while ( !work_restart[thr_id].restart )
|
||||||
sleep ( 1 );
|
sleep ( 1 );
|
||||||
@@ -2231,9 +2229,16 @@ static void *miner_thread( void *userdata )
|
|||||||
}
|
}
|
||||||
else if ( !opt_benchmark ) // GBT or getwork
|
else if ( !opt_benchmark ) // GBT or getwork
|
||||||
{
|
{
|
||||||
pthread_rwlock_wrlock( &g_work_lock );
|
// max64 is used to set end_nonce to match the scantime.
|
||||||
|
// It also factors the nonce range to end the scan when nonces are
|
||||||
|
// exhausted. In either case needing new work can be assumed.
|
||||||
|
// Only problem is every thread will call get_work.
|
||||||
|
// First thread resets scantime blocking all subsequent threads
|
||||||
|
// from fetching new work.
|
||||||
|
|
||||||
if ( ( ( time(NULL) - g_work_time ) >= opt_scantime )
|
pthread_rwlock_wrlock( &g_work_lock );
|
||||||
|
const time_t now = time(NULL);
|
||||||
|
if ( ( ( now - g_work_time ) >= opt_scantime )
|
||||||
|| ( *nonceptr >= end_nonce ) )
|
|| ( *nonceptr >= end_nonce ) )
|
||||||
{
|
{
|
||||||
if ( unlikely( !get_work( mythr, &g_work ) ) )
|
if ( unlikely( !get_work( mythr, &g_work ) ) )
|
||||||
@@ -2242,10 +2247,8 @@ static void *miner_thread( void *userdata )
|
|||||||
applog( LOG_ERR, "work retrieval failed, exiting miner thread %d", thr_id );
|
applog( LOG_ERR, "work retrieval failed, exiting miner thread %d", thr_id );
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
g_work_time = time(NULL);
|
g_work_time = now;
|
||||||
// restart_threads();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pthread_rwlock_unlock( &g_work_lock );
|
pthread_rwlock_unlock( &g_work_lock );
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2256,9 +2259,6 @@ static void *miner_thread( void *userdata )
|
|||||||
|
|
||||||
pthread_rwlock_unlock( &g_work_lock );
|
pthread_rwlock_unlock( &g_work_lock );
|
||||||
|
|
||||||
// } // do_this_thread
|
|
||||||
// algo_gate.resync_threads( thr_id, &work );
|
|
||||||
|
|
||||||
// conditional mining
|
// conditional mining
|
||||||
if ( unlikely( !wanna_mine( thr_id ) ) )
|
if ( unlikely( !wanna_mine( thr_id ) ) )
|
||||||
{
|
{
|
||||||
@@ -2315,12 +2315,6 @@ static void *miner_thread( void *userdata )
|
|||||||
gettimeofday( (struct timeval *) &tv_start, NULL );
|
gettimeofday( (struct timeval *) &tv_start, NULL );
|
||||||
|
|
||||||
// Scan for nonce
|
// Scan for nonce
|
||||||
// nonce_found = scanhash_sha256dt_ref( &work, max_nonce, &hashes_done,
|
|
||||||
// mythr );
|
|
||||||
// nonce_found = scanhash_sha256dt_4x32( &work, max_nonce, &hashes_done,
|
|
||||||
// mythr );
|
|
||||||
|
|
||||||
|
|
||||||
nonce_found = algo_gate.scanhash( &work, max_nonce, &hashes_done,
|
nonce_found = algo_gate.scanhash( &work, max_nonce, &hashes_done,
|
||||||
mythr );
|
mythr );
|
||||||
|
|
||||||
@@ -2342,8 +2336,8 @@ static void *miner_thread( void *userdata )
|
|||||||
// If unsubmitted nonce(s) found, submit now.
|
// If unsubmitted nonce(s) found, submit now.
|
||||||
if ( unlikely( nonce_found && !opt_benchmark ) )
|
if ( unlikely( nonce_found && !opt_benchmark ) )
|
||||||
{
|
{
|
||||||
// applog( LOG_WARNING, "BUG: See RELEASE_NOTES for reporting bugs. Algo = %s.",
|
applog( LOG_WARNING, "BUG: See RELEASE_NOTES for reporting bugs. Algo = %s.",
|
||||||
// algo_names[ opt_algo ] );
|
algo_names[ opt_algo ] );
|
||||||
if ( !submit_work( mythr, &work ) )
|
if ( !submit_work( mythr, &work ) )
|
||||||
{
|
{
|
||||||
applog( LOG_WARNING, "Failed to submit share." );
|
applog( LOG_WARNING, "Failed to submit share." );
|
||||||
|
3
miner.h
3
miner.h
@@ -644,7 +644,6 @@ enum algos {
|
|||||||
ALGO_SHA256T,
|
ALGO_SHA256T,
|
||||||
ALGO_SHA3D,
|
ALGO_SHA3D,
|
||||||
ALGO_SHA512256D,
|
ALGO_SHA512256D,
|
||||||
ALGO_SHAVITE3,
|
|
||||||
ALGO_SKEIN,
|
ALGO_SKEIN,
|
||||||
ALGO_SKEIN2,
|
ALGO_SKEIN2,
|
||||||
ALGO_SKUNK,
|
ALGO_SKUNK,
|
||||||
@@ -740,7 +739,6 @@ static const char* const algo_names[] = {
|
|||||||
"sha256t",
|
"sha256t",
|
||||||
"sha3d",
|
"sha3d",
|
||||||
"sha512256d",
|
"sha512256d",
|
||||||
"shavite3",
|
|
||||||
"skein",
|
"skein",
|
||||||
"skein2",
|
"skein2",
|
||||||
"skunk",
|
"skunk",
|
||||||
@@ -904,7 +902,6 @@ Options:\n\
|
|||||||
sha256t Triple SHA-256, Onecoin (OC)\n\
|
sha256t Triple SHA-256, Onecoin (OC)\n\
|
||||||
sha3d Double Keccak256 (BSHA3)\n\
|
sha3d Double Keccak256 (BSHA3)\n\
|
||||||
sha512256d Double SHA-512 (Radiant)\n\
|
sha512256d Double SHA-512 (Radiant)\n\
|
||||||
shavite3 Shavite3\n\
|
|
||||||
skein Skein+Sha (Skeincoin)\n\
|
skein Skein+Sha (Skeincoin)\n\
|
||||||
skein2 Double Skein (Woodcoin)\n\
|
skein2 Double Skein (Woodcoin)\n\
|
||||||
skunk Signatum (SIGT)\n\
|
skunk Signatum (SIGT)\n\
|
||||||
|
42
simd-utils.h
42
simd-utils.h
@@ -141,9 +141,40 @@
|
|||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <stddef.h>
|
#include <stddef.h>
|
||||||
|
|
||||||
// SIMD512: Use 512, 256 & 128 bit vectors, excludes AVX512VBMI
|
// GCC-14.1: the AVX512 macros are defined even when compiled with only
|
||||||
// VL256: Include AVX512VL instructions on 256 & 128 bit vectors
|
// -mavx10.1-256, causing compile errors in AVX512 code. Only with
|
||||||
// VBMI: Include AVX512VBMI instructions on all vectors.
|
// -mavx10.1-512 does it compile successfully.
|
||||||
|
// __EVEX512__ is set only when compiled with -mavx10.1-512.
|
||||||
|
// Adding -fno-evex512 doesn't help.
|
||||||
|
// Building with -mapxf fails on a CPU without APX because configure can't
|
||||||
|
// run its test program.
|
||||||
|
/*
|
||||||
|
// Test for macros
|
||||||
|
#ifdef __AVX10__
|
||||||
|
#warning "__AVX10__"
|
||||||
|
#endif
|
||||||
|
#ifdef __AVX10_1__
|
||||||
|
#warning "__AVX10_1__"
|
||||||
|
#endif
|
||||||
|
#ifdef __AVX10_1_256__
|
||||||
|
#warning "__AVX10_1_256__"
|
||||||
|
#endif
|
||||||
|
#ifdef __AVX10_1_512__
|
||||||
|
#warning "__AVX10_1_512__"
|
||||||
|
#endif
|
||||||
|
#ifdef __EVEX512__
|
||||||
|
#warning "__EVEX512__"
|
||||||
|
#endif
|
||||||
|
*/
|
||||||
|
|
||||||
|
// AVX10 complicates vector support by adding AVX512 features to CPUs without 512 bit
|
||||||
|
// vector support. AVX10.1 is just a renaming of AVX512 and is only available for
|
||||||
|
// Intel P-core only CPUs. AVX10.2 adds support for E-cores that don't support 512 bit
|
||||||
|
// vectors. The following macros simplify things.
|
||||||
|
// SIMD512: Use 512, 256 & 128 bit vectors, AVX512VBMI is not included and must be
|
||||||
|
// tested separately.
|
||||||
|
// VL256: Include AVX512VL instructions for 256 & 128 bit vectors.
|
||||||
|
// VBMI: Include AVX512VBMI instructions for supported vector lengths.
|
||||||
|
|
||||||
// AVX10 can exist without support for 512 bit vectors.
|
// AVX10 can exist without support for 512 bit vectors.
|
||||||
#if defined(__AVX10_1_512__)
|
#if defined(__AVX10_1_512__)
|
||||||
@@ -153,8 +184,9 @@
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
// AVX512VL instructions applied to 256 & 128 bit vectors are supported with
|
// AVX512VL instructions applied to 256 & 128 bit vectors are supported with
|
||||||
// either AVX512VL or any version of AVX10.
|
// either AVX512VL or AVX10. Support for CPUs without 512 bit vectors is available
|
||||||
#if defined(__AVX10_1__)
|
// with AVX10.2.
|
||||||
|
#if defined(__AVX10_2__) || defined(__AVX10_1_512__)
|
||||||
#define VL256 1
|
#define VL256 1
|
||||||
#elif defined(__AVX512VL__)
|
#elif defined(__AVX512VL__)
|
||||||
#define VL256 1
|
#define VL256 1
|
||||||
|
@@ -32,6 +32,14 @@
|
|||||||
// Intrinsics automatically promote from REX to VEX when AVX is available
|
// Intrinsics automatically promote from REX to VEX when AVX is available
|
||||||
// but ASM needs to be done manually.
|
// but ASM needs to be done manually.
|
||||||
//
|
//
|
||||||
|
// APX supports EGPR which adds 16 more GPRs and 3 operand instructions.
|
||||||
|
// This may affect ASM that include instructions that are superseded by APX
|
||||||
|
// versions and are therefore incompatible with APX.
|
||||||
|
// As a result GCC-14 disables EGPR by default and can be enabled with
|
||||||
|
// "-mapx-inline-asm-use-gpr32"
|
||||||
|
//TODO
|
||||||
|
// Some ASM functions may need to be updated to support EGPR with APX.
|
||||||
|
//
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
// New architecturally agnostic syntax:
|
// New architecturally agnostic syntax:
|
||||||
@@ -164,7 +172,7 @@ typedef union
|
|||||||
// necessary the cvt, set, or set1 intrinsics can be used allowing the
|
// necessary the cvt, set, or set1 intrinsics can be used allowing the
|
||||||
// compiler to exploit new features to produce optimum code.
|
// compiler to exploit new features to produce optimum code.
|
||||||
// Currently only used internally and by Luffa.
|
// Currently only used internally and by Luffa.
|
||||||
|
// It also has implications for APX EGPR feature.
|
||||||
|
|
||||||
#define v128_mov64 _mm_cvtsi64_si128
|
#define v128_mov64 _mm_cvtsi64_si128
|
||||||
#define v128_mov32 _mm_cvtsi32_si128
|
#define v128_mov32 _mm_cvtsi32_si128
|
||||||
|
@@ -125,7 +125,7 @@ static inline __m512i mm512_perm_128( const __m512i v, const int c )
|
|||||||
// Pseudo constants.
|
// Pseudo constants.
|
||||||
#define m512_zero _mm512_setzero_si512()
|
#define m512_zero _mm512_setzero_si512()
|
||||||
|
|
||||||
// use asm to avoid compiler warning for unitialized local
|
// use asm to avoid compiler warning for uninitialized local
|
||||||
static inline __m512i mm512_neg1_fn()
|
static inline __m512i mm512_neg1_fn()
|
||||||
{
|
{
|
||||||
__m512i v;
|
__m512i v;
|
||||||
|
@@ -11,6 +11,17 @@
|
|||||||
// to support 2 way parallel hashing using MMX, or NEON for 32 bit hash
|
// to support 2 way parallel hashing using MMX, or NEON for 32 bit hash
|
||||||
// functions, but was never implemented.
|
// functions, but was never implemented.
|
||||||
//
|
//
|
||||||
|
// MMX is being deprecated by compilers, all intrinsics will be converted to use SSE
|
||||||
|
// registers and instructions. MMX will still be available using ASM.
|
||||||
|
// For backward compatibility it's likely the compiler won't allow mixing explicit SSE
|
||||||
|
// with promoted MMX. It is therefore preferable to implement all 64 bit vector code
|
||||||
|
// using explicit SSE with the upper 64 bits being ignored.
|
||||||
|
// Using SSE for 64 bit vectors will complicate loading arrays from memory which will
|
||||||
|
// always load 128 bits. Odd indexes will need to be extracted from the upper 64 bits
|
||||||
|
// of the even index SSE register.
|
||||||
|
// In most cases the existing 4x32 SSE code can be used with 2 lanes being ignored
|
||||||
|
// making this file obsolete.
|
||||||
|
|
||||||
|
|
||||||
#define v64_t __m64
|
#define v64_t __m64
|
||||||
#define v64u32_t v64_t
|
#define v64u32_t v64_t
|
||||||
|
25
simd-utils/simd-sve.h
Normal file
25
simd-utils/simd-sve.h
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
// Placeholder for now.
|
||||||
|
//
|
||||||
|
// This file will hold AArch64 SVE code, a replacement for NEON that uses vector length
|
||||||
|
// agnostic instructions. This means the same code can be used on CPUs with different
|
||||||
|
// SVE vector register lengths. This is not good for vectorized hashing.
|
||||||
|
// Optimum hash is sensitive to the vector register length with different code
|
||||||
|
// used for different register sizes. On X86_64 the vector length is tied to the CPU
|
||||||
|
// feature making it simple and efficient to handle different lengths although it
|
||||||
|
// results in multiple executables. Theoretically SVE could use a single executable for
|
||||||
|
// any vector length.
|
||||||
|
//
|
||||||
|
// With the SVE vector length only known at run time it results in run time overhead
|
||||||
|
// to test the vector length. Theoretically it could be tested at program loading and
|
||||||
|
// appropriate libraries loaded. However I don't know if this can be done and if so
|
||||||
|
// how to do it.
|
||||||
|
//
|
||||||
|
// SVE is not expected to be used for 128 bit vectors as it does not provide any
|
||||||
|
// advantages over NEON. However, it may be implemented for testing purposes
|
||||||
|
// because CPUs with registers larger than 128 bits are currently very rare and very
|
||||||
|
// expensive server class CPUs.
|
||||||
|
//
|
||||||
|
// N-way parallel hashing could be the best use of SVE, using the same code for all
|
||||||
|
// vector lengths with the only variable being the number of lanes. This will still
|
||||||
|
// require run time checking but should be lighter than substituting functions.
|
||||||
|
|
43
sysinfos.c
43
sysinfos.c
@@ -169,17 +169,17 @@ static inline int cpu_fanpercent()
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// CPUID
|
// x86_64 CPUID
|
||||||
|
|
||||||
// This list is incomplete, it only contains features of interest to cpuminer.
|
// This list is incomplete, it only contains features of interest to cpuminer.
|
||||||
// refer to http://en.wikipedia.org/wiki/CPUID for details.
|
// refer to http://en.wikipedia.org/wiki/CPUID for details.
|
||||||
|
|
||||||
// AVX10 compatibility notes
|
// AVX10 compatibility notes
|
||||||
//
|
//
|
||||||
// Notation used: AVX10i.[version]_[vectorwidth]
|
// Display format: AVX10.[version]-[vectorwidth]
|
||||||
// AVX10.1_512 is a rebranding of AVX512 and is effectively the AVX* superset
|
// AVX10.1-512 is a rebranding of AVX512 and is effectively the AVX* superset
|
||||||
// with full 512 bit vector support.
|
// with full 512 bit vector support.
|
||||||
// AVX10.2_256 is effectively AVX2 + AVX512_VL, all AVX512 instructions and
|
// AVX10.2-256 is effectively AVX2 + AVX512_VL, all AVX512 instructions and
|
||||||
// features applied only to 256 bit and 128 bit vectors.
|
// features applied only to 256 bit and 128 bit vectors.
|
||||||
// Future AVX10 versions will add new instructions and features.
|
// Future AVX10 versions will add new instructions and features.
|
||||||
|
|
||||||
@@ -321,12 +321,12 @@ static inline void cpuid( unsigned int leaf, unsigned int subleaf,
|
|||||||
unsigned int output[4] )
|
unsigned int output[4] )
|
||||||
{
|
{
|
||||||
#if defined(AT_HWCAP)
|
#if defined(AT_HWCAP)
|
||||||
output[0] = getauxval(AT_HWCAP);
|
output[0] = getauxval( AT_HWCAP );
|
||||||
#else
|
#else
|
||||||
output[0] = 0;
|
output[0] = 0;
|
||||||
#endif
|
#endif
|
||||||
#if defined(AT_HWCAP2)
|
#if defined(AT_HWCAP2)
|
||||||
output[1] = getauxval(AT_HWCAP2);
|
output[1] = getauxval( AT_HWCAP2 );
|
||||||
#else
|
#else
|
||||||
output[1] = 0;
|
output[1] = 0;
|
||||||
#endif
|
#endif
|
||||||
@@ -508,29 +508,6 @@ static inline void cpu_getmodelid(char *outbuf, size_t maxsz)
|
|||||||
#endif
|
#endif
|
||||||
*/
|
*/
|
||||||
|
|
||||||
// GCC-14.1: the AVX512 macros are defined even when compiled with only
|
|
||||||
// -mavx10.1-256, causing compile errors in AVX512 code. Only with
|
|
||||||
// -mavx10.1-512 does it compile successfully.
|
|
||||||
// __EVEX512__ is set only when compiled with -mavx10.1-512.
|
|
||||||
// Adding -fno-evex512 doesn't help.
|
|
||||||
// Building with -mapxf fails on a CPU without APX because configure can't
|
|
||||||
// run its test program.
|
|
||||||
/*
|
|
||||||
#ifdef __AVX10_1__
|
|
||||||
#warning "__AVX10_1__"
|
|
||||||
#endif
|
|
||||||
#ifdef __AVX10_1_256__
|
|
||||||
#warning "__AVX10_1_256__"
|
|
||||||
#endif
|
|
||||||
#ifdef __AVX10_1_512__
|
|
||||||
#warning "__AVX10_1_512__"
|
|
||||||
#endif
|
|
||||||
#ifdef __EVEX512__
|
|
||||||
#warning "__EVEX512__"
|
|
||||||
#endif
|
|
||||||
*/
|
|
||||||
|
|
||||||
|
|
||||||
// Typical display format: AVX10.[version]_[vectorlength], if vector length is
|
// Typical display format: AVX10.[version]_[vectorlength], if vector length is
|
||||||
// omitted 256 is the default.
|
// omitted 256 is the default.
|
||||||
// Ex: AVX10.1_512
|
// Ex: AVX10.1_512
|
||||||
@@ -646,7 +623,7 @@ static inline bool has_avx2()
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
// Also ensure kernel supports feature
|
// SVE vector width is determined at run time.
|
||||||
static inline bool has_sve()
|
static inline bool has_sve()
|
||||||
{
|
{
|
||||||
#if defined(__aarch64__) && defined(HWCAP_SVE)
|
#if defined(__aarch64__) && defined(HWCAP_SVE)
|
||||||
@@ -780,6 +757,7 @@ static inline bool has_aes()
|
|||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
#elif defined(__aarch64__) && defined(HWCAP_AES)
|
#elif defined(__aarch64__) && defined(HWCAP_AES)
|
||||||
|
// NEON AES
|
||||||
unsigned int cpu_info[4] = { 0 };
|
unsigned int cpu_info[4] = { 0 };
|
||||||
cpuid( 0, 0, cpu_info );
|
cpuid( 0, 0, cpu_info );
|
||||||
return cpu_info[0] & HWCAP_AES;
|
return cpu_info[0] & HWCAP_AES;
|
||||||
@@ -825,6 +803,7 @@ static inline bool has_sha256()
|
|||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
#elif defined(__aarch64__) && defined(HWCAP_SHA2)
|
#elif defined(__aarch64__) && defined(HWCAP_SHA2)
|
||||||
|
// NEON SHA256
|
||||||
unsigned int cpu_info[4] = { 0 };
|
unsigned int cpu_info[4] = { 0 };
|
||||||
cpuid( 0, 0, cpu_info );
|
cpuid( 0, 0, cpu_info );
|
||||||
return cpu_info[0] & HWCAP_SHA2;
|
return cpu_info[0] & HWCAP_SHA2;
|
||||||
@@ -844,6 +823,7 @@ static inline bool has_sha512()
|
|||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
#elif defined(__aarch64__) && defined(HWCAP_SHA512)
|
#elif defined(__aarch64__) && defined(HWCAP_SHA512)
|
||||||
|
// NEON SHA512
|
||||||
unsigned int cpu_info[4] = { 0 };
|
unsigned int cpu_info[4] = { 0 };
|
||||||
cpuid( 0, 0, cpu_info );
|
cpuid( 0, 0, cpu_info );
|
||||||
return cpu_info[0] & HWCAP_SHA512;
|
return cpu_info[0] & HWCAP_SHA512;
|
||||||
@@ -856,6 +836,7 @@ static inline bool has_sha512()
|
|||||||
static inline bool has_sha3()
|
static inline bool has_sha3()
|
||||||
{
|
{
|
||||||
#if defined(__aarch64__) && defined(HWCAP_SHA3)
|
#if defined(__aarch64__) && defined(HWCAP_SHA3)
|
||||||
|
// NEON SHA3
|
||||||
unsigned int cpu_info[4] = { 0 };
|
unsigned int cpu_info[4] = { 0 };
|
||||||
cpuid( 0, 0, cpu_info );
|
cpuid( 0, 0, cpu_info );
|
||||||
return cpu_info[0] & HWCAP_SHA3;
|
return cpu_info[0] & HWCAP_SHA3;
|
||||||
@@ -948,7 +929,7 @@ static inline bool has_avx10_512()
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Includes 128 but may not include 512
|
// Includes 128 but might not include 512
|
||||||
static inline bool has_avx10_256()
|
static inline bool has_avx10_256()
|
||||||
{
|
{
|
||||||
#if defined(__x86_64__)
|
#if defined(__x86_64__)
|
||||||
|
Reference in New Issue
Block a user