mirror of
https://github.com/JayDDee/cpuminer-opt.git
synced 2025-09-17 23:44:27 +00:00
v3.16.4
This commit is contained in:
@@ -65,6 +65,11 @@ If not what makes it happen or not happen?
|
||||
Change Log
|
||||
----------
|
||||
|
||||
v3.16.4
|
||||
|
||||
Faster sha512 and sha256 when not using SHA CPU extension.
|
||||
#329: Fixed GBT incorrect target diff in stats.
|
||||
|
||||
v3.16.3
|
||||
|
||||
#313 Fix compile error with GCC 11.
|
||||
|
@@ -74,9 +74,15 @@ static const uint32_t K256[64] =
|
||||
#define CHs(X, Y, Z) \
|
||||
_mm_xor_si128( _mm_and_si128( _mm_xor_si128( Y, Z ), X ), Z )
|
||||
|
||||
/*
|
||||
#define MAJs(X, Y, Z) \
|
||||
_mm_or_si128( _mm_and_si128( X, Y ), \
|
||||
_mm_and_si128( _mm_or_si128( X, Y ), Z ) )
|
||||
*/
|
||||
|
||||
#define MAJs(X, Y, Z) \
|
||||
_mm_xor_si128( Y, _mm_and_si128( _mm_xor_si128( X, Y ), \
|
||||
_mm_xor_si128( Y, Z ) ) )
|
||||
|
||||
#define BSG2_0(x) \
|
||||
_mm_xor_si128( _mm_xor_si128( \
|
||||
@@ -345,9 +351,20 @@ void sha256_4way_full( void *dst, const void *data, size_t len )
|
||||
#define CHx(X, Y, Z) \
|
||||
_mm256_xor_si256( _mm256_and_si256( _mm256_xor_si256( Y, Z ), X ), Z )
|
||||
|
||||
/*
|
||||
#define MAJx(X, Y, Z) \
|
||||
_mm256_or_si256( _mm256_and_si256( X, Y ), \
|
||||
_mm256_and_si256( _mm256_or_si256( X, Y ), Z ) )
|
||||
*/
|
||||
/*
|
||||
#define MAJx(X, Y, Z) \
|
||||
_mm256_xor_si256( Y, _mm256_and_si256( _mm256_xor_si256( X, Y ), \
|
||||
_mm256_xor_si256( Y, Z ) ) )
|
||||
*/
|
||||
|
||||
#define MAJx(X, Y, Z) \
|
||||
_mm256_xor_si256( Y, _mm256_and_si256( X_xor_Y = _mm256_xor_si256( X, Y ), \
|
||||
Y_xor_Z ) )
|
||||
|
||||
#define BSG2_0x(x) \
|
||||
_mm256_xor_si256( _mm256_xor_si256( \
|
||||
@@ -375,6 +392,7 @@ do { \
|
||||
T1 = _mm256_add_epi32( H, mm256_add4_32( BSG2_1x(E), CHx(E, F, G), \
|
||||
K, W[i] ) ); \
|
||||
T2 = _mm256_add_epi32( BSG2_0x(A), MAJx(A, B, C) ); \
|
||||
Y_xor_Z = X_xor_Y; \
|
||||
D = _mm256_add_epi32( D, T1 ); \
|
||||
H = _mm256_add_epi32( T1, T2 ); \
|
||||
} while (0)
|
||||
@@ -382,7 +400,7 @@ do { \
|
||||
static void
|
||||
sha256_8way_round( sha256_8way_context *ctx, __m256i *in, __m256i r[8] )
|
||||
{
|
||||
register __m256i A, B, C, D, E, F, G, H;
|
||||
register __m256i A, B, C, D, E, F, G, H, X_xor_Y, Y_xor_Z;
|
||||
__m256i W[16];
|
||||
|
||||
mm256_block_bswap_32( W , in );
|
||||
@@ -411,6 +429,8 @@ sha256_8way_round( sha256_8way_context *ctx, __m256i *in, __m256i r[8] )
|
||||
H = m256_const1_64( 0x5BE0CD195BE0CD19 );
|
||||
}
|
||||
|
||||
Y_xor_Z = _mm256_xor_si256( B, C );
|
||||
|
||||
SHA2s_8WAY_STEP( A, B, C, D, E, F, G, H, 0, 0 );
|
||||
SHA2s_8WAY_STEP( H, A, B, C, D, E, F, G, 1, 0 );
|
||||
SHA2s_8WAY_STEP( G, H, A, B, C, D, E, F, 2, 0 );
|
||||
@@ -591,9 +611,20 @@ void sha256_8way_full( void *dst, const void *data, size_t len )
|
||||
#define CHx16(X, Y, Z) \
|
||||
_mm512_xor_si512( _mm512_and_si512( _mm512_xor_si512( Y, Z ), X ), Z )
|
||||
|
||||
/*
|
||||
#define MAJx16(X, Y, Z) \
|
||||
_mm512_or_si512( _mm512_and_si512( X, Y ), \
|
||||
_mm512_and_si512( _mm512_or_si512( X, Y ), Z ) )
|
||||
*/
|
||||
/*
|
||||
#define MAJx16(X, Y, Z) \
|
||||
_mm512_xor_si512( Y, _mm512_and_si512( _mm512_xor_si512( X, Y ), \
|
||||
_mm512_xor_si512( Y, Z ) ) )
|
||||
*/
|
||||
|
||||
#define MAJx16(X, Y, Z) \
|
||||
_mm512_xor_si512( Y, _mm512_and_si512( X_xor_Y = _mm512_xor_si512( X, Y ), \
|
||||
Y_xor_Z ) )
|
||||
|
||||
#define BSG2_0x16(x) \
|
||||
_mm512_xor_si512( _mm512_xor_si512( \
|
||||
@@ -621,6 +652,7 @@ do { \
|
||||
T1 = _mm512_add_epi32( H, mm512_add4_32( BSG2_1x16(E), CHx16(E, F, G), \
|
||||
K, W[i] ) ); \
|
||||
T2 = _mm512_add_epi32( BSG2_0x16(A), MAJx16(A, B, C) ); \
|
||||
Y_xor_Z = X_xor_Y; \
|
||||
D = _mm512_add_epi32( D, T1 ); \
|
||||
H = _mm512_add_epi32( T1, T2 ); \
|
||||
} while (0)
|
||||
@@ -628,7 +660,7 @@ do { \
|
||||
static void
|
||||
sha256_16way_round( sha256_16way_context *ctx, __m512i *in, __m512i r[8] )
|
||||
{
|
||||
register __m512i A, B, C, D, E, F, G, H;
|
||||
register __m512i A, B, C, D, E, F, G, H, X_xor_Y, Y_xor_Z;
|
||||
__m512i W[16];
|
||||
|
||||
mm512_block_bswap_32( W , in );
|
||||
@@ -657,6 +689,8 @@ sha256_16way_round( sha256_16way_context *ctx, __m512i *in, __m512i r[8] )
|
||||
H = m512_const1_64( 0x5BE0CD195BE0CD19 );
|
||||
}
|
||||
|
||||
Y_xor_Z = _mm512_xor_si512( B, C );
|
||||
|
||||
SHA2s_16WAY_STEP( A, B, C, D, E, F, G, H, 0, 0 );
|
||||
SHA2s_16WAY_STEP( H, A, B, C, D, E, F, G, 1, 0 );
|
||||
SHA2s_16WAY_STEP( G, H, A, B, C, D, E, F, 2, 0 );
|
||||
|
@@ -98,9 +98,21 @@ static const uint64_t K512[80] =
|
||||
#define CH8W(X, Y, Z) \
|
||||
_mm512_xor_si512( _mm512_and_si512( _mm512_xor_si512( Y, Z ), X ), Z )
|
||||
|
||||
/*
|
||||
#define MAJ8W(X, Y, Z) \
|
||||
_mm512_or_si512( _mm512_and_si512( X, Y ), \
|
||||
_mm512_and_si512( _mm512_or_si512( X, Y ), Z ) )
|
||||
*/
|
||||
/* Functionally identical to original but optimizable,
|
||||
* subexpression X^Y from one step can be reused in the next step as Y^Z
|
||||
#define MAJ8W(X, Y, Z) \
|
||||
_mm512_xor_si512( Y, _mm512_and_si512( _mm512_xor_si512( X, Y ), \
|
||||
_mm512_xor_si512( Y, Z ) ) )
|
||||
*/
|
||||
|
||||
#define MAJ8W(X, Y, Z) \
|
||||
_mm512_xor_si512( Y, _mm512_and_si512( X_xor_Y = _mm512_xor_si512( X, Y ), \
|
||||
Y_xor_Z ) )
|
||||
|
||||
#define BSG8W_5_0(x) \
|
||||
_mm512_xor_si512( _mm512_xor_si512( \
|
||||
@@ -172,6 +184,7 @@ do { \
|
||||
T1 = _mm512_add_epi64( H, mm512_add4_64( BSG8W_5_1(E), CH8W(E, F, G), \
|
||||
K, W[i] ) ); \
|
||||
T2 = _mm512_add_epi64( BSG8W_5_0(A), MAJ8W(A, B, C) ); \
|
||||
Y_xor_Z = X_xor_Y; \
|
||||
D = _mm512_add_epi64( D, T1 ); \
|
||||
H = _mm512_add_epi64( T1, T2 ); \
|
||||
} while (0)
|
||||
@@ -180,7 +193,7 @@ static void
|
||||
sha512_8way_round( sha512_8way_context *ctx, __m512i *in, __m512i r[8] )
|
||||
{
|
||||
int i;
|
||||
register __m512i A, B, C, D, E, F, G, H;
|
||||
register __m512i A, B, C, D, E, F, G, H, X_xor_Y, Y_xor_Z;
|
||||
__m512i W[80];
|
||||
|
||||
mm512_block_bswap_64( W , in );
|
||||
@@ -213,6 +226,8 @@ sha512_8way_round( sha512_8way_context *ctx, __m512i *in, __m512i r[8] )
|
||||
H = m512_const1_64( 0x5BE0CD19137E2179 );
|
||||
}
|
||||
|
||||
Y_xor_Z = _mm512_xor_si512( B, C );
|
||||
|
||||
for ( i = 0; i < 80; i += 8 )
|
||||
{
|
||||
SHA3_8WAY_STEP( A, B, C, D, E, F, G, H, i + 0 );
|
||||
@@ -319,14 +334,20 @@ void sha512_8way_close( sha512_8way_context *sc, void *dst )
|
||||
|
||||
// SHA-512 4 way 64 bit
|
||||
|
||||
/*
|
||||
|
||||
#define CH(X, Y, Z) \
|
||||
_mm256_xor_si256( _mm256_and_si256( _mm256_xor_si256( Y, Z ), X ), Z )
|
||||
|
||||
/*
|
||||
#define MAJ(X, Y, Z) \
|
||||
_mm256_or_si256( _mm256_and_si256( X, Y ), \
|
||||
_mm256_and_si256( _mm256_or_si256( X, Y ), Z ) )
|
||||
*/
|
||||
|
||||
#define MAJ(X, Y, Z) \
|
||||
_mm256_xor_si256( Y, _mm256_and_si256( X_xor_Y = _mm256_xor_si256( X, Y ), \
|
||||
Y_xor_Z ) )
|
||||
|
||||
#define BSG5_0(x) \
|
||||
mm256_ror_64( _mm256_xor_si256( mm256_ror_64( \
|
||||
_mm256_xor_si256( mm256_ror_64( x, 5 ), x ), 6 ), x ), 28 )
|
||||
@@ -334,7 +355,7 @@ void sha512_8way_close( sha512_8way_context *sc, void *dst )
|
||||
#define BSG5_1(x) \
|
||||
mm256_ror_64( _mm256_xor_si256( mm256_ror_64( \
|
||||
_mm256_xor_si256( mm256_ror_64( x, 23 ), x ), 4 ), x ), 14 )
|
||||
*/
|
||||
|
||||
/*
|
||||
#define BSG5_0(x) \
|
||||
_mm256_xor_si256( _mm256_xor_si256( \
|
||||
@@ -402,7 +423,7 @@ static inline __m256i ssg512_add( __m256i w0, __m256i w1 )
|
||||
w1 = _mm256_xor_si256( X1a, X1b ); \
|
||||
} while(0)
|
||||
*/
|
||||
|
||||
/*
|
||||
#define SHA3_4WAY_STEP(A, B, C, D, E, F, G, H, i) \
|
||||
do { \
|
||||
__m256i K = _mm256_set1_epi64x( K512[ i ] ); \
|
||||
@@ -431,7 +452,7 @@ do { \
|
||||
H = _mm256_add_epi64( T1, T2 ); \
|
||||
D = _mm256_add_epi64( D, T1 ); \
|
||||
} while (0)
|
||||
|
||||
*/
|
||||
/*
|
||||
#define SHA3_4WAY_STEP(A, B, C, D, E, F, G, H, i) \
|
||||
do { \
|
||||
@@ -445,7 +466,7 @@ do { \
|
||||
} while (0)
|
||||
*/
|
||||
|
||||
/*
|
||||
|
||||
#define SHA3_4WAY_STEP(A, B, C, D, E, F, G, H, i) \
|
||||
do { \
|
||||
__m256i T1, T2; \
|
||||
@@ -453,16 +474,17 @@ do { \
|
||||
T1 = _mm256_add_epi64( H, mm256_add4_64( BSG5_1(E), CH(E, F, G), \
|
||||
K, W[i] ) ); \
|
||||
T2 = _mm256_add_epi64( BSG5_0(A), MAJ(A, B, C) ); \
|
||||
Y_xor_Z = X_xor_Y; \
|
||||
D = _mm256_add_epi64( D, T1 ); \
|
||||
H = _mm256_add_epi64( T1, T2 ); \
|
||||
} while (0)
|
||||
*/
|
||||
|
||||
|
||||
static void
|
||||
sha512_4way_round( sha512_4way_context *ctx, __m256i *in, __m256i r[8] )
|
||||
{
|
||||
int i;
|
||||
register __m256i A, B, C, D, E, F, G, H;
|
||||
register __m256i A, B, C, D, E, F, G, H, X_xor_Y, Y_xor_Z;
|
||||
__m256i W[80];
|
||||
|
||||
mm256_block_bswap_64( W , in );
|
||||
@@ -495,6 +517,8 @@ sha512_4way_round( sha512_4way_context *ctx, __m256i *in, __m256i r[8] )
|
||||
H = m256_const1_64( 0x5BE0CD19137E2179 );
|
||||
}
|
||||
|
||||
Y_xor_Z = _mm256_xor_si256( B, C );
|
||||
|
||||
for ( i = 0; i < 80; i += 8 )
|
||||
{
|
||||
SHA3_4WAY_STEP( A, B, C, D, E, F, G, H, i + 0 );
|
||||
|
@@ -40,8 +40,8 @@
|
||||
#endif
|
||||
|
||||
#define CH(X, Y, Z) ((((Y) ^ (Z)) & (X)) ^ (Z))
|
||||
#define MAJ(X, Y, Z) (((Y) & (Z)) | (((Y) | (Z)) & (X)))
|
||||
|
||||
//#define MAJ(X, Y, Z) (((Y) & (Z)) | (((Y) | (Z)) & (X)))
|
||||
#define MAJ( X, Y, Z ) ( Y ^ ( ( X ^ Y ) & ( Y ^ Z ) ) )
|
||||
#define ROTR SPH_ROTR32
|
||||
|
||||
#define BSG2_0(x) (ROTR(x, 2) ^ ROTR(x, 13) ^ ROTR(x, 22))
|
||||
|
@@ -38,7 +38,8 @@
|
||||
#if SPH_64
|
||||
|
||||
#define CH(X, Y, Z) ((((Y) ^ (Z)) & (X)) ^ (Z))
|
||||
#define MAJ(X, Y, Z) (((X) & (Y)) | (((X) | (Y)) & (Z)))
|
||||
//#define MAJ(X, Y, Z) (((X) & (Y)) | (((X) | (Y)) & (Z)))
|
||||
#define MAJ( X, Y, Z ) ( Y ^ ( ( X ^ Y ) & ( Y ^ Z ) ) )
|
||||
|
||||
#define ROTR64 SPH_ROTR64
|
||||
|
||||
|
20
configure
vendored
20
configure
vendored
@@ -1,6 +1,6 @@
|
||||
#! /bin/sh
|
||||
# Guess values for system-dependent variables and create Makefiles.
|
||||
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.16.3.
|
||||
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.16.4.
|
||||
#
|
||||
#
|
||||
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
|
||||
@@ -577,8 +577,8 @@ MAKEFLAGS=
|
||||
# Identity of this package.
|
||||
PACKAGE_NAME='cpuminer-opt'
|
||||
PACKAGE_TARNAME='cpuminer-opt'
|
||||
PACKAGE_VERSION='3.16.3'
|
||||
PACKAGE_STRING='cpuminer-opt 3.16.3'
|
||||
PACKAGE_VERSION='3.16.4'
|
||||
PACKAGE_STRING='cpuminer-opt 3.16.4'
|
||||
PACKAGE_BUGREPORT=''
|
||||
PACKAGE_URL=''
|
||||
|
||||
@@ -1332,7 +1332,7 @@ if test "$ac_init_help" = "long"; then
|
||||
# Omit some internal or obsolete options to make the list less imposing.
|
||||
# This message is too long to be a string in the A/UX 3.1 sh.
|
||||
cat <<_ACEOF
|
||||
\`configure' configures cpuminer-opt 3.16.3 to adapt to many kinds of systems.
|
||||
\`configure' configures cpuminer-opt 3.16.4 to adapt to many kinds of systems.
|
||||
|
||||
Usage: $0 [OPTION]... [VAR=VALUE]...
|
||||
|
||||
@@ -1404,7 +1404,7 @@ fi
|
||||
|
||||
if test -n "$ac_init_help"; then
|
||||
case $ac_init_help in
|
||||
short | recursive ) echo "Configuration of cpuminer-opt 3.16.3:";;
|
||||
short | recursive ) echo "Configuration of cpuminer-opt 3.16.4:";;
|
||||
esac
|
||||
cat <<\_ACEOF
|
||||
|
||||
@@ -1509,7 +1509,7 @@ fi
|
||||
test -n "$ac_init_help" && exit $ac_status
|
||||
if $ac_init_version; then
|
||||
cat <<\_ACEOF
|
||||
cpuminer-opt configure 3.16.3
|
||||
cpuminer-opt configure 3.16.4
|
||||
generated by GNU Autoconf 2.69
|
||||
|
||||
Copyright (C) 2012 Free Software Foundation, Inc.
|
||||
@@ -2012,7 +2012,7 @@ cat >config.log <<_ACEOF
|
||||
This file contains any messages produced by compilers while
|
||||
running configure, to aid debugging if configure makes a mistake.
|
||||
|
||||
It was created by cpuminer-opt $as_me 3.16.3, which was
|
||||
It was created by cpuminer-opt $as_me 3.16.4, which was
|
||||
generated by GNU Autoconf 2.69. Invocation command line was
|
||||
|
||||
$ $0 $@
|
||||
@@ -2993,7 +2993,7 @@ fi
|
||||
|
||||
# Define the identity of the package.
|
||||
PACKAGE='cpuminer-opt'
|
||||
VERSION='3.16.3'
|
||||
VERSION='3.16.4'
|
||||
|
||||
|
||||
cat >>confdefs.h <<_ACEOF
|
||||
@@ -6690,7 +6690,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
|
||||
# report actual input values of CONFIG_FILES etc. instead of their
|
||||
# values after options handling.
|
||||
ac_log="
|
||||
This file was extended by cpuminer-opt $as_me 3.16.3, which was
|
||||
This file was extended by cpuminer-opt $as_me 3.16.4, which was
|
||||
generated by GNU Autoconf 2.69. Invocation command line was
|
||||
|
||||
CONFIG_FILES = $CONFIG_FILES
|
||||
@@ -6756,7 +6756,7 @@ _ACEOF
|
||||
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
|
||||
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
|
||||
ac_cs_version="\\
|
||||
cpuminer-opt config.status 3.16.3
|
||||
cpuminer-opt config.status 3.16.4
|
||||
configured by $0, generated by GNU Autoconf 2.69,
|
||||
with options \\"\$ac_cs_config\\"
|
||||
|
||||
|
@@ -1,4 +1,4 @@
|
||||
AC_INIT([cpuminer-opt], [3.16.3])
|
||||
AC_INIT([cpuminer-opt], [3.16.4])
|
||||
|
||||
AC_PREREQ([2.59c])
|
||||
AC_CANONICAL_SYSTEM
|
||||
|
@@ -447,8 +447,10 @@ static bool work_decode( const json_t *val, struct work *work )
|
||||
|
||||
if ( !allow_mininginfo )
|
||||
net_diff = algo_gate.calc_network_diff( work );
|
||||
else
|
||||
net_diff = hash_to_diff( work->target );
|
||||
|
||||
work->targetdiff = hash_to_diff( work->target );
|
||||
work->targetdiff = net_diff;
|
||||
stratum_diff = last_targetdiff = work->targetdiff;
|
||||
work->sharediff = 0;
|
||||
algo_gate.decode_extra_data( work, &net_blocks );
|
||||
@@ -908,7 +910,8 @@ static bool gbt_work_decode( const json_t *val, struct work *work )
|
||||
}
|
||||
for ( i = 0; i < ARRAY_SIZE( work->target ); i++ )
|
||||
work->target[7 - i] = be32dec( target + i );
|
||||
|
||||
net_diff = work->targetdiff = hash_to_diff( work->target );
|
||||
|
||||
tmp = json_object_get( val, "workid" );
|
||||
if ( tmp )
|
||||
{
|
||||
|
Reference in New Issue
Block a user