mirror of
https://github.com/JayDDee/cpuminer-opt.git
synced 2025-09-17 23:44:27 +00:00
Compare commits
1 Commits
Author | SHA1 | Date | |
---|---|---|---|
![]() |
1a234cbe53 |
@@ -171,6 +171,7 @@ cpuminer_SOURCES = \
|
|||||||
algo/sha/hmac-sha256-hash-4way.c \
|
algo/sha/hmac-sha256-hash-4way.c \
|
||||||
algo/sha/sha256d.c \
|
algo/sha/sha256d.c \
|
||||||
algo/sha/sha2.c \
|
algo/sha/sha2.c \
|
||||||
|
algo/sha/sha256d-4way.c \
|
||||||
algo/sha/sha256t-gate.c \
|
algo/sha/sha256t-gate.c \
|
||||||
algo/sha/sha256t-4way.c \
|
algo/sha/sha256t-4way.c \
|
||||||
algo/sha/sha256t.c \
|
algo/sha/sha256t.c \
|
||||||
|
@@ -65,6 +65,20 @@ If not what makes it happen or not happen?
|
|||||||
Change Log
|
Change Log
|
||||||
----------
|
----------
|
||||||
|
|
||||||
|
v3.8.2
|
||||||
|
|
||||||
|
Issue #342, fixed Groestl AES on Windows, broken in v3.18.0.
|
||||||
|
|
||||||
|
AVX512 for sha256d.
|
||||||
|
|
||||||
|
SSE42 and AVX may now be displayed as mining features at startup.
|
||||||
|
This is hard coded for each algo, and is only implemented for scrypt
|
||||||
|
at this time as it is the only algo with significant performance differences
|
||||||
|
with those features.
|
||||||
|
|
||||||
|
Fixed an issue where a high hashrate algo could cause excessive invalid hash
|
||||||
|
rate log reports when starting up in benchmark mode.
|
||||||
|
|
||||||
v3.18.1
|
v3.18.1
|
||||||
|
|
||||||
More speed for scrypt:
|
More speed for scrypt:
|
||||||
|
@@ -337,42 +337,42 @@ do{ \
|
|||||||
XC2 = XOR( XC2, TC ); \
|
XC2 = XOR( XC2, TC ); \
|
||||||
\
|
\
|
||||||
TA = ADD32( XA2, XA1 ); \
|
TA = ADD32( XA2, XA1 ); \
|
||||||
|
XA1 = ROL_1X32( XA1 ); \
|
||||||
TB = ADD32( XB2, XB1 ); \
|
TB = ADD32( XB2, XB1 ); \
|
||||||
TC = ADD32( XC2, XC1 ); \
|
TC = ADD32( XC2, XC1 ); \
|
||||||
TA = ROL32( TA, 13 ); \
|
|
||||||
XA1 = ROL_1X32( XA1 ); \
|
|
||||||
XB1 = ROL_1X32( XB1 ); \
|
XB1 = ROL_1X32( XB1 ); \
|
||||||
XC1 = ROL_1X32( XC1 ); \
|
TA = ROL32( TA, 13 ); \
|
||||||
XA3 = XOR( XA3, TA ); \
|
XA3 = XOR( XA3, TA ); \
|
||||||
|
XC1 = ROL_1X32( XC1 ); \
|
||||||
TB = ROL32( TB, 13 ); \
|
TB = ROL32( TB, 13 ); \
|
||||||
XB3 = XOR( XB3, TB ); \
|
XB3 = XOR( XB3, TB ); \
|
||||||
TC = ROL32( TC, 13 ); \
|
TC = ROL32( TC, 13 ); \
|
||||||
XC3 = XOR( XC3, TC ); \
|
XC3 = XOR( XC3, TC ); \
|
||||||
\
|
\
|
||||||
TA = ADD32( XA3, XA2 ); \
|
TA = ADD32( XA3, XA2 ); \
|
||||||
|
XA2 = SWAP_64( XA2 ); \
|
||||||
TB = ADD32( XB3, XB2 ); \
|
TB = ADD32( XB3, XB2 ); \
|
||||||
TC = ADD32( XC3, XC2 ); \
|
TC = ADD32( XC3, XC2 ); \
|
||||||
TA = ROL32( TA, 18 ); \
|
TA = ROL32( TA, 18 ); \
|
||||||
XA2 = SWAP_64( XA2 ); \
|
|
||||||
XB2 = SWAP_64( XB2 ); \
|
XB2 = SWAP_64( XB2 ); \
|
||||||
XC2 = SWAP_64( XC2 ); \
|
|
||||||
XA0 = XOR( XA0, TA ); \
|
XA0 = XOR( XA0, TA ); \
|
||||||
TB = ROL32( TB, 18 ); \
|
TB = ROL32( TB, 18 ); \
|
||||||
XB0 = XOR( XB0, TB ); \
|
XB0 = XOR( XB0, TB ); \
|
||||||
|
XC2 = SWAP_64( XC2 ); \
|
||||||
TC = ROL32( TC, 18 ); \
|
TC = ROL32( TC, 18 ); \
|
||||||
XC0 = XOR( XC0, TC ); \
|
XC0 = XOR( XC0, TC ); \
|
||||||
\
|
\
|
||||||
TA = ADD32( XA0, XA1 ); \
|
TA = ADD32( XA0, XA1 ); \
|
||||||
|
XA3 = ROR_1X32( XA3 ); \
|
||||||
TB = ADD32( XB0, XB1 ); \
|
TB = ADD32( XB0, XB1 ); \
|
||||||
TC = ADD32( XC0, XC1 ); \
|
TC = ADD32( XC0, XC1 ); \
|
||||||
TA = ROL32( TA, 7 ); \
|
TA = ROL32( TA, 7 ); \
|
||||||
XA3 = ROR_1X32( XA3 ); \
|
XB3 = ROR_1X32( XB3 ); \
|
||||||
XA3 = XOR( XA3, TA ); \
|
XA3 = XOR( XA3, TA ); \
|
||||||
TB = ROL32( TB, 7 ); \
|
TB = ROL32( TB, 7 ); \
|
||||||
XB3 = ROR_1X32( XB3 ); \
|
XC3 = ROR_1X32( XC3 ); \
|
||||||
XB3 = XOR( XB3, TB ); \
|
XB3 = XOR( XB3, TB ); \
|
||||||
TC = ROL32( TC, 7 ); \
|
TC = ROL32( TC, 7 ); \
|
||||||
XC3 = ROR_1X32( XC3 ); \
|
|
||||||
XC3 = XOR( XC3, TC ); \
|
XC3 = XOR( XC3, TC ); \
|
||||||
\
|
\
|
||||||
TA = ADD32( XA3, XA0 ); \
|
TA = ADD32( XA3, XA0 ); \
|
||||||
@@ -399,24 +399,24 @@ do{ \
|
|||||||
XC1 = XOR( XC1, TC ); \
|
XC1 = XOR( XC1, TC ); \
|
||||||
\
|
\
|
||||||
TA = ADD32( XA1, XA2 ); \
|
TA = ADD32( XA1, XA2 ); \
|
||||||
|
XA2 = SWAP_64( XA2 ); \
|
||||||
TB = ADD32( XB1, XB2 ); \
|
TB = ADD32( XB1, XB2 ); \
|
||||||
|
XB2 = SWAP_64( XB2 ); \
|
||||||
TA = ROL32( TA, 18); \
|
TA = ROL32( TA, 18); \
|
||||||
TC = ADD32( XC1, XC2 ); \
|
TC = ADD32( XC1, XC2 ); \
|
||||||
XA2 = SWAP_64( XA2 ); \
|
XC2 = SWAP_64( XC2 ); \
|
||||||
TB = ROL32( TB, 18); \
|
TB = ROL32( TB, 18); \
|
||||||
XA0 = XOR( XA0, TA ); \
|
XA0 = XOR( XA0, TA ); \
|
||||||
XB2 = SWAP_64( XB2 ); \
|
XA1 = ROR_1X32( XA1 ); \
|
||||||
TC = ROL32( TC, 18); \
|
TC = ROL32( TC, 18); \
|
||||||
XB0 = XOR( XB0, TB ); \
|
XB0 = XOR( XB0, TB ); \
|
||||||
XC2 = SWAP_64( XC2 ); \
|
|
||||||
XA1 = ROR_1X32( XA1 ); \
|
|
||||||
XB1 = ROR_1X32( XB1 ); \
|
XB1 = ROR_1X32( XB1 ); \
|
||||||
XC0 = XOR( XC0, TC ); \
|
XC0 = XOR( XC0, TC ); \
|
||||||
XC1 = ROR_1X32( XC1 ); \
|
XC1 = ROR_1X32( XC1 ); \
|
||||||
} while (0);
|
} while (0);
|
||||||
|
|
||||||
|
|
||||||
// slow rol, an attempt to optimze non-avx512 bit rotations
|
// slow rot, an attempt to optimze non-avx512 bit rotations
|
||||||
// Contains target specific instructions, only for use with 128 bit vectors
|
// Contains target specific instructions, only for use with 128 bit vectors
|
||||||
#define SALSA_2ROUNDS_SIMD128_3BUF_SLOROT \
|
#define SALSA_2ROUNDS_SIMD128_3BUF_SLOROT \
|
||||||
do{ \
|
do{ \
|
||||||
|
@@ -28,7 +28,6 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#include "algo-gate-api.h"
|
#include "algo-gate-api.h"
|
||||||
|
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <inttypes.h>
|
#include <inttypes.h>
|
||||||
@@ -55,11 +54,25 @@ static const uint32_t sha256_initial_state[8] =
|
|||||||
0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19
|
0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19
|
||||||
};
|
};
|
||||||
|
|
||||||
static int scrypt_throughput = 0;
|
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||||
|
|
||||||
|
#define SCRYPT_THROUGHPUT 16
|
||||||
|
|
||||||
|
#elif defined(__AVX2__)
|
||||||
|
|
||||||
|
#define SCRYPT_THROUGHPUT 8
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
#define SCRYPT_THROUGHPUT 4
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// static int scrypt_throughput = 0;
|
||||||
|
|
||||||
static int scratchbuf_size = 0;
|
static int scratchbuf_size = 0;
|
||||||
|
|
||||||
static __thread char *scratchbuf = NULL;
|
static __thread uint32_t *scratchbuf = NULL;
|
||||||
|
|
||||||
// change this to a constant to be used directly as input state arg
|
// change this to a constant to be used directly as input state arg
|
||||||
// vectors still need an init function.
|
// vectors still need an init function.
|
||||||
@@ -709,15 +722,11 @@ static inline void PBKDF2_SHA256_128_32_16way( uint32_t *tstate,
|
|||||||
|
|
||||||
#endif // AVX512
|
#endif // AVX512
|
||||||
|
|
||||||
//#if defined(USE_ASM) && defined(__x86_64__)
|
|
||||||
|
|
||||||
#define SCRYPT_MAX_WAYS 12
|
#define SCRYPT_MAX_WAYS 12
|
||||||
#define HAVE_SCRYPT_3WAY 1
|
#define HAVE_SCRYPT_3WAY 1
|
||||||
//int scrypt_best_throughput();
|
|
||||||
void scrypt_core(uint32_t *X, uint32_t *V, int N);
|
void scrypt_core(uint32_t *X, uint32_t *V, int N);
|
||||||
void scrypt_core_3way(uint32_t *X, uint32_t *V, int N);
|
void scrypt_core_3way(uint32_t *X, uint32_t *V, int N);
|
||||||
|
|
||||||
//#if defined(USE_AVX2)
|
|
||||||
#if defined(__AVX2__)
|
#if defined(__AVX2__)
|
||||||
#undef SCRYPT_MAX_WAYS
|
#undef SCRYPT_MAX_WAYS
|
||||||
#define SCRYPT_MAX_WAYS 24
|
#define SCRYPT_MAX_WAYS 24
|
||||||
@@ -727,40 +736,39 @@ void scrypt_core_6way(uint32_t *X, uint32_t *V, int N);
|
|||||||
|
|
||||||
#ifndef SCRYPT_MAX_WAYS
|
#ifndef SCRYPT_MAX_WAYS
|
||||||
#define SCRYPT_MAX_WAYS 1
|
#define SCRYPT_MAX_WAYS 1
|
||||||
//#define scrypt_best_throughput() 1
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include "scrypt-core-4way.h"
|
#include "scrypt-core-4way.h"
|
||||||
|
|
||||||
static bool scrypt_N_1_1_256(const uint32_t *input, uint32_t *output,
|
/*
|
||||||
uint32_t *midstate, unsigned char *scratchpad, int N, int thr_id )
|
static bool scrypt_N_1_1_256( const uint32_t *input, uint32_t *output,
|
||||||
|
uint32_t *midstate, int N, int thr_id )
|
||||||
{
|
{
|
||||||
uint32_t tstate[8], ostate[8];
|
uint32_t tstate[8], ostate[8];
|
||||||
uint32_t X[32];
|
uint32_t X[32];
|
||||||
uint32_t *V = (uint32_t*)scratchpad;
|
|
||||||
|
|
||||||
memcpy(tstate, midstate, 32);
|
memcpy(tstate, midstate, 32);
|
||||||
HMAC_SHA256_80_init(input, tstate, ostate);
|
HMAC_SHA256_80_init(input, tstate, ostate);
|
||||||
PBKDF2_SHA256_80_128(tstate, ostate, input, X);
|
PBKDF2_SHA256_80_128(tstate, ostate, input, X);
|
||||||
|
|
||||||
scrypt_core_simd128( X, V, N ); // woring
|
scrypt_core_simd128( X, scratchbuf, N ); // woring
|
||||||
// scrypt_core_1way( X, V, N ); // working
|
// scrypt_core_1way( X, V, N ); // working
|
||||||
// scrypt_core(X, V, N);
|
// scrypt_core(X, V, N);
|
||||||
|
|
||||||
PBKDF2_SHA256_128_32(tstate, ostate, X, output);
|
PBKDF2_SHA256_128_32(tstate, ostate, X, output);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
#if defined(__AVX2__)
|
#if ( SCRYPT_THROUGHPUT == 8 )
|
||||||
|
|
||||||
static int scrypt_N_1_1_256_8way( const uint32_t *input, uint32_t *output,
|
static int scrypt_N_1_1_256_8way( const uint32_t *input, uint32_t *output,
|
||||||
uint32_t *midstate, unsigned char *scratchpad, int N, int thrid )
|
uint32_t *midstate, int N, int thrid )
|
||||||
{
|
{
|
||||||
uint32_t _ALIGN(128) tstate[ 8*8 ];
|
uint32_t _ALIGN(128) tstate[ 8*8 ];
|
||||||
uint32_t _ALIGN(128) ostate[ 8*8 ];
|
uint32_t _ALIGN(128) ostate[ 8*8 ];
|
||||||
uint32_t _ALIGN(128) W[ 8*32 ];
|
uint32_t _ALIGN(128) W[ 8*32 ];
|
||||||
uint32_t _ALIGN(128) X[ 8*32 ];
|
uint32_t _ALIGN(128) X[ 8*32 ];
|
||||||
uint32_t *V = (uint32_t*)scratchpad;
|
|
||||||
|
|
||||||
intrlv_8x32( W, input, input+ 20, input+ 40, input+ 60,
|
intrlv_8x32( W, input, input+ 20, input+ 40, input+ 60,
|
||||||
input+80, input+100, input+120, input+140, 640 );
|
input+80, input+100, input+120, input+140, 640 );
|
||||||
@@ -774,11 +782,11 @@ static int scrypt_N_1_1_256_8way( const uint32_t *input, uint32_t *output,
|
|||||||
|
|
||||||
if ( opt_param_n > 0x4000 )
|
if ( opt_param_n > 0x4000 )
|
||||||
{
|
{
|
||||||
scrypt_core_simd128_3buf( X, V, N );
|
scrypt_core_simd128_3buf( X, scratchbuf, N );
|
||||||
if ( work_restart[thrid].restart ) return 0;
|
if ( work_restart[thrid].restart ) return 0;
|
||||||
scrypt_core_simd128_3buf( X+ 96, V, N );
|
scrypt_core_simd128_3buf( X+ 96, scratchbuf, N );
|
||||||
if ( work_restart[thrid].restart ) return 0;
|
if ( work_restart[thrid].restart ) return 0;
|
||||||
scrypt_core_simd128_2buf( X+192, V, N );
|
scrypt_core_simd128_2buf( X+192, scratchbuf, N );
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@@ -786,13 +794,13 @@ static int scrypt_N_1_1_256_8way( const uint32_t *input, uint32_t *output,
|
|||||||
intrlv_2x128( W+ 64, X+ 64, X+ 96, 1024 );
|
intrlv_2x128( W+ 64, X+ 64, X+ 96, 1024 );
|
||||||
intrlv_2x128( W+128, X+128, X+160, 1024 );
|
intrlv_2x128( W+128, X+128, X+160, 1024 );
|
||||||
intrlv_2x128( W+192, X+192, X+224, 1024 );
|
intrlv_2x128( W+192, X+192, X+224, 1024 );
|
||||||
scrypt_core_2way_simd128( (__m256i*) W, (__m256i*)V, N );
|
scrypt_core_2way_simd128( (__m256i*) W, (__m256i*)scratchbuf, N );
|
||||||
if ( work_restart[thrid].restart ) return 0;
|
if ( work_restart[thrid].restart ) return 0;
|
||||||
scrypt_core_2way_simd128( (__m256i*)(W+ 64), (__m256i*)V, N );
|
scrypt_core_2way_simd128( (__m256i*)(W+ 64), (__m256i*)scratchbuf, N );
|
||||||
if ( work_restart[thrid].restart ) return 0;
|
if ( work_restart[thrid].restart ) return 0;
|
||||||
scrypt_core_2way_simd128( (__m256i*)(W+128), (__m256i*)V, N );
|
scrypt_core_2way_simd128( (__m256i*)(W+128), (__m256i*)scratchbuf, N );
|
||||||
if ( work_restart[thrid].restart ) return 0;
|
if ( work_restart[thrid].restart ) return 0;
|
||||||
scrypt_core_2way_simd128( (__m256i*)(W+192), (__m256i*)V, N );
|
scrypt_core_2way_simd128( (__m256i*)(W+192), (__m256i*)scratchbuf, N );
|
||||||
dintrlv_2x128( X, X+ 32, W, 1024 );
|
dintrlv_2x128( X, X+ 32, W, 1024 );
|
||||||
dintrlv_2x128( X+ 64, X+ 96, W+ 64, 1024 );
|
dintrlv_2x128( X+ 64, X+ 96, W+ 64, 1024 );
|
||||||
dintrlv_2x128( X+128, X+160, W+128, 1024 );
|
dintrlv_2x128( X+128, X+160, W+128, 1024 );
|
||||||
@@ -928,16 +936,15 @@ static int scrypt_N_1_1_256_8way( const uint32_t *input, uint32_t *output,
|
|||||||
|
|
||||||
#endif // AVX2
|
#endif // AVX2
|
||||||
|
|
||||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
#if ( SCRYPT_THROUGHPUT == 16 )
|
||||||
|
|
||||||
static int scrypt_N_1_1_256_16way( const uint32_t *input, uint32_t *output,
|
static int scrypt_N_1_1_256_16way( const uint32_t *input, uint32_t *output,
|
||||||
uint32_t *midstate, unsigned char *scratchpad, int N, int thrid )
|
uint32_t *midstate, int N, int thrid )
|
||||||
{
|
{
|
||||||
uint32_t _ALIGN(128) tstate[ 16*8 ];
|
uint32_t _ALIGN(128) tstate[ 16*8 ];
|
||||||
uint32_t _ALIGN(128) ostate[ 16*8 ];
|
uint32_t _ALIGN(128) ostate[ 16*8 ];
|
||||||
uint32_t _ALIGN(128) W[ 16*32 ];
|
uint32_t _ALIGN(128) W[ 16*32 ];
|
||||||
uint32_t _ALIGN(128) X[ 16*32 ];
|
uint32_t _ALIGN(128) X[ 16*32 ];
|
||||||
uint32_t *V = (uint32_t*)scratchpad;
|
|
||||||
|
|
||||||
intrlv_16x32( W, input, input+ 20, input+ 40, input+ 60,
|
intrlv_16x32( W, input, input+ 20, input+ 40, input+ 60,
|
||||||
input+ 80, input+100, input+120, input+140,
|
input+ 80, input+100, input+120, input+140,
|
||||||
@@ -956,17 +963,17 @@ static int scrypt_N_1_1_256_16way( const uint32_t *input, uint32_t *output,
|
|||||||
|
|
||||||
if ( opt_param_n > 0x4000 )
|
if ( opt_param_n > 0x4000 )
|
||||||
{
|
{
|
||||||
scrypt_core_simd128_3buf( X, V, N );
|
scrypt_core_simd128_3buf( X, scratchbuf, N );
|
||||||
if ( work_restart[thrid].restart ) return 0;
|
if ( work_restart[thrid].restart ) return 0;
|
||||||
scrypt_core_simd128_3buf( X+ 96, V, N );
|
scrypt_core_simd128_3buf( X+ 96, scratchbuf, N );
|
||||||
if ( work_restart[thrid].restart ) return 0;
|
if ( work_restart[thrid].restart ) return 0;
|
||||||
scrypt_core_simd128_2buf( X+192, V, N );
|
scrypt_core_simd128_2buf( X+192, scratchbuf, N );
|
||||||
if ( work_restart[thrid].restart ) return 0;
|
if ( work_restart[thrid].restart ) return 0;
|
||||||
scrypt_core_simd128_3buf( X+256, V, N );
|
scrypt_core_simd128_3buf( X+256, scratchbuf, N );
|
||||||
if ( work_restart[thrid].restart ) return 0;
|
if ( work_restart[thrid].restart ) return 0;
|
||||||
scrypt_core_simd128_3buf( X+352, V, N );
|
scrypt_core_simd128_3buf( X+352, scratchbuf, N );
|
||||||
if ( work_restart[thrid].restart ) return 0;
|
if ( work_restart[thrid].restart ) return 0;
|
||||||
scrypt_core_simd128_2buf( X+448, V, N );
|
scrypt_core_simd128_2buf( X+448, scratchbuf, N );
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@@ -974,13 +981,13 @@ static int scrypt_N_1_1_256_16way( const uint32_t *input, uint32_t *output,
|
|||||||
intrlv_4x128( W+128, X+128, X+160, X+192, X+224, 1024 );
|
intrlv_4x128( W+128, X+128, X+160, X+192, X+224, 1024 );
|
||||||
intrlv_4x128( W+256, X+256, X+288, X+320, X+352, 1024 );
|
intrlv_4x128( W+256, X+256, X+288, X+320, X+352, 1024 );
|
||||||
intrlv_4x128( W+384, X+384, X+416, X+448, X+480, 1024 );
|
intrlv_4x128( W+384, X+384, X+416, X+448, X+480, 1024 );
|
||||||
scrypt_core_4way_simd128( (__m512i*) W, (__m512i*)V, N );
|
scrypt_core_4way_simd128( (__m512i*) W, (__m512i*)scratchbuf, N );
|
||||||
if ( work_restart[thrid].restart ) return 0;
|
if ( work_restart[thrid].restart ) return 0;
|
||||||
scrypt_core_4way_simd128( (__m512i*)(W+128), (__m512i*)V, N );
|
scrypt_core_4way_simd128( (__m512i*)(W+128), (__m512i*)scratchbuf, N );
|
||||||
if ( work_restart[thrid].restart ) return 0;
|
if ( work_restart[thrid].restart ) return 0;
|
||||||
scrypt_core_4way_simd128( (__m512i*)(W+256), (__m512i*)V, N );
|
scrypt_core_4way_simd128( (__m512i*)(W+256), (__m512i*)scratchbuf, N );
|
||||||
if ( work_restart[thrid].restart ) return 0;
|
if ( work_restart[thrid].restart ) return 0;
|
||||||
scrypt_core_4way_simd128( (__m512i*)(W+384), (__m512i*)V, N );
|
scrypt_core_4way_simd128( (__m512i*)(W+384), (__m512i*)scratchbuf, N );
|
||||||
dintrlv_4x128( X, X+ 32, X+ 64, X+ 96, W, 1024 );
|
dintrlv_4x128( X, X+ 32, X+ 64, X+ 96, W, 1024 );
|
||||||
dintrlv_4x128( X+128, X+160, X+192, X+224, W+128, 1024 );
|
dintrlv_4x128( X+128, X+160, X+192, X+224, W+128, 1024 );
|
||||||
dintrlv_4x128( X+256, X+288, X+320, X+352, W+256, 1024 );
|
dintrlv_4x128( X+256, X+288, X+320, X+352, W+256, 1024 );
|
||||||
@@ -1236,15 +1243,13 @@ static int scrypt_N_1_1_256_16way( const uint32_t *input, uint32_t *output,
|
|||||||
|
|
||||||
#endif // AVX512
|
#endif // AVX512
|
||||||
|
|
||||||
#if defined(__SHA__)
|
#if 0
|
||||||
|
|
||||||
static int scrypt_N_1_1_256_sha_2buf( const uint32_t *input, uint32_t *output,
|
static int scrypt_N_1_1_256_sha_2buf( const uint32_t *input, uint32_t *output,
|
||||||
uint32_t *midstate, unsigned char *scratchpad, int N, int thrid )
|
uint32_t *midstate, int N, int thrid )
|
||||||
{
|
{
|
||||||
uint32_t _ALIGN(128) tstate[ 2*8 ];
|
uint32_t _ALIGN(128) tstate[ 2*8 ];
|
||||||
uint32_t _ALIGN(128) ostate[ 2*8 ];
|
uint32_t _ALIGN(128) ostate[ 2*8 ];
|
||||||
uint32_t _ALIGN(128) W[ 2*32 ];
|
uint32_t _ALIGN(128) W[ 2*32 ];
|
||||||
uint32_t *V = (uint32_t*)scratchpad;
|
|
||||||
|
|
||||||
memcpy( tstate, midstate, 32 );
|
memcpy( tstate, midstate, 32 );
|
||||||
memcpy( tstate+ 8, midstate, 32 );
|
memcpy( tstate+ 8, midstate, 32 );
|
||||||
@@ -1254,7 +1259,7 @@ static int scrypt_N_1_1_256_sha_2buf( const uint32_t *input, uint32_t *output,
|
|||||||
PBKDF2_SHA256_80_128_SHA_2BUF( tstate, tstate+8, ostate, ostate+8,
|
PBKDF2_SHA256_80_128_SHA_2BUF( tstate, tstate+8, ostate, ostate+8,
|
||||||
input, input+20, W, W+32 );
|
input, input+20, W, W+32 );
|
||||||
|
|
||||||
scrypt_core_simd128_2buf( W, V, N );
|
scrypt_core_simd128_2buf( W, scratchbuf, N );
|
||||||
if ( work_restart[thrid].restart ) return 0;
|
if ( work_restart[thrid].restart ) return 0;
|
||||||
|
|
||||||
PBKDF2_SHA256_128_32_SHA_2BUF( tstate, tstate+8, ostate, ostate+8, W, W+32,
|
PBKDF2_SHA256_128_32_SHA_2BUF( tstate, tstate+8, ostate, ostate+8, W, W+32,
|
||||||
@@ -1264,12 +1269,11 @@ static int scrypt_N_1_1_256_sha_2buf( const uint32_t *input, uint32_t *output,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int scrypt_N_1_1_256_4way_sha( const uint32_t *input, uint32_t *output,
|
static int scrypt_N_1_1_256_4way_sha( const uint32_t *input, uint32_t *output,
|
||||||
uint32_t *midstate, unsigned char *scratchpad, int N, int thrid )
|
uint32_t *midstate, int N, int thrid )
|
||||||
{
|
{
|
||||||
uint32_t _ALIGN(128) tstate[4 * 8];
|
uint32_t _ALIGN(128) tstate[4 * 8];
|
||||||
uint32_t _ALIGN(128) ostate[4 * 8];
|
uint32_t _ALIGN(128) ostate[4 * 8];
|
||||||
uint32_t _ALIGN(128) W[4 * 32];
|
uint32_t _ALIGN(128) W[4 * 32];
|
||||||
uint32_t *V = (uint32_t*)scratchpad;
|
|
||||||
|
|
||||||
memcpy( tstate, midstate, 32 );
|
memcpy( tstate, midstate, 32 );
|
||||||
memcpy( tstate+ 8, midstate, 32 );
|
memcpy( tstate+ 8, midstate, 32 );
|
||||||
@@ -1300,9 +1304,9 @@ static int scrypt_N_1_1_256_4way_sha( const uint32_t *input, uint32_t *output,
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
// working, double buffered linear simd
|
// working, double buffered linear simd
|
||||||
scrypt_core_simd128_2buf( W, V, N );
|
scrypt_core_simd128_2buf( W, scratchbuf, N );
|
||||||
if ( work_restart[thrid].restart ) return 0;
|
if ( work_restart[thrid].restart ) return 0;
|
||||||
scrypt_core_simd128_2buf( W+64, V, N );
|
scrypt_core_simd128_2buf( W+64, scratchbuf, N );
|
||||||
|
|
||||||
/*
|
/*
|
||||||
scrypt_core_simd128_3buf( W, V, N );
|
scrypt_core_simd128_3buf( W, V, N );
|
||||||
@@ -1323,17 +1327,15 @@ static int scrypt_N_1_1_256_4way_sha( const uint32_t *input, uint32_t *output,
|
|||||||
|
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
#else
|
#if ( SCRYPT_THROUGHPUT == 4 )
|
||||||
|
|
||||||
#ifdef HAVE_SHA256_4WAY
|
|
||||||
static int scrypt_N_1_1_256_4way( const uint32_t *input, uint32_t *output,
|
static int scrypt_N_1_1_256_4way( const uint32_t *input, uint32_t *output,
|
||||||
uint32_t *midstate, unsigned char *scratchpad, int N, int thrid )
|
uint32_t *midstate, int N, int thrid )
|
||||||
{
|
{
|
||||||
uint32_t _ALIGN(128) tstate[ 4*8 ];
|
uint32_t _ALIGN(128) tstate[ 4*8 ];
|
||||||
uint32_t _ALIGN(128) ostate[ 4*8 ];
|
uint32_t _ALIGN(128) ostate[ 4*8 ];
|
||||||
uint32_t _ALIGN(128) W[ 4*32 ];
|
uint32_t _ALIGN(128) W[ 4*32 ];
|
||||||
uint32_t *V = (uint32_t*)scratchpad;
|
|
||||||
|
|
||||||
intrlv_4x32( W, input, input+20, input+40, input+60, 640 );
|
intrlv_4x32( W, input, input+20, input+40, input+60, 640 );
|
||||||
for ( int i = 0; i < 8; i++ )
|
for ( int i = 0; i < 8; i++ )
|
||||||
@@ -1346,13 +1348,13 @@ static int scrypt_N_1_1_256_4way( const uint32_t *input, uint32_t *output,
|
|||||||
{
|
{
|
||||||
uint32_t _ALIGN(128) X[ 4*32 ];
|
uint32_t _ALIGN(128) X[ 4*32 ];
|
||||||
dintrlv_4x32( X, X+32, X+64, X+96, W, 1024 );
|
dintrlv_4x32( X, X+32, X+64, X+96, W, 1024 );
|
||||||
scrypt_core_simd128_2buf( X, V, N );
|
scrypt_core_simd128_2buf( X, scratchbuf, N );
|
||||||
if ( work_restart[thrid].restart ) return 0;
|
if ( work_restart[thrid].restart ) return 0;
|
||||||
scrypt_core_simd128_2buf( X+64, V, N );
|
scrypt_core_simd128_2buf( X+64, scratchbuf, N );
|
||||||
intrlv_4x32( W, X, X+32, X+64, X+96, 1024 );
|
intrlv_4x32( W, X, X+32, X+64, X+96, 1024 );
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
scrypt_core_4way( (__m128i*)W, (__m128i*)V, N );
|
scrypt_core_4way( (__m128i*)W, (__m128i*)scratchbuf, N );
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -1398,65 +1400,73 @@ static int scrypt_N_1_1_256_4way( const uint32_t *input, uint32_t *output,
|
|||||||
|
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
#endif /* HAVE_SHA256_4WAY */
|
#endif // SCRYPT_THROUGHPUT == 4
|
||||||
|
|
||||||
#endif // SHA
|
//#endif // SHA
|
||||||
|
|
||||||
extern int scanhash_scrypt( struct work *work, uint32_t max_nonce,
|
extern int scanhash_scrypt( struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done, struct thr_info *mythr )
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
{
|
{
|
||||||
|
uint32_t _ALIGN(64) hash[ 8*SCRYPT_THROUGHPUT ];
|
||||||
|
uint32_t _ALIGN(64) data[ 20*SCRYPT_THROUGHPUT ];
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
uint32_t data[SCRYPT_MAX_WAYS * 20], hash[SCRYPT_MAX_WAYS * 8];
|
|
||||||
uint32_t midstate[8];
|
uint32_t midstate[8];
|
||||||
uint32_t n = pdata[19] - 1;
|
uint32_t n = pdata[19] - 1;
|
||||||
int thr_id = mythr->id;
|
int thr_id = mythr->id;
|
||||||
int throughput = scrypt_throughput;
|
|
||||||
int i;
|
int i;
|
||||||
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
||||||
|
|
||||||
for ( i = 0; i < throughput; i++ )
|
for ( i = 0; i < SCRYPT_THROUGHPUT; i++ )
|
||||||
memcpy( data + i * 20, pdata, 80 );
|
memcpy( data + i * 20, pdata, 80 );
|
||||||
|
|
||||||
sha256_transform_le( midstate, data, sha256_initial_state );
|
sha256_transform_le( midstate, data, sha256_initial_state );
|
||||||
|
|
||||||
do {
|
do {
|
||||||
bool rc = true;
|
bool rc = true;
|
||||||
for ( i = 0; i < throughput; i++ ) data[ i*20 + 19 ] = ++n;
|
for ( i = 0; i < SCRYPT_THROUGHPUT; i++ ) data[ i*20 + 19 ] = ++n;
|
||||||
|
|
||||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
//#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||||
if ( throughput == 16 )
|
#if ( SCRYPT_THROUGHPUT == 16 )
|
||||||
rc = scrypt_N_1_1_256_16way( data, hash, midstate, scratchbuf,
|
// if ( SCRYPT_THROUGHPUT == 16 )
|
||||||
opt_param_n, thr_id );
|
rc = scrypt_N_1_1_256_16way( data, hash, midstate, opt_param_n,
|
||||||
else
|
thr_id );
|
||||||
#endif
|
// else
|
||||||
#if defined(__AVX2__)
|
//#endif
|
||||||
if ( throughput == 8 )
|
//#if defined(__AVX2__)
|
||||||
rc = scrypt_N_1_1_256_8way( data, hash, midstate, scratchbuf,
|
#elif ( SCRYPT_THROUGHPUT == 8 )
|
||||||
opt_param_n, thr_id );
|
// if ( SCRYPT_THROUGHPUT == 8 )
|
||||||
else
|
rc = scrypt_N_1_1_256_8way( data, hash, midstate, opt_param_n,
|
||||||
#endif
|
thr_id );
|
||||||
if ( throughput == 4 ) // slower on Ryzen than 8way
|
// else
|
||||||
#if defined(__SHA__)
|
//#endif
|
||||||
rc = scrypt_N_1_1_256_4way_sha( data, hash, midstate, scratchbuf,
|
#elif ( SCRYPT_THROUGHPUT == 4 )
|
||||||
opt_param_n, thr_id );
|
// if ( SCRYPT_THROUGHPUT == 4 ) // slower on Ryzen than 8way
|
||||||
|
//#if defined(__SHA__)
|
||||||
|
// rc = scrypt_N_1_1_256_4way_sha( data, hash, midstate, opt_param_n,
|
||||||
|
// thr_id );
|
||||||
|
//#else
|
||||||
|
rc = scrypt_N_1_1_256_4way( data, hash, midstate, opt_param_n,
|
||||||
|
thr_id );
|
||||||
#else
|
#else
|
||||||
rc = scrypt_N_1_1_256_4way( data, hash, midstate, scratchbuf,
|
|
||||||
opt_param_n, thr_id );
|
#error "Invalid SCRYPT_THROUGHPUT"
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
/*
|
||||||
#if defined(__SHA__)
|
#if defined(__SHA__)
|
||||||
else
|
else
|
||||||
if (throughput == 2 ) // slower on Ryzen than 4way_sha & 8way
|
if ( SCRYPT_THROUGHPUT == 2 ) // slower on Ryzen than 4way_sha & 8way
|
||||||
rc = scrypt_N_1_1_256_sha_2buf( data, hash, midstate, scratchbuf,
|
rc = scrypt_N_1_1_256_sha_2buf( data, hash, midstate, opt_param_n,
|
||||||
opt_param_n, thr_id );
|
thr_id );
|
||||||
#endif
|
#endif
|
||||||
else // should never get here
|
else // should never get here
|
||||||
rc = scrypt_N_1_1_256( data, hash, midstate, scratchbuf,
|
rc = scrypt_N_1_1_256( data, hash, midstate, opt_param_n, thr_id );
|
||||||
opt_param_n, thr_id );
|
*/
|
||||||
|
|
||||||
// test the hash
|
// test the hash
|
||||||
if ( rc )
|
if ( rc )
|
||||||
for ( i = 0; i < throughput; i++ )
|
for ( i = 0; i < SCRYPT_THROUGHPUT; i++ )
|
||||||
{
|
{
|
||||||
if ( unlikely( valid_hash( hash + i*8, ptarget ) && !opt_benchmark ) )
|
if ( unlikely( valid_hash( hash + i*8, ptarget ) && !opt_benchmark ) )
|
||||||
{
|
{
|
||||||
@@ -1468,7 +1478,7 @@ extern int scanhash_scrypt( struct work *work, uint32_t max_nonce,
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
} while ( likely( ( n < ( max_nonce - throughput ) ) && !(*restart) ) );
|
} while ( likely( ( n < ( max_nonce - SCRYPT_THROUGHPUT ) ) && !(*restart) ) );
|
||||||
|
|
||||||
*hashes_done = n - pdata[19];
|
*hashes_done = n - pdata[19];
|
||||||
pdata[19] = n;
|
pdata[19] = n;
|
||||||
@@ -1489,7 +1499,7 @@ bool register_scrypt_algo( algo_gate_t* gate )
|
|||||||
//#if defined(__SHA__)
|
//#if defined(__SHA__)
|
||||||
// gate->optimizations = SSE2_OPT | SHA_OPT;
|
// gate->optimizations = SSE2_OPT | SHA_OPT;
|
||||||
//#else
|
//#else
|
||||||
gate->optimizations = SSE2_OPT | AVX2_OPT | AVX512_OPT;
|
gate->optimizations = SSE2_OPT | SSE42_OPT | AVX_OPT | AVX2_OPT | AVX512_OPT;
|
||||||
//#endif
|
//#endif
|
||||||
gate->miner_thread_init =(void*)&scrypt_miner_thread_init;
|
gate->miner_thread_init =(void*)&scrypt_miner_thread_init;
|
||||||
gate->scanhash = (void*)&scanhash_scrypt;
|
gate->scanhash = (void*)&scanhash_scrypt;
|
||||||
@@ -1497,8 +1507,11 @@ bool register_scrypt_algo( algo_gate_t* gate )
|
|||||||
opt_param_n = opt_param_n ? opt_param_n : 1024;
|
opt_param_n = opt_param_n ? opt_param_n : 1024;
|
||||||
applog( LOG_INFO,"Scrypt paramaters: N= %d, R= 1", opt_param_n );
|
applog( LOG_INFO,"Scrypt paramaters: N= %d, R= 1", opt_param_n );
|
||||||
|
|
||||||
|
// scrypt_throughput can be defined at compile time and used to replace
|
||||||
|
// MAX_WAYS to reduce memory usage.
|
||||||
|
|
||||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||||
scrypt_throughput = 16;
|
// scrypt_throughput = 16;
|
||||||
if ( opt_param_n > 0x4000 )
|
if ( opt_param_n > 0x4000 )
|
||||||
scratchbuf_size = opt_param_n * 3 * 128; // 3 buf
|
scratchbuf_size = opt_param_n * 3 * 128; // 3 buf
|
||||||
else
|
else
|
||||||
@@ -1511,13 +1524,13 @@ bool register_scrypt_algo( algo_gate_t* gate )
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#elif defined(__AVX2__)
|
#elif defined(__AVX2__)
|
||||||
scrypt_throughput = 8;
|
// scrypt_throughput = 8;
|
||||||
if ( opt_param_n > 0x4000 )
|
if ( opt_param_n > 0x4000 )
|
||||||
scratchbuf_size = opt_param_n * 3 * 128; // 3 buf
|
scratchbuf_size = opt_param_n * 3 * 128; // 3 buf
|
||||||
else
|
else
|
||||||
scratchbuf_size = opt_param_n * 2 * 128; // 2 way
|
scratchbuf_size = opt_param_n * 2 * 128; // 2 way
|
||||||
#else
|
#else
|
||||||
scrypt_throughput = 4;
|
// scrypt_throughput = 4;
|
||||||
if ( opt_param_n > 0x4000 )
|
if ( opt_param_n > 0x4000 )
|
||||||
scratchbuf_size = opt_param_n * 2 * 128; // 2 buf
|
scratchbuf_size = opt_param_n * 2 * 128; // 2 buf
|
||||||
else
|
else
|
||||||
@@ -1533,7 +1546,7 @@ bool register_scrypt_algo( algo_gate_t* gate )
|
|||||||
format_number_si( &d_size, d_units );
|
format_number_si( &d_size, d_units );
|
||||||
|
|
||||||
applog( LOG_INFO,"Throughput %d/thr, Buffer %.0f %siB/thr, Total %.0f %siB\n",
|
applog( LOG_INFO,"Throughput %d/thr, Buffer %.0f %siB/thr, Total %.0f %siB\n",
|
||||||
scrypt_throughput, t_size, t_units, d_size, d_units );
|
SCRYPT_THROUGHPUT, t_size, t_units, d_size, d_units );
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
};
|
};
|
||||||
|
@@ -84,6 +84,11 @@ void sha256_8way_transform_le( __m256i *state_out, const __m256i *data,
|
|||||||
void sha256_8way_transform_be( __m256i *state_out, const __m256i *data,
|
void sha256_8way_transform_be( __m256i *state_out, const __m256i *data,
|
||||||
const __m256i *state_in );
|
const __m256i *state_in );
|
||||||
|
|
||||||
|
void sha256_8way_prehash_3rounds( __m256i *state_mid, const __m256i *W,
|
||||||
|
const __m256i *state_in );
|
||||||
|
void sha256_8way_final_rounds( __m256i *state_out, const __m256i *data,
|
||||||
|
const __m256i *state_in, const __m256i *state_mid );
|
||||||
|
|
||||||
#endif // AVX2
|
#endif // AVX2
|
||||||
|
|
||||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||||
|
@@ -8,7 +8,7 @@
|
|||||||
* any later version. See COPYING for more details.
|
* any later version. See COPYING for more details.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "algo-gate-api.h"
|
#include "sha256d-4way.h"
|
||||||
|
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <inttypes.h>
|
#include <inttypes.h>
|
||||||
@@ -181,6 +181,8 @@ static const uint32_t sha256d_hash1[16] = {
|
|||||||
};
|
};
|
||||||
|
|
||||||
// this performs the entire hash all over again, why?
|
// this performs the entire hash all over again, why?
|
||||||
|
// because main function only does 56 rounds.
|
||||||
|
|
||||||
static void sha256d_80_swap(uint32_t *hash, const uint32_t *data)
|
static void sha256d_80_swap(uint32_t *hash, const uint32_t *data)
|
||||||
{
|
{
|
||||||
uint32_t S[16];
|
uint32_t S[16];
|
||||||
@@ -492,7 +494,7 @@ static inline void sha256d_ms(uint32_t *hash, uint32_t *W,
|
|||||||
void sha256d_ms_4way(uint32_t *hash, uint32_t *data,
|
void sha256d_ms_4way(uint32_t *hash, uint32_t *data,
|
||||||
const uint32_t *midstate, const uint32_t *prehash);
|
const uint32_t *midstate, const uint32_t *prehash);
|
||||||
|
|
||||||
static inline int scanhash_sha256d_4way( struct work *work,
|
static inline int scanhash_sha256d_4way_pooler( struct work *work,
|
||||||
uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr )
|
uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
{
|
{
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
@@ -553,7 +555,7 @@ static inline int scanhash_sha256d_4way( struct work *work,
|
|||||||
void sha256d_ms_8way(uint32_t *hash, uint32_t *data,
|
void sha256d_ms_8way(uint32_t *hash, uint32_t *data,
|
||||||
const uint32_t *midstate, const uint32_t *prehash);
|
const uint32_t *midstate, const uint32_t *prehash);
|
||||||
|
|
||||||
static inline int scanhash_sha256d_8way( struct work *work,
|
static inline int scanhash_sha256d_8way_pooler( struct work *work,
|
||||||
uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr )
|
uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
{
|
{
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
@@ -609,7 +611,7 @@ static inline int scanhash_sha256d_8way( struct work *work,
|
|||||||
|
|
||||||
#endif /* HAVE_SHA256_8WAY */
|
#endif /* HAVE_SHA256_8WAY */
|
||||||
|
|
||||||
int scanhash_sha256d( struct work *work,
|
int scanhash_sha256d_pooler( struct work *work,
|
||||||
uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr )
|
uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
{
|
{
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
@@ -625,11 +627,11 @@ int scanhash_sha256d( struct work *work,
|
|||||||
|
|
||||||
#ifdef HAVE_SHA256_8WAY
|
#ifdef HAVE_SHA256_8WAY
|
||||||
if (sha256_use_8way())
|
if (sha256_use_8way())
|
||||||
return scanhash_sha256d_8way( work, max_nonce, hashes_done, mythr );
|
return scanhash_sha256d_8way_pooler( work, max_nonce, hashes_done, mythr );
|
||||||
#endif
|
#endif
|
||||||
#ifdef HAVE_SHA256_4WAY
|
#ifdef HAVE_SHA256_4WAY
|
||||||
if (sha256_use_4way())
|
if (sha256_use_4way())
|
||||||
return scanhash_sha256d_4way( work, max_nonce, hashes_done, mythr );
|
return scanhash_sha256d_4way_pooler( work, max_nonce, hashes_done, mythr );
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
memcpy(data, pdata + 16, 64);
|
memcpy(data, pdata + 16, 64);
|
||||||
@@ -690,9 +692,13 @@ int scanhash_SHA256d( struct work *work, const uint32_t max_nonce,
|
|||||||
|
|
||||||
bool register_sha256d_algo( algo_gate_t* gate )
|
bool register_sha256d_algo( algo_gate_t* gate )
|
||||||
{
|
{
|
||||||
gate->optimizations = SSE2_OPT | AVX2_OPT;
|
gate->optimizations = SSE2_OPT | AVX2_OPT | AVX512_OPT;
|
||||||
gate->scanhash = (void*)&scanhash_sha256d;
|
#if defined(SHA256D_16WAY)
|
||||||
// gate->hash = (void*)&sha256d;
|
gate->scanhash = (void*)&scanhash_sha256d_16way;
|
||||||
|
#else
|
||||||
|
gate->scanhash = (void*)&scanhash_sha256d_pooler;
|
||||||
|
#endif
|
||||||
|
// gate->hash = (void*)&sha256d;
|
||||||
return true;
|
return true;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@@ -548,6 +548,136 @@ void sha256_8way_init( sha256_8way_context *sc )
|
|||||||
sc->val[7] = m256_const1_64( 0x5BE0CD195BE0CD19 );
|
sc->val[7] = m256_const1_64( 0x5BE0CD195BE0CD19 );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Aggresive prehashing, LE byte order
|
||||||
|
void sha256_8way_prehash_3rounds( __m256i *state_mid, const __m256i *W,
|
||||||
|
const __m256i *state_in )
|
||||||
|
{
|
||||||
|
__m256i A, B, C, D, E, F, G, H;
|
||||||
|
|
||||||
|
A = _mm256_load_si256( state_in );
|
||||||
|
B = _mm256_load_si256( state_in + 1 );
|
||||||
|
C = _mm256_load_si256( state_in + 2 );
|
||||||
|
D = _mm256_load_si256( state_in + 3 );
|
||||||
|
E = _mm256_load_si256( state_in + 4 );
|
||||||
|
F = _mm256_load_si256( state_in + 5 );
|
||||||
|
G = _mm256_load_si256( state_in + 6 );
|
||||||
|
H = _mm256_load_si256( state_in + 7 );
|
||||||
|
|
||||||
|
#if !defined(__AVX512VL__)
|
||||||
|
__m256i X_xor_Y, Y_xor_Z = _mm256_xor_si256( B, C );
|
||||||
|
#endif
|
||||||
|
|
||||||
|
SHA2s_8WAY_STEP( A, B, C, D, E, F, G, H, 0, 0 );
|
||||||
|
SHA2s_8WAY_STEP( H, A, B, C, D, E, F, G, 1, 0 );
|
||||||
|
SHA2s_8WAY_STEP( G, H, A, B, C, D, E, F, 2, 0 );
|
||||||
|
|
||||||
|
_mm256_store_si256( state_mid , A );
|
||||||
|
_mm256_store_si256( state_mid + 1, B );
|
||||||
|
_mm256_store_si256( state_mid + 2, C );
|
||||||
|
_mm256_store_si256( state_mid + 3, D );
|
||||||
|
_mm256_store_si256( state_mid + 4, E );
|
||||||
|
_mm256_store_si256( state_mid + 5, F );
|
||||||
|
_mm256_store_si256( state_mid + 6, G );
|
||||||
|
_mm256_store_si256( state_mid + 7, H );
|
||||||
|
}
|
||||||
|
|
||||||
|
void sha256_8way_final_rounds( __m256i *state_out, const __m256i *data,
|
||||||
|
const __m256i *state_in, const __m256i *state_mid )
|
||||||
|
{
|
||||||
|
__m256i A, B, C, D, E, F, G, H;
|
||||||
|
__m256i W[16];
|
||||||
|
|
||||||
|
memcpy_256( W, data, 16 );
|
||||||
|
|
||||||
|
A = _mm256_load_si256( state_mid );
|
||||||
|
B = _mm256_load_si256( state_mid + 1 );
|
||||||
|
C = _mm256_load_si256( state_mid + 2 );
|
||||||
|
D = _mm256_load_si256( state_mid + 3 );
|
||||||
|
E = _mm256_load_si256( state_mid + 4 );
|
||||||
|
F = _mm256_load_si256( state_mid + 5 );
|
||||||
|
G = _mm256_load_si256( state_mid + 6 );
|
||||||
|
H = _mm256_load_si256( state_mid + 7 );
|
||||||
|
|
||||||
|
// SHA2s_8WAY_STEP( A, B, C, D, E, F, G, H, 0, 0 );
|
||||||
|
// SHA2s_8WAY_STEP( H, A, B, C, D, E, F, G, 1, 0 );
|
||||||
|
// SHA2s_8WAY_STEP( G, H, A, B, C, D, E, F, 2, 0 );
|
||||||
|
|
||||||
|
#if !defined(__AVX512VL__)
|
||||||
|
__m256i X_xor_Y, Y_xor_Z = _mm256_xor_si256( G, H );
|
||||||
|
#endif
|
||||||
|
|
||||||
|
SHA2s_8WAY_STEP( F, G, H, A, B, C, D, E, 3, 0 );
|
||||||
|
SHA2s_8WAY_STEP( E, F, G, H, A, B, C, D, 4, 0 );
|
||||||
|
SHA2s_8WAY_STEP( D, E, F, G, H, A, B, C, 5, 0 );
|
||||||
|
SHA2s_8WAY_STEP( C, D, E, F, G, H, A, B, 6, 0 );
|
||||||
|
SHA2s_8WAY_STEP( B, C, D, E, F, G, H, A, 7, 0 );
|
||||||
|
SHA2s_8WAY_STEP( A, B, C, D, E, F, G, H, 8, 0 );
|
||||||
|
SHA2s_8WAY_STEP( H, A, B, C, D, E, F, G, 9, 0 );
|
||||||
|
SHA2s_8WAY_STEP( G, H, A, B, C, D, E, F, 10, 0 );
|
||||||
|
SHA2s_8WAY_STEP( F, G, H, A, B, C, D, E, 11, 0 );
|
||||||
|
SHA2s_8WAY_STEP( E, F, G, H, A, B, C, D, 12, 0 );
|
||||||
|
SHA2s_8WAY_STEP( D, E, F, G, H, A, B, C, 13, 0 );
|
||||||
|
SHA2s_8WAY_STEP( C, D, E, F, G, H, A, B, 14, 0 );
|
||||||
|
SHA2s_8WAY_STEP( B, C, D, E, F, G, H, A, 15, 0 );
|
||||||
|
|
||||||
|
for ( int j = 16; j < 64; j += 16 )
|
||||||
|
{
|
||||||
|
W[ 0] = SHA2x_MEXP( 14, 9, 1, 0 );
|
||||||
|
W[ 1] = SHA2x_MEXP( 15, 10, 2, 1 );
|
||||||
|
W[ 2] = SHA2x_MEXP( 0, 11, 3, 2 );
|
||||||
|
W[ 3] = SHA2x_MEXP( 1, 12, 4, 3 );
|
||||||
|
W[ 4] = SHA2x_MEXP( 2, 13, 5, 4 );
|
||||||
|
W[ 5] = SHA2x_MEXP( 3, 14, 6, 5 );
|
||||||
|
W[ 6] = SHA2x_MEXP( 4, 15, 7, 6 );
|
||||||
|
W[ 7] = SHA2x_MEXP( 5, 0, 8, 7 );
|
||||||
|
W[ 8] = SHA2x_MEXP( 6, 1, 9, 8 );
|
||||||
|
W[ 9] = SHA2x_MEXP( 7, 2, 10, 9 );
|
||||||
|
W[10] = SHA2x_MEXP( 8, 3, 11, 10 );
|
||||||
|
W[11] = SHA2x_MEXP( 9, 4, 12, 11 );
|
||||||
|
W[12] = SHA2x_MEXP( 10, 5, 13, 12 );
|
||||||
|
W[13] = SHA2x_MEXP( 11, 6, 14, 13 );
|
||||||
|
W[14] = SHA2x_MEXP( 12, 7, 15, 14 );
|
||||||
|
W[15] = SHA2x_MEXP( 13, 8, 0, 15 );
|
||||||
|
|
||||||
|
SHA2s_8WAY_STEP( A, B, C, D, E, F, G, H, 0, j );
|
||||||
|
SHA2s_8WAY_STEP( H, A, B, C, D, E, F, G, 1, j );
|
||||||
|
SHA2s_8WAY_STEP( G, H, A, B, C, D, E, F, 2, j );
|
||||||
|
SHA2s_8WAY_STEP( F, G, H, A, B, C, D, E, 3, j );
|
||||||
|
SHA2s_8WAY_STEP( E, F, G, H, A, B, C, D, 4, j );
|
||||||
|
SHA2s_8WAY_STEP( D, E, F, G, H, A, B, C, 5, j );
|
||||||
|
SHA2s_8WAY_STEP( C, D, E, F, G, H, A, B, 6, j );
|
||||||
|
SHA2s_8WAY_STEP( B, C, D, E, F, G, H, A, 7, j );
|
||||||
|
SHA2s_8WAY_STEP( A, B, C, D, E, F, G, H, 8, j );
|
||||||
|
SHA2s_8WAY_STEP( H, A, B, C, D, E, F, G, 9, j );
|
||||||
|
SHA2s_8WAY_STEP( G, H, A, B, C, D, E, F, 10, j );
|
||||||
|
SHA2s_8WAY_STEP( F, G, H, A, B, C, D, E, 11, j );
|
||||||
|
SHA2s_8WAY_STEP( E, F, G, H, A, B, C, D, 12, j );
|
||||||
|
SHA2s_8WAY_STEP( D, E, F, G, H, A, B, C, 13, j );
|
||||||
|
SHA2s_8WAY_STEP( C, D, E, F, G, H, A, B, 14, j );
|
||||||
|
SHA2s_8WAY_STEP( B, C, D, E, F, G, H, A, 15, j );
|
||||||
|
}
|
||||||
|
|
||||||
|
A = _mm256_add_epi32( A, _mm256_load_si256( state_in ) );
|
||||||
|
B = _mm256_add_epi32( B, _mm256_load_si256( state_in + 1 ) );
|
||||||
|
C = _mm256_add_epi32( C, _mm256_load_si256( state_in + 2 ) );
|
||||||
|
D = _mm256_add_epi32( D, _mm256_load_si256( state_in + 3 ) );
|
||||||
|
E = _mm256_add_epi32( E, _mm256_load_si256( state_in + 4 ) );
|
||||||
|
F = _mm256_add_epi32( F, _mm256_load_si256( state_in + 5 ) );
|
||||||
|
G = _mm256_add_epi32( G, _mm256_load_si256( state_in + 6 ) );
|
||||||
|
H = _mm256_add_epi32( H, _mm256_load_si256( state_in + 7 ) );
|
||||||
|
|
||||||
|
_mm256_store_si256( state_out , A );
|
||||||
|
_mm256_store_si256( state_out + 1, B );
|
||||||
|
_mm256_store_si256( state_out + 2, C );
|
||||||
|
_mm256_store_si256( state_out + 3, D );
|
||||||
|
_mm256_store_si256( state_out + 4, E );
|
||||||
|
_mm256_store_si256( state_out + 5, F );
|
||||||
|
_mm256_store_si256( state_out + 6, G );
|
||||||
|
_mm256_store_si256( state_out + 7, H );
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// need to handle odd byte length for yespower.
|
// need to handle odd byte length for yespower.
|
||||||
// Assume only last update is odd.
|
// Assume only last update is odd.
|
||||||
|
|
||||||
|
@@ -53,4 +53,8 @@ void sha256_ni2way_transform_be( uint32_t *out_X, uint32_t*out_Y,
|
|||||||
#define sha256_transform_be sph_sha256_transform_be
|
#define sha256_transform_be sph_sha256_transform_be
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// SHA can't do only 3 rounds
|
||||||
|
#define sha256_prehash_3rounds sph_sha256_prehash_3rounds
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@@ -1,4 +1,4 @@
|
|||||||
#include "sha256t-gate.h"
|
#include "sha256d-4way.h"
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
@@ -13,7 +13,7 @@ int scanhash_sha256d_16way( struct work *work, const uint32_t max_nonce,
|
|||||||
__m512i block[16] __attribute__ ((aligned (64)));
|
__m512i block[16] __attribute__ ((aligned (64)));
|
||||||
__m512i hash32[8] __attribute__ ((aligned (32)));
|
__m512i hash32[8] __attribute__ ((aligned (32)));
|
||||||
__m512i initstate[8] __attribute__ ((aligned (32)));
|
__m512i initstate[8] __attribute__ ((aligned (32)));
|
||||||
__m512i midstate[8] __attribute__ ((aligned (32)));
|
__m512i midstate1[8] __attribute__ ((aligned (32)));
|
||||||
__m512i midstate2[8] __attribute__ ((aligned (32)));
|
__m512i midstate2[8] __attribute__ ((aligned (32)));
|
||||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||||
__m512i vdata[20] __attribute__ ((aligned (32)));
|
__m512i vdata[20] __attribute__ ((aligned (32)));
|
||||||
@@ -46,11 +46,10 @@ int scanhash_sha256d_16way( struct work *work, const uint32_t max_nonce,
|
|||||||
initstate[6] = m512_const1_64( 0x1F83D9AB1F83D9AB );
|
initstate[6] = m512_const1_64( 0x1F83D9AB1F83D9AB );
|
||||||
initstate[7] = m512_const1_64( 0x5BE0CD195BE0CD19 );
|
initstate[7] = m512_const1_64( 0x5BE0CD195BE0CD19 );
|
||||||
|
|
||||||
// hash first 64 byte block of data
|
sha256_16way_transform_le( midstate1, vdata, initstate );
|
||||||
sha256_16way_transform_le( midstate, vdata, initstate );
|
|
||||||
|
|
||||||
// Do 3 rounds on the first 12 bytes of the next block
|
// Do 3 rounds on the first 12 bytes of the next block
|
||||||
sha256_16way_prehash_3rounds( midstate2, vdata + 16, midstate );
|
sha256_16way_prehash_3rounds( midstate2, vdata + 16, midstate1 );
|
||||||
|
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
@@ -59,7 +58,7 @@ int scanhash_sha256d_16way( struct work *work, const uint32_t max_nonce,
|
|||||||
block[ 4] = last_byte;
|
block[ 4] = last_byte;
|
||||||
memset_zero_512( block + 5, 10 );
|
memset_zero_512( block + 5, 10 );
|
||||||
block[15] = m512_const1_32( 80*8 ); // bit count
|
block[15] = m512_const1_32( 80*8 ); // bit count
|
||||||
sha256_16way_final_rounds( hash32, block, midstate, midstate2 );
|
sha256_16way_final_rounds( hash32, block, midstate1, midstate2 );
|
||||||
|
|
||||||
// 2. 32 byte hash from 1.
|
// 2. 32 byte hash from 1.
|
||||||
memcpy_512( block, hash32, 8 );
|
memcpy_512( block, hash32, 8 );
|
||||||
@@ -99,7 +98,8 @@ int scanhash_sha256d_8way( struct work *work, const uint32_t max_nonce,
|
|||||||
__m256i block[16] __attribute__ ((aligned (64)));
|
__m256i block[16] __attribute__ ((aligned (64)));
|
||||||
__m256i hash32[8] __attribute__ ((aligned (32)));
|
__m256i hash32[8] __attribute__ ((aligned (32)));
|
||||||
__m256i initstate[8] __attribute__ ((aligned (32)));
|
__m256i initstate[8] __attribute__ ((aligned (32)));
|
||||||
__m256i midstate[8] __attribute__ ((aligned (32)));
|
__m256i midstate1[8] __attribute__ ((aligned (32)));
|
||||||
|
__m256i midstate2[8] __attribute__ ((aligned (32)));
|
||||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||||
__m256i vdata[20] __attribute__ ((aligned (32)));
|
__m256i vdata[20] __attribute__ ((aligned (32)));
|
||||||
uint32_t *hash32_d7 = (uint32_t*)&( hash32[7] );
|
uint32_t *hash32_d7 = (uint32_t*)&( hash32[7] );
|
||||||
@@ -116,7 +116,7 @@ int scanhash_sha256d_8way( struct work *work, const uint32_t max_nonce,
|
|||||||
const __m256i eight = m256_const1_32( 8 );
|
const __m256i eight = m256_const1_32( 8 );
|
||||||
|
|
||||||
for ( int i = 0; i < 19; i++ )
|
for ( int i = 0; i < 19; i++ )
|
||||||
vdata[i] = m256_const1_32( pdata[i] );
|
vdata[i] = m256_const1_32( pdata[i] );
|
||||||
|
|
||||||
*noncev = _mm256_set_epi32( n+ 7, n+ 6, n+ 5, n+ 4, n+ 3, n+ 2, n+1, n );
|
*noncev = _mm256_set_epi32( n+ 7, n+ 6, n+ 5, n+ 4, n+ 3, n+ 2, n+1, n );
|
||||||
|
|
||||||
@@ -130,8 +130,10 @@ int scanhash_sha256d_8way( struct work *work, const uint32_t max_nonce,
|
|||||||
initstate[6] = m256_const1_64( 0x1F83D9AB1F83D9AB );
|
initstate[6] = m256_const1_64( 0x1F83D9AB1F83D9AB );
|
||||||
initstate[7] = m256_const1_64( 0x5BE0CD195BE0CD19 );
|
initstate[7] = m256_const1_64( 0x5BE0CD195BE0CD19 );
|
||||||
|
|
||||||
// hash first 64 bytes of data
|
sha256_8way_transform_le( midstate1, vdata, initstate );
|
||||||
sha256_8way_transform_le( midstate, vdata, initstate );
|
|
||||||
|
// Do 3 rounds on the first 12 bytes of the next block
|
||||||
|
sha256_8way_prehash_3rounds( midstate2, vdata + 16, midstate1 );
|
||||||
|
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
@@ -140,7 +142,7 @@ int scanhash_sha256d_8way( struct work *work, const uint32_t max_nonce,
|
|||||||
block[ 4] = last_byte;
|
block[ 4] = last_byte;
|
||||||
memset_zero_256( block + 5, 10 );
|
memset_zero_256( block + 5, 10 );
|
||||||
block[15] = m256_const1_32( 80*8 ); // bit count
|
block[15] = m256_const1_32( 80*8 ); // bit count
|
||||||
sha256_8way_transform_le( hash32, block, midstate );
|
sha256_8way_final_rounds( hash32, block, midstate1, midstate2 );
|
||||||
|
|
||||||
// 2. 32 byte hash from 1.
|
// 2. 32 byte hash from 1.
|
||||||
memcpy_256( block, hash32, 8 );
|
memcpy_256( block, hash32, 8 );
|
||||||
@@ -253,3 +255,20 @@ int scanhash_sha256d_4way( struct work *work, const uint32_t max_nonce,
|
|||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
bool register_sha256d_algo( algo_gate_t* gate )
|
||||||
|
{
|
||||||
|
gate->optimizations = SSE2_OPT | AVX2_OPT | AVX512_OPT;
|
||||||
|
#if defined(SHA256D_16WAY)
|
||||||
|
gate->scanhash = (void*)&scanhash_sha256d_16way;
|
||||||
|
#elif defined(SHA256D_8WAY)
|
||||||
|
gate->scanhash = (void*)&scanhash_sha256d_8way;
|
||||||
|
#elif defined(SHA256D_4WAY)
|
||||||
|
gate->scanhash = (void*)&scanhash_sha256d_4way;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// gate->hash = (void*)&sha256d;
|
||||||
|
return true;
|
||||||
|
};
|
||||||
|
*/
|
||||||
|
|
||||||
|
48
algo/sha/sha256d-4way.h
Normal file
48
algo/sha/sha256d-4way.h
Normal file
@@ -0,0 +1,48 @@
|
|||||||
|
#ifndef __SHA256D_4WAY_H__
|
||||||
|
#define __SHA256D_4WAY_H__ 1
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
#include "algo-gate-api.h"
|
||||||
|
|
||||||
|
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||||
|
#define SHA256D_16WAY 1
|
||||||
|
/*
|
||||||
|
#elif defined(__AVX2__)
|
||||||
|
#define SHA256D_8WAY 1
|
||||||
|
#else
|
||||||
|
#define SHA256D_4WAY 1
|
||||||
|
*/
|
||||||
|
#endif
|
||||||
|
|
||||||
|
bool register_sha256d_algo( algo_gate_t* gate );
|
||||||
|
|
||||||
|
#if defined(SHA256D_16WAY)
|
||||||
|
|
||||||
|
int scanhash_sha256d_16way( struct work *work, uint32_t max_nonce,
|
||||||
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
|
#endif
|
||||||
|
/*
|
||||||
|
#if defined(SHA256D_8WAY)
|
||||||
|
|
||||||
|
int scanhash_sha256d_8way( struct work *work, uint32_t max_nonce,
|
||||||
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(SHA256D_4WAY)
|
||||||
|
|
||||||
|
int scanhash_sha256d_4way( struct work *work, uint32_t max_nonce,
|
||||||
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
|
#endif
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
#if defined(__SHA__)
|
||||||
|
|
||||||
|
int scanhash_sha256d( struct work *work, uint32_t max_nonce,
|
||||||
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
|
|
||||||
|
#endif
|
||||||
|
*/
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
@@ -13,7 +13,7 @@ int scanhash_sha256t_16way( struct work *work, const uint32_t max_nonce,
|
|||||||
__m512i block[16] __attribute__ ((aligned (64)));
|
__m512i block[16] __attribute__ ((aligned (64)));
|
||||||
__m512i hash32[8] __attribute__ ((aligned (32)));
|
__m512i hash32[8] __attribute__ ((aligned (32)));
|
||||||
__m512i initstate[8] __attribute__ ((aligned (32)));
|
__m512i initstate[8] __attribute__ ((aligned (32)));
|
||||||
__m512i midstate[8] __attribute__ ((aligned (32)));
|
__m512i midstate1[8] __attribute__ ((aligned (32)));
|
||||||
__m512i midstate2[8] __attribute__ ((aligned (32)));
|
__m512i midstate2[8] __attribute__ ((aligned (32)));
|
||||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||||
__m512i vdata[20] __attribute__ ((aligned (32)));
|
__m512i vdata[20] __attribute__ ((aligned (32)));
|
||||||
@@ -31,7 +31,7 @@ int scanhash_sha256t_16way( struct work *work, const uint32_t max_nonce,
|
|||||||
const __m512i sixteen = m512_const1_32( 16 );
|
const __m512i sixteen = m512_const1_32( 16 );
|
||||||
|
|
||||||
for ( int i = 0; i < 19; i++ )
|
for ( int i = 0; i < 19; i++ )
|
||||||
vdata[i] = m512_const1_32( pdata[i] );
|
vdata[i] = m512_const1_32( pdata[i] );
|
||||||
|
|
||||||
*noncev = _mm512_set_epi32( n+15, n+14, n+13, n+12, n+11, n+10, n+9, n+8,
|
*noncev = _mm512_set_epi32( n+15, n+14, n+13, n+12, n+11, n+10, n+9, n+8,
|
||||||
n+ 7, n+ 6, n+ 5, n+ 4, n+ 3, n+ 2, n+1, n );
|
n+ 7, n+ 6, n+ 5, n+ 4, n+ 3, n+ 2, n+1, n );
|
||||||
@@ -46,11 +46,10 @@ int scanhash_sha256t_16way( struct work *work, const uint32_t max_nonce,
|
|||||||
initstate[6] = m512_const1_64( 0x1F83D9AB1F83D9AB );
|
initstate[6] = m512_const1_64( 0x1F83D9AB1F83D9AB );
|
||||||
initstate[7] = m512_const1_64( 0x5BE0CD195BE0CD19 );
|
initstate[7] = m512_const1_64( 0x5BE0CD195BE0CD19 );
|
||||||
|
|
||||||
// hash first 64 byte block of data
|
sha256_16way_transform_le( midstate1, vdata, initstate );
|
||||||
sha256_16way_transform_le( midstate, vdata, initstate );
|
|
||||||
|
|
||||||
// Do 3 rounds on the first 12 bytes of the next block
|
// Do 3 rounds on the first 12 bytes of the next block
|
||||||
sha256_16way_prehash_3rounds( midstate2, vdata + 16, midstate );
|
sha256_16way_prehash_3rounds( midstate2, vdata + 16, midstate1 );
|
||||||
|
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
@@ -59,7 +58,7 @@ int scanhash_sha256t_16way( struct work *work, const uint32_t max_nonce,
|
|||||||
block[ 4] = last_byte;
|
block[ 4] = last_byte;
|
||||||
memset_zero_512( block + 5, 10 );
|
memset_zero_512( block + 5, 10 );
|
||||||
block[15] = m512_const1_32( 80*8 ); // bit count
|
block[15] = m512_const1_32( 80*8 ); // bit count
|
||||||
sha256_16way_final_rounds( hash32, block, midstate, midstate2 );
|
sha256_16way_final_rounds( hash32, block, midstate1, midstate2 );
|
||||||
|
|
||||||
// 2. 32 byte hash from 1.
|
// 2. 32 byte hash from 1.
|
||||||
memcpy_512( block, hash32, 8 );
|
memcpy_512( block, hash32, 8 );
|
||||||
@@ -104,7 +103,8 @@ int scanhash_sha256t_8way( struct work *work, const uint32_t max_nonce,
|
|||||||
__m256i block[16] __attribute__ ((aligned (64)));
|
__m256i block[16] __attribute__ ((aligned (64)));
|
||||||
__m256i hash32[8] __attribute__ ((aligned (32)));
|
__m256i hash32[8] __attribute__ ((aligned (32)));
|
||||||
__m256i initstate[8] __attribute__ ((aligned (32)));
|
__m256i initstate[8] __attribute__ ((aligned (32)));
|
||||||
__m256i midstate[8] __attribute__ ((aligned (32)));
|
__m256i midstate1[8] __attribute__ ((aligned (32)));
|
||||||
|
__m256i midstate2[8] __attribute__ ((aligned (32)));
|
||||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||||
__m256i vdata[20] __attribute__ ((aligned (32)));
|
__m256i vdata[20] __attribute__ ((aligned (32)));
|
||||||
uint32_t *hash32_d7 = (uint32_t*)&( hash32[7] );
|
uint32_t *hash32_d7 = (uint32_t*)&( hash32[7] );
|
||||||
@@ -121,7 +121,7 @@ int scanhash_sha256t_8way( struct work *work, const uint32_t max_nonce,
|
|||||||
const __m256i eight = m256_const1_32( 8 );
|
const __m256i eight = m256_const1_32( 8 );
|
||||||
|
|
||||||
for ( int i = 0; i < 19; i++ )
|
for ( int i = 0; i < 19; i++ )
|
||||||
vdata[i] = m256_const1_32( pdata[i] );
|
vdata[i] = m256_const1_32( pdata[i] );
|
||||||
|
|
||||||
*noncev = _mm256_set_epi32( n+ 7, n+ 6, n+ 5, n+ 4, n+ 3, n+ 2, n+1, n );
|
*noncev = _mm256_set_epi32( n+ 7, n+ 6, n+ 5, n+ 4, n+ 3, n+ 2, n+1, n );
|
||||||
|
|
||||||
@@ -135,8 +135,10 @@ int scanhash_sha256t_8way( struct work *work, const uint32_t max_nonce,
|
|||||||
initstate[6] = m256_const1_64( 0x1F83D9AB1F83D9AB );
|
initstate[6] = m256_const1_64( 0x1F83D9AB1F83D9AB );
|
||||||
initstate[7] = m256_const1_64( 0x5BE0CD195BE0CD19 );
|
initstate[7] = m256_const1_64( 0x5BE0CD195BE0CD19 );
|
||||||
|
|
||||||
// hash first 64 bytes of data
|
sha256_8way_transform_le( midstate1, vdata, initstate );
|
||||||
sha256_8way_transform_le( midstate, vdata, initstate );
|
|
||||||
|
// Do 3 rounds on the first 12 bytes of the next block
|
||||||
|
sha256_8way_prehash_3rounds( midstate2, vdata + 16, midstate1 );
|
||||||
|
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
@@ -145,7 +147,7 @@ int scanhash_sha256t_8way( struct work *work, const uint32_t max_nonce,
|
|||||||
block[ 4] = last_byte;
|
block[ 4] = last_byte;
|
||||||
memset_zero_256( block + 5, 10 );
|
memset_zero_256( block + 5, 10 );
|
||||||
block[15] = m256_const1_32( 80*8 ); // bit count
|
block[15] = m256_const1_32( 80*8 ); // bit count
|
||||||
sha256_8way_transform_le( hash32, block, midstate );
|
sha256_8way_final_rounds( hash32, block, midstate1, midstate2 );
|
||||||
|
|
||||||
// 2. 32 byte hash from 1.
|
// 2. 32 byte hash from 1.
|
||||||
memcpy_256( block, hash32, 8 );
|
memcpy_256( block, hash32, 8 );
|
||||||
|
@@ -702,6 +702,36 @@ memcpy( state_out, state_in, 32 );
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void sph_sha256_prehash_3rounds( uint32_t *state_out, const uint32_t *data,
|
||||||
|
const uint32_t *state_in )
|
||||||
|
{
|
||||||
|
uint32_t t1, t2, X_xor_Y, Y_xor_Z = state_in[1] ^ state_in[2];
|
||||||
|
memcpy( state_out, state_in, 32 );
|
||||||
|
|
||||||
|
t1 = state_out[7] + BSG2_1( state_out[4] )
|
||||||
|
+ CH( state_out[4], state_out[5], state_out[6] ) + 0x428A2F98 + data[0];
|
||||||
|
t2 = BSG2_0( state_out[0] )
|
||||||
|
+ MAJ( state_out[0], state_out[1], state_out[2] );
|
||||||
|
Y_xor_Z = X_xor_Y;
|
||||||
|
state_out[3] += t1;
|
||||||
|
state_out[7] = t1 + t2;
|
||||||
|
|
||||||
|
t1 = state_out[6] + BSG2_1( state_out[3] )
|
||||||
|
+ CH( state_out[3], state_out[4], state_out[5] ) + 0x71374491 + data[1];
|
||||||
|
t2 = BSG2_0( state_out[7] )
|
||||||
|
+ MAJ( state_out[7], state_out[0], state_out[1] );
|
||||||
|
Y_xor_Z = X_xor_Y;
|
||||||
|
state_out[2] += t1;
|
||||||
|
state_out[6] = t1 + t2;
|
||||||
|
|
||||||
|
t1 = state_out[5] + BSG2_1( state_out[2] )
|
||||||
|
+ CH( state_out[2], state_out[3], state_out[4] ) + 0xB5C0FBCF + data[2];
|
||||||
|
t2 = BSG2_0( state_out[6] )
|
||||||
|
+ MAJ( state_out[6], state_out[7], state_out[0] );
|
||||||
|
state_out[1] += t1;
|
||||||
|
state_out[5] = t1 + t2;
|
||||||
|
}
|
||||||
|
|
||||||
/* see sph_sha2.h */
|
/* see sph_sha2.h */
|
||||||
void
|
void
|
||||||
sph_sha224_init(void *cc)
|
sph_sha224_init(void *cc)
|
||||||
|
@@ -215,6 +215,9 @@ void sph_sha256_transform_le( uint32_t *state_out, const uint32_t *data,
|
|||||||
void sph_sha256_transform_be( uint32_t *state_out, const uint32_t *data,
|
void sph_sha256_transform_be( uint32_t *state_out, const uint32_t *data,
|
||||||
const uint32_t *state_in );
|
const uint32_t *state_in );
|
||||||
|
|
||||||
|
void sph_sha256_prehash_3rounds( uint32_t *state_out, const uint32_t *data,
|
||||||
|
const uint32_t *state_in );
|
||||||
|
|
||||||
|
|
||||||
#if SPH_64
|
#if SPH_64
|
||||||
|
|
||||||
|
20
configure
vendored
20
configure
vendored
@@ -1,6 +1,6 @@
|
|||||||
#! /bin/sh
|
#! /bin/sh
|
||||||
# Guess values for system-dependent variables and create Makefiles.
|
# Guess values for system-dependent variables and create Makefiles.
|
||||||
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.18.1.
|
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.18.2.
|
||||||
#
|
#
|
||||||
#
|
#
|
||||||
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
|
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
|
||||||
@@ -577,8 +577,8 @@ MAKEFLAGS=
|
|||||||
# Identity of this package.
|
# Identity of this package.
|
||||||
PACKAGE_NAME='cpuminer-opt'
|
PACKAGE_NAME='cpuminer-opt'
|
||||||
PACKAGE_TARNAME='cpuminer-opt'
|
PACKAGE_TARNAME='cpuminer-opt'
|
||||||
PACKAGE_VERSION='3.18.1'
|
PACKAGE_VERSION='3.18.2'
|
||||||
PACKAGE_STRING='cpuminer-opt 3.18.1'
|
PACKAGE_STRING='cpuminer-opt 3.18.2'
|
||||||
PACKAGE_BUGREPORT=''
|
PACKAGE_BUGREPORT=''
|
||||||
PACKAGE_URL=''
|
PACKAGE_URL=''
|
||||||
|
|
||||||
@@ -1332,7 +1332,7 @@ if test "$ac_init_help" = "long"; then
|
|||||||
# Omit some internal or obsolete options to make the list less imposing.
|
# Omit some internal or obsolete options to make the list less imposing.
|
||||||
# This message is too long to be a string in the A/UX 3.1 sh.
|
# This message is too long to be a string in the A/UX 3.1 sh.
|
||||||
cat <<_ACEOF
|
cat <<_ACEOF
|
||||||
\`configure' configures cpuminer-opt 3.18.1 to adapt to many kinds of systems.
|
\`configure' configures cpuminer-opt 3.18.2 to adapt to many kinds of systems.
|
||||||
|
|
||||||
Usage: $0 [OPTION]... [VAR=VALUE]...
|
Usage: $0 [OPTION]... [VAR=VALUE]...
|
||||||
|
|
||||||
@@ -1404,7 +1404,7 @@ fi
|
|||||||
|
|
||||||
if test -n "$ac_init_help"; then
|
if test -n "$ac_init_help"; then
|
||||||
case $ac_init_help in
|
case $ac_init_help in
|
||||||
short | recursive ) echo "Configuration of cpuminer-opt 3.18.1:";;
|
short | recursive ) echo "Configuration of cpuminer-opt 3.18.2:";;
|
||||||
esac
|
esac
|
||||||
cat <<\_ACEOF
|
cat <<\_ACEOF
|
||||||
|
|
||||||
@@ -1509,7 +1509,7 @@ fi
|
|||||||
test -n "$ac_init_help" && exit $ac_status
|
test -n "$ac_init_help" && exit $ac_status
|
||||||
if $ac_init_version; then
|
if $ac_init_version; then
|
||||||
cat <<\_ACEOF
|
cat <<\_ACEOF
|
||||||
cpuminer-opt configure 3.18.1
|
cpuminer-opt configure 3.18.2
|
||||||
generated by GNU Autoconf 2.69
|
generated by GNU Autoconf 2.69
|
||||||
|
|
||||||
Copyright (C) 2012 Free Software Foundation, Inc.
|
Copyright (C) 2012 Free Software Foundation, Inc.
|
||||||
@@ -2012,7 +2012,7 @@ cat >config.log <<_ACEOF
|
|||||||
This file contains any messages produced by compilers while
|
This file contains any messages produced by compilers while
|
||||||
running configure, to aid debugging if configure makes a mistake.
|
running configure, to aid debugging if configure makes a mistake.
|
||||||
|
|
||||||
It was created by cpuminer-opt $as_me 3.18.1, which was
|
It was created by cpuminer-opt $as_me 3.18.2, which was
|
||||||
generated by GNU Autoconf 2.69. Invocation command line was
|
generated by GNU Autoconf 2.69. Invocation command line was
|
||||||
|
|
||||||
$ $0 $@
|
$ $0 $@
|
||||||
@@ -2993,7 +2993,7 @@ fi
|
|||||||
|
|
||||||
# Define the identity of the package.
|
# Define the identity of the package.
|
||||||
PACKAGE='cpuminer-opt'
|
PACKAGE='cpuminer-opt'
|
||||||
VERSION='3.18.1'
|
VERSION='3.18.2'
|
||||||
|
|
||||||
|
|
||||||
cat >>confdefs.h <<_ACEOF
|
cat >>confdefs.h <<_ACEOF
|
||||||
@@ -6690,7 +6690,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
|
|||||||
# report actual input values of CONFIG_FILES etc. instead of their
|
# report actual input values of CONFIG_FILES etc. instead of their
|
||||||
# values after options handling.
|
# values after options handling.
|
||||||
ac_log="
|
ac_log="
|
||||||
This file was extended by cpuminer-opt $as_me 3.18.1, which was
|
This file was extended by cpuminer-opt $as_me 3.18.2, which was
|
||||||
generated by GNU Autoconf 2.69. Invocation command line was
|
generated by GNU Autoconf 2.69. Invocation command line was
|
||||||
|
|
||||||
CONFIG_FILES = $CONFIG_FILES
|
CONFIG_FILES = $CONFIG_FILES
|
||||||
@@ -6756,7 +6756,7 @@ _ACEOF
|
|||||||
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
|
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
|
||||||
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
|
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
|
||||||
ac_cs_version="\\
|
ac_cs_version="\\
|
||||||
cpuminer-opt config.status 3.18.1
|
cpuminer-opt config.status 3.18.2
|
||||||
configured by $0, generated by GNU Autoconf 2.69,
|
configured by $0, generated by GNU Autoconf 2.69,
|
||||||
with options \\"\$ac_cs_config\\"
|
with options \\"\$ac_cs_config\\"
|
||||||
|
|
||||||
|
@@ -1,4 +1,4 @@
|
|||||||
AC_INIT([cpuminer-opt], [3.18.1])
|
AC_INIT([cpuminer-opt], [3.18.2])
|
||||||
|
|
||||||
AC_PREREQ([2.59c])
|
AC_PREREQ([2.59c])
|
||||||
AC_CANONICAL_SYSTEM
|
AC_CANONICAL_SYSTEM
|
||||||
|
61
cpu-miner.c
61
cpu-miner.c
@@ -1112,19 +1112,17 @@ void report_summary_log( bool force )
|
|||||||
applog( LOG_BLUE, "%s: %s", algo_names[ opt_algo ], short_url );
|
applog( LOG_BLUE, "%s: %s", algo_names[ opt_algo ], short_url );
|
||||||
applog2( LOG_NOTICE, "Periodic Report %s %s", et_str, upt_str );
|
applog2( LOG_NOTICE, "Periodic Report %s %s", et_str, upt_str );
|
||||||
applog2( LOG_INFO, "Share rate %.2f/min %.2f/min",
|
applog2( LOG_INFO, "Share rate %.2f/min %.2f/min",
|
||||||
submit_rate, (double)submitted_share_count*60. /
|
submit_rate, safe_div( (double)submitted_share_count*60.,
|
||||||
( (double)uptime.tv_sec + (double)uptime.tv_usec / 1e6 ) );
|
( (double)uptime.tv_sec + (double)uptime.tv_usec / 1e6 ), 0. ) );
|
||||||
applog2( LOG_INFO, "Hash rate %7.2f%sh/s %7.2f%sh/s (%.2f%sh/s)",
|
applog2( LOG_INFO, "Hash rate %7.2f%sh/s %7.2f%sh/s (%.2f%sh/s)",
|
||||||
shrate, shr_units, sess_hrate, sess_hr_units, ghrate, ghr_units );
|
shrate, shr_units, sess_hrate, sess_hr_units, ghrate, ghr_units );
|
||||||
|
|
||||||
if ( accepted_share_count < submitted_share_count )
|
if ( accepted_share_count < submitted_share_count )
|
||||||
{
|
{
|
||||||
double lost_ghrate = uptime.tv_sec == 0 ? 0.
|
double lost_ghrate = safe_div( target_diff
|
||||||
: target_diff
|
* (double)(submitted_share_count - accepted_share_count ),
|
||||||
* (double)(submitted_share_count - accepted_share_count )
|
(double)uptime.tv_sec, 0. );
|
||||||
/ (double)uptime.tv_sec;
|
double lost_shrate = safe_div( target_diff * (double)(submits - accepts ), share_time, 0. );
|
||||||
double lost_shrate = share_time == 0. ? 0.
|
|
||||||
: target_diff * (double)(submits - accepts ) / share_time;
|
|
||||||
char lshr_units[4] = {0};
|
char lshr_units[4] = {0};
|
||||||
char lghr_units[4] = {0};
|
char lghr_units[4] = {0};
|
||||||
scale_hash_for_display( &lost_shrate, lshr_units );
|
scale_hash_for_display( &lost_shrate, lshr_units );
|
||||||
@@ -2495,18 +2493,21 @@ static void *miner_thread( void *userdata )
|
|||||||
timeval_subtract( &uptime, &total_hashes_time, &session_start );
|
timeval_subtract( &uptime, &total_hashes_time, &session_start );
|
||||||
double hashrate = safe_div( total_hashes, uptime.tv_sec, 0. );
|
double hashrate = safe_div( total_hashes, uptime.tv_sec, 0. );
|
||||||
|
|
||||||
scale_hash_for_display( &hashrate, hr_units );
|
if ( hashrate > 0. )
|
||||||
sprintf( hr, "%.2f", hashrate );
|
{
|
||||||
|
scale_hash_for_display( &hashrate, hr_units );
|
||||||
|
sprintf( hr, "%.2f", hashrate );
|
||||||
#if (defined(_WIN64) || defined(__WINDOWS__) || defined(_WIN32))
|
#if (defined(_WIN64) || defined(__WINDOWS__) || defined(_WIN32))
|
||||||
applog( LOG_NOTICE, "Total: %s %sH/s", hr, hr_units );
|
applog( LOG_NOTICE, "Total: %s %sH/s", hr, hr_units );
|
||||||
#else
|
#else
|
||||||
float lo_freq = 0., hi_freq = 0.;
|
float lo_freq = 0., hi_freq = 0.;
|
||||||
linux_cpu_hilo_freq( &lo_freq, &hi_freq );
|
linux_cpu_hilo_freq( &lo_freq, &hi_freq );
|
||||||
applog( LOG_NOTICE,
|
applog( LOG_NOTICE,
|
||||||
"Total: %s %sH/s, Temp: %dC, Freq: %.3f/%.3f GHz",
|
"Total: %s %sH/s, Temp: %dC, Freq: %.3f/%.3f GHz",
|
||||||
hr, hr_units, (uint32_t)cpu_temp(0), lo_freq / 1e6,
|
hr, hr_units, (uint32_t)cpu_temp(0), lo_freq / 1e6,
|
||||||
hi_freq / 1e6 );
|
hi_freq / 1e6 );
|
||||||
#endif
|
#endif
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} // benchmark
|
} // benchmark
|
||||||
|
|
||||||
@@ -2900,6 +2901,7 @@ static bool cpu_capability( bool display_only )
|
|||||||
bool algo_has_sse2 = set_incl( SSE2_OPT, algo_features );
|
bool algo_has_sse2 = set_incl( SSE2_OPT, algo_features );
|
||||||
bool algo_has_aes = set_incl( AES_OPT, algo_features );
|
bool algo_has_aes = set_incl( AES_OPT, algo_features );
|
||||||
bool algo_has_sse42 = set_incl( SSE42_OPT, algo_features );
|
bool algo_has_sse42 = set_incl( SSE42_OPT, algo_features );
|
||||||
|
bool algo_has_avx = set_incl( AVX_OPT, algo_features );
|
||||||
bool algo_has_avx2 = set_incl( AVX2_OPT, algo_features );
|
bool algo_has_avx2 = set_incl( AVX2_OPT, algo_features );
|
||||||
bool algo_has_avx512 = set_incl( AVX512_OPT, algo_features );
|
bool algo_has_avx512 = set_incl( AVX512_OPT, algo_features );
|
||||||
bool algo_has_sha = set_incl( SHA_OPT, algo_features );
|
bool algo_has_sha = set_incl( SHA_OPT, algo_features );
|
||||||
@@ -2907,6 +2909,8 @@ static bool cpu_capability( bool display_only )
|
|||||||
bool algo_has_vaes256 = set_incl( VAES256_OPT, algo_features );
|
bool algo_has_vaes256 = set_incl( VAES256_OPT, algo_features );
|
||||||
bool use_aes;
|
bool use_aes;
|
||||||
bool use_sse2;
|
bool use_sse2;
|
||||||
|
bool use_sse42;
|
||||||
|
bool use_avx;
|
||||||
bool use_avx2;
|
bool use_avx2;
|
||||||
bool use_avx512;
|
bool use_avx512;
|
||||||
bool use_sha;
|
bool use_sha;
|
||||||
@@ -2976,18 +2980,21 @@ static bool cpu_capability( bool display_only )
|
|||||||
else if ( sw_has_aes ) printf( " AES" );
|
else if ( sw_has_aes ) printf( " AES" );
|
||||||
if ( sw_has_sha ) printf( " SHA" );
|
if ( sw_has_sha ) printf( " SHA" );
|
||||||
|
|
||||||
printf("\nAlgo features:");
|
if ( !display_only )
|
||||||
if ( algo_features == EMPTY_SET ) printf( " None" );
|
|
||||||
else
|
|
||||||
{
|
{
|
||||||
if ( algo_has_avx512 ) printf( " AVX512" );
|
printf("\nAlgo features:");
|
||||||
else if ( algo_has_avx2 ) printf( " AVX2 " );
|
if ( algo_features == EMPTY_SET ) printf( " None" );
|
||||||
else if ( algo_has_sse42 ) printf( " SSE4.2" );
|
else
|
||||||
else if ( algo_has_sse2 ) printf( " SSE2 " );
|
{
|
||||||
if ( algo_has_vaes ||
|
if ( algo_has_avx512 ) printf( " AVX512" );
|
||||||
algo_has_vaes256 ) printf( " VAES" );
|
else if ( algo_has_avx2 ) printf( " AVX2 " );
|
||||||
else if ( algo_has_aes ) printf( " AES" );
|
else if ( algo_has_sse42 ) printf( " SSE4.2" );
|
||||||
if ( algo_has_sha ) printf( " SHA" );
|
else if ( algo_has_sse2 ) printf( " SSE2 " );
|
||||||
|
if ( algo_has_vaes ||
|
||||||
|
algo_has_vaes256 ) printf( " VAES" );
|
||||||
|
else if ( algo_has_aes ) printf( " AES" );
|
||||||
|
if ( algo_has_sha ) printf( " SHA" );
|
||||||
|
}
|
||||||
}
|
}
|
||||||
printf("\n");
|
printf("\n");
|
||||||
|
|
||||||
@@ -3022,6 +3029,8 @@ static bool cpu_capability( bool display_only )
|
|||||||
|
|
||||||
// Determine mining options
|
// Determine mining options
|
||||||
use_sse2 = cpu_has_sse2 && algo_has_sse2;
|
use_sse2 = cpu_has_sse2 && algo_has_sse2;
|
||||||
|
use_sse42 = cpu_has_sse42 && sw_has_sse42 && algo_has_sse42;
|
||||||
|
use_avx = cpu_has_avx && sw_has_avx && algo_has_avx;
|
||||||
use_aes = cpu_has_aes && sw_has_aes && algo_has_aes;
|
use_aes = cpu_has_aes && sw_has_aes && algo_has_aes;
|
||||||
use_avx2 = cpu_has_avx2 && sw_has_avx2 && algo_has_avx2;
|
use_avx2 = cpu_has_avx2 && sw_has_avx2 && algo_has_avx2;
|
||||||
use_avx512 = cpu_has_avx512 && sw_has_avx512 && algo_has_avx512;
|
use_avx512 = cpu_has_avx512 && sw_has_avx512 && algo_has_avx512;
|
||||||
@@ -3038,6 +3047,8 @@ static bool cpu_capability( bool display_only )
|
|||||||
{
|
{
|
||||||
if ( use_avx512 ) printf( " AVX512" );
|
if ( use_avx512 ) printf( " AVX512" );
|
||||||
else if ( use_avx2 ) printf( " AVX2" );
|
else if ( use_avx2 ) printf( " AVX2" );
|
||||||
|
else if ( use_avx ) printf( " AVX" );
|
||||||
|
else if ( use_sse42 ) printf( " SSE42" );
|
||||||
else if ( use_sse2 ) printf( " SSE2" );
|
else if ( use_sse2 ) printf( " SSE2" );
|
||||||
if ( use_vaes ) printf( " VAES" );
|
if ( use_vaes ) printf( " VAES" );
|
||||||
else if ( use_aes ) printf( " AES" );
|
else if ( use_aes ) printf( " AES" );
|
||||||
|
18
miner.h
18
miner.h
@@ -868,9 +868,9 @@ Options:\n\
|
|||||||
yespowerr16 Yenten (YTN)\n\
|
yespowerr16 Yenten (YTN)\n\
|
||||||
yespower-b2b generic yespower + blake2b\n\
|
yespower-b2b generic yespower + blake2b\n\
|
||||||
zr5 Ziftr\n\
|
zr5 Ziftr\n\
|
||||||
-N, --param-n N parameter for scrypt based algos\n\
|
-N, --param-n=N N parameter for scrypt based algos\n\
|
||||||
-R, --param-r R parameter for scrypt based algos\n\
|
-R, --param-r=N R parameter for scrypt based algos\n\
|
||||||
-K, --param-key Key (pers) parameter for algos that use it\n\
|
-K, --param-key=STRING Key (pers) parameter for algos that use it\n\
|
||||||
-o, --url=URL URL of mining server\n\
|
-o, --url=URL URL of mining server\n\
|
||||||
-O, --userpass=U:P username:password pair for mining server\n\
|
-O, --userpass=U:P username:password pair for mining server\n\
|
||||||
-u, --user=USERNAME username for mining server\n\
|
-u, --user=USERNAME username for mining server\n\
|
||||||
@@ -886,8 +886,8 @@ Options:\n\
|
|||||||
-s, --scantime=N upper bound on time spent scanning current work when\n\
|
-s, --scantime=N upper bound on time spent scanning current work when\n\
|
||||||
long polling is unavailable, in seconds (default: 5)\n\
|
long polling is unavailable, in seconds (default: 5)\n\
|
||||||
--randomize Randomize scan range start to reduce duplicates\n\
|
--randomize Randomize scan range start to reduce duplicates\n\
|
||||||
-f, --diff-factor Divide req. difficulty by this factor (std is 1.0)\n\
|
-f, --diff-factor=N Divide req. difficulty by this factor (std is 1.0)\n\
|
||||||
-m, --diff-multiplier Multiply difficulty by this factor (std is 1.0)\n\
|
-m, --diff-multiplier=N Multiply difficulty by this factor (std is 1.0)\n\
|
||||||
--hash-meter Display thread hash rates\n\
|
--hash-meter Display thread hash rates\n\
|
||||||
--coinbase-addr=ADDR payout address for solo mining\n\
|
--coinbase-addr=ADDR payout address for solo mining\n\
|
||||||
--coinbase-sig=TEXT data to insert in the coinbase when possible\n\
|
--coinbase-sig=TEXT data to insert in the coinbase when possible\n\
|
||||||
@@ -895,9 +895,9 @@ Options:\n\
|
|||||||
--no-getwork disable getwork support\n\
|
--no-getwork disable getwork support\n\
|
||||||
--no-gbt disable getblocktemplate support\n\
|
--no-gbt disable getblocktemplate support\n\
|
||||||
--no-stratum disable X-Stratum support\n\
|
--no-stratum disable X-Stratum support\n\
|
||||||
--no-extranonce disable Stratum extranonce support\n\
|
--no-extranonce disable Stratum extranonce subscribe\n\
|
||||||
--no-redirect ignore requests to change the URL of the mining server\n\
|
--no-redirect ignore requests to change the URL of the mining server\n\
|
||||||
-q, --quiet disable per-thread hashmeter output\n\
|
-q, --quiet reduce log verbosity\n\
|
||||||
--no-color disable colored output\n\
|
--no-color disable colored output\n\
|
||||||
-D, --debug enable debug output\n\
|
-D, --debug enable debug output\n\
|
||||||
-P, --protocol-dump verbose dump of protocol-level activities\n"
|
-P, --protocol-dump verbose dump of protocol-level activities\n"
|
||||||
@@ -916,9 +916,9 @@ Options:\n\
|
|||||||
--max-rate=N[KMG] Only mine if net hashrate is less than specified value\n\
|
--max-rate=N[KMG] Only mine if net hashrate is less than specified value\n\
|
||||||
--max-diff=N Only mine if net difficulty is less than specified value\n\
|
--max-diff=N Only mine if net difficulty is less than specified value\n\
|
||||||
-c, --config=FILE load a JSON-format configuration file\n\
|
-c, --config=FILE load a JSON-format configuration file\n\
|
||||||
--data-file path and name of data file\n\
|
--data-file=FILE path and name of data file\n\
|
||||||
--verify enable additional time consuming start up tests\n\
|
--verify enable additional time consuming start up tests\n\
|
||||||
-V, --version display version information and exit\n\
|
-V, --version display version and CPU information and exit\n\
|
||||||
-h, --help display this help text and exit\n\
|
-h, --help display this help text and exit\n\
|
||||||
";
|
";
|
||||||
|
|
||||||
|
@@ -2,22 +2,21 @@
|
|||||||
#define SIMD_INT_H__ 1
|
#define SIMD_INT_H__ 1
|
||||||
|
|
||||||
// Endian byte swap
|
// Endian byte swap
|
||||||
#define bswap_64( a ) __builtin_bswap64( a )
|
#define bswap_64 __builtin_bswap64
|
||||||
#define bswap_32( a ) __builtin_bswap32( a )
|
#define bswap_32 __builtin_bswap32
|
||||||
|
|
||||||
|
// Bit rotation
|
||||||
|
#define rol64 __rolq
|
||||||
|
#define ror64 __rorq
|
||||||
|
#define rol32 __rold
|
||||||
|
#define ror32 __rord
|
||||||
|
|
||||||
// Safe division, integer or floating point. For floating point it's as
|
// Safe division, integer or floating point. For floating point it's as
|
||||||
// safe as 0. is precisely zero.
|
// safe as 0 is precisely zero.
|
||||||
// Returns safe_result if division by zero.
|
// Returns safe_result if division by zero, typically zero.
|
||||||
#define safe_div( dividend, divisor, safe_result ) \
|
#define safe_div( dividend, divisor, safe_result ) \
|
||||||
( (divisor) == 0 ? safe_result : ( (dividend) / (divisor) ) )
|
( (divisor) == 0 ? safe_result : ( (dividend) / (divisor) ) )
|
||||||
|
|
||||||
// Aliases with familiar names for built in bit rotate instructions
|
|
||||||
#define rol64( a, n ) _lrotl( a, n )
|
|
||||||
#define ror64( a, n ) _lrotr( a, n )
|
|
||||||
#define rol32( a, n ) _rotl( a, n )
|
|
||||||
#define ror32( a, n ) _rotr( a, n )
|
|
||||||
#define rol16( a, n ) _rotwl( a, n )
|
|
||||||
#define ror16( a, n ) _rotwr( a, n )
|
|
||||||
|
|
||||||
///////////////////////////////////////
|
///////////////////////////////////////
|
||||||
//
|
//
|
||||||
|
Reference in New Issue
Block a user