Compare commits

..

2 Commits

Author SHA1 Message Date
Jay D Dee
5b678d2481 v3.19.6 2022-02-21 23:14:24 -05:00
Jay D Dee
90137b391e v3.19.5 2022-01-30 20:59:54 -05:00
30 changed files with 1286 additions and 508 deletions

View File

@@ -65,6 +65,25 @@ If not what makes it happen or not happen?
Change Log
----------
v3.19.6
#363 Fixed a stratum bug where the first job may be ignored delaying start of hashing
Fixed handling of nonce exhaust when hashing a fast algo with extranonce disabled
Small optimization to Shavite.
v3.19.5
Enhanced stratum-keepalive preemptively resets the stratum connection
before the server to avoid lost shares.
Added build-msys2.sh scrypt for easier compiling on Windows, see Wiki for details.
X16RT: eliminate unnecessary recalculations of the hash order.
Fix a few compiler warnings.
Fixed log colour error when a block is solved.
v3.19.4
#359: Fix verthash memory allocation for non-hugepages, broken in v3.19.3.

View File

@@ -344,7 +344,7 @@ static size_t
detect_cpu(void) {
//union { uint8_t s[12]; uint32_t i[3]; } vendor_string;
//cpu_vendors_x86 vendor = cpu_nobody;
x86_regs regs;
x86_regs regs; regs.eax = regs.ebx = regs.ecx = 0;
uint32_t max_level, max_ext_level;
size_t cpu_flags = 0;
#if defined(X86ASM_AVX) || defined(X86_64ASM_AVX)
@@ -460,4 +460,4 @@ get_top_cpuflag_desc(size_t flag) {
#endif
#endif
#endif /* defined(CPU_X86) || defined(CPU_X86_64) */
#endif /* defined(CPU_X86) || defined(CPU_X86_64) */

View File

@@ -4,11 +4,12 @@ typedef void (FASTCALL *scrypt_ROMixfn)(scrypt_mix_word_t *X/*[chunkWords]*/, sc
#endif
/* romix pre/post nop function */
/*
static void asm_calling_convention
scrypt_romix_nop(scrypt_mix_word_t *blocks, size_t nblocks) {
(void)blocks; (void)nblocks;
}
*/
/* romix pre/post endian conversion function */
static void asm_calling_convention
scrypt_romix_convert_endian(scrypt_mix_word_t *blocks, size_t nblocks) {

View File

@@ -70,7 +70,10 @@ void decred_be_build_stratum_request( char *req, struct work *work,
rpc_user, work->job_id, xnonce2str, ntimestr, noncestr );
free(xnonce2str);
}
#if !defined(min)
#define min(a,b) (a>b ? (b) :(a))
#endif
void decred_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
{

View File

@@ -45,6 +45,6 @@ void sha512Compute32b_parallel(
uint64_t *data[SHA512_PARALLEL_N],
uint64_t *digest[SHA512_PARALLEL_N]);
void sha512ProcessBlock(Sha512Context *context);
void sha512ProcessBlock(Sha512Context contexti[2] );
#endif

View File

@@ -69,7 +69,6 @@ void allium_16way_hash( void *state, const void *input )
intrlv_8x64( vhashB, hash8, hash9, hash10, hash11, hash12, hash13, hash14,
hash15, 256 );
// rintrlv_8x32_8x64( vhashA, vhash, 256 );
keccak256_8way_update( &ctx.keccak, vhashA, 32 );
keccak256_8way_close( &ctx.keccak, vhashA);
keccak256_8way_init( &ctx.keccak );
@@ -284,7 +283,7 @@ void allium_8way_hash( void *hash, const void *input )
blake256_8way_close( &ctx.blake, vhashA );
dintrlv_8x32( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
vhashA, 256 );
vhashA, 256 );
intrlv_4x64( vhashA, hash0, hash1, hash2, hash3, 256 );
intrlv_4x64( vhashB, hash4, hash5, hash6, hash7, 256 );

View File

@@ -49,7 +49,7 @@ void lyra2z_16way_hash( void *state, const void *input )
dintrlv_16x32( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
hash8, hash9, hash10, hash11 ,hash12, hash13, hash14, hash15,
vhash, 256 );
vhash, 256 );
intrlv_2x256( vhash, hash0, hash1, 256 );
LYRA2Z_2WAY( lyra2z_16way_matrix, vhash, 32, vhash, 32, 8, 8, 8 );

View File

@@ -35,6 +35,7 @@
#include "sph_ripemd.h"
#if 0
/*
* Round functions for RIPEMD (original).
*/
@@ -46,6 +47,7 @@ static const sph_u32 oIV[5] = {
SPH_C32(0x67452301), SPH_C32(0xEFCDAB89),
SPH_C32(0x98BADCFE), SPH_C32(0x10325476)
};
#endif
/*
* Round functions for RIPEMD-128 and RIPEMD-160.
@@ -63,6 +65,8 @@ static const sph_u32 IV[5] = {
#define ROTL SPH_ROTL32
#if 0
/* ===================================================================== */
/*
* RIPEMD (original hash, deprecated).
@@ -539,6 +543,8 @@ sph_ripemd128_comp(const sph_u32 msg[16], sph_u32 val[4])
#undef RIPEMD128_IN
}
#endif
/* ===================================================================== */
/*
* RIPEMD-160.

View File

@@ -84,6 +84,7 @@
* can be cloned by copying the context (e.g. with a simple
* <code>memcpy()</code>).
*/
#if 0
typedef struct {
#ifndef DOXYGEN_IGNORE
unsigned char buf[64]; /* first field, for alignment */
@@ -204,6 +205,8 @@ void sph_ripemd128_close(void *cc, void *dst);
*/
void sph_ripemd128_comp(const sph_u32 msg[16], sph_u32 val[4]);
#endif
/* ===================================================================== */
/**

View File

@@ -18,10 +18,13 @@ static const uint32_t IV512[] =
0xE275EADE, 0x502D9FCD, 0xB9357178, 0x022A4B9A
};
/*
#define mm256_ror2x256hi_1x32( a, b ) \
_mm256_blend_epi32( mm256_shuflr128_32( a ), \
mm256_shuflr128_32( b ), 0x88 )
*/
//#define mm256_ror2x256hi_1x32( a, b ) _mm256_alignr_epi8( b, a, 4 )
#if defined(__VAES__)
@@ -127,24 +130,24 @@ c512_2way( shavite512_2way_context *ctx, const void *msg )
// round 2, 6, 10
k00 = _mm256_xor_si256( k00, mm256_ror2x256hi_1x32( k12, k13 ) );
k00 = _mm256_xor_si256( k00, _mm256_alignr_epi8( k13, k12, 4 ) );
x = mm256_aesenc_2x128( _mm256_xor_si256( p3, k00 ), zero );
k01 = _mm256_xor_si256( k01, mm256_ror2x256hi_1x32( k13, k00 ) );
k01 = _mm256_xor_si256( k01, _mm256_alignr_epi8( k00, k13, 4 ) );
x = mm256_aesenc_2x128( _mm256_xor_si256( x, k01 ), zero );
k02 = _mm256_xor_si256( k02, mm256_ror2x256hi_1x32( k00, k01 ) );
k02 = _mm256_xor_si256( k02, _mm256_alignr_epi8( k01, k00, 4 ) );
x = mm256_aesenc_2x128( _mm256_xor_si256( x, k02 ), zero );
k03 = _mm256_xor_si256( k03, mm256_ror2x256hi_1x32( k01, k02 ) );
k03 = _mm256_xor_si256( k03, _mm256_alignr_epi8( k02, k01, 4 ) );
x = mm256_aesenc_2x128( _mm256_xor_si256( x, k03 ), zero );
p2 = _mm256_xor_si256( p2, x );
k10 = _mm256_xor_si256( k10, mm256_ror2x256hi_1x32( k02, k03 ) );
k10 = _mm256_xor_si256( k10, _mm256_alignr_epi8( k03, k02, 4 ) );
x = mm256_aesenc_2x128( _mm256_xor_si256( p1, k10 ), zero );
k11 = _mm256_xor_si256( k11, mm256_ror2x256hi_1x32( k03, k10 ) );
k11 = _mm256_xor_si256( k11, _mm256_alignr_epi8( k10, k03, 4 ) );
x = mm256_aesenc_2x128( _mm256_xor_si256( x, k11 ), zero );
k12 = _mm256_xor_si256( k12, mm256_ror2x256hi_1x32( k10, k11 ) );
k12 = _mm256_xor_si256( k12, _mm256_alignr_epi8( k11, k10, 4 ) );
x = mm256_aesenc_2x128( _mm256_xor_si256( x, k12 ), zero );
k13 = _mm256_xor_si256( k13, mm256_ror2x256hi_1x32( k11, k12 ) );
k13 = _mm256_xor_si256( k13, _mm256_alignr_epi8( k12, k11, 4 ) );
x = mm256_aesenc_2x128( _mm256_xor_si256( x, k13 ), zero );
p0 = _mm256_xor_si256( p0, x );
@@ -183,24 +186,24 @@ c512_2way( shavite512_2way_context *ctx, const void *msg )
// round 4, 8, 12
k00 = _mm256_xor_si256( k00, mm256_ror2x256hi_1x32( k12, k13 ) );
k00 = _mm256_xor_si256( k00, _mm256_alignr_epi8( k13, k12, 4 ) );
x = mm256_aesenc_2x128( _mm256_xor_si256( p1, k00 ), zero );
k01 = _mm256_xor_si256( k01, mm256_ror2x256hi_1x32( k13, k00 ) );
k01 = _mm256_xor_si256( k01, _mm256_alignr_epi8( k00, k13, 4 ) );
x = mm256_aesenc_2x128( _mm256_xor_si256( x, k01 ), zero );
k02 = _mm256_xor_si256( k02, mm256_ror2x256hi_1x32( k00, k01 ) );
k02 = _mm256_xor_si256( k02, _mm256_alignr_epi8( k01, k00, 4 ) );
x = mm256_aesenc_2x128( _mm256_xor_si256( x, k02 ), zero );
k03 = _mm256_xor_si256( k03, mm256_ror2x256hi_1x32( k01, k02 ) );
k03 = _mm256_xor_si256( k03, _mm256_alignr_epi8( k02, k01, 4 ) );
x = mm256_aesenc_2x128( _mm256_xor_si256( x, k03 ), zero );
p0 = _mm256_xor_si256( p0, x );
k10 = _mm256_xor_si256( k10, mm256_ror2x256hi_1x32( k02, k03 ) );
k10 = _mm256_xor_si256( k10, _mm256_alignr_epi8( k03, k02, 4 ) );
x = mm256_aesenc_2x128( _mm256_xor_si256( p3, k10 ), zero );
k11 = _mm256_xor_si256( k11, mm256_ror2x256hi_1x32( k03, k10 ) );
k11 = _mm256_xor_si256( k11, _mm256_alignr_epi8( k10, k03, 4 ) );
x = mm256_aesenc_2x128( _mm256_xor_si256( x, k11 ), zero );
k12 = _mm256_xor_si256( k12, mm256_ror2x256hi_1x32( k10, k11 ) );
k12 = _mm256_xor_si256( k12, _mm256_alignr_epi8( k11, k10, 4 ) );
x = mm256_aesenc_2x128( _mm256_xor_si256( x, k12 ), zero );
k13 = _mm256_xor_si256( k13, mm256_ror2x256hi_1x32( k11, k12 ) );
k13 = _mm256_xor_si256( k13, _mm256_alignr_epi8( k12, k11, 4 ) );
x = mm256_aesenc_2x128( _mm256_xor_si256( x, k13 ), zero );
p2 = _mm256_xor_si256( p2, x );

View File

@@ -11,10 +11,6 @@ static const uint32_t IV512[] =
0xE275EADE, 0x502D9FCD, 0xB9357178, 0x022A4B9A
};
#define mm512_ror2x512hi_1x32( a, b ) \
_mm512_mask_blend_epi32( 0x8888, mm512_shuflr128_32( a ), \
mm512_shuflr128_32( b ) )
static void
c512_4way( shavite512_4way_context *ctx, const void *msg )
{
@@ -106,24 +102,24 @@ c512_4way( shavite512_4way_context *ctx, const void *msg )
// round 2, 6, 10
K0 = _mm512_xor_si512( K0, mm512_ror2x512hi_1x32( K6, K7 ) );
K0 = _mm512_xor_si512( K0, _mm512_alignr_epi8( K7, K6, 4 ) );
X = _mm512_aesenc_epi128( _mm512_xor_si512( P3, K0 ), m512_zero );
K1 = _mm512_xor_si512( K1, mm512_ror2x512hi_1x32( K7, K0 ) );
K1 = _mm512_xor_si512( K1, _mm512_alignr_epi8( K0, K7, 4 ) );
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K1 ), m512_zero );
K2 = _mm512_xor_si512( K2, mm512_ror2x512hi_1x32( K0, K1 ) );
K2 = _mm512_xor_si512( K2, _mm512_alignr_epi8( K1, K0, 4 ) );
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K2 ), m512_zero );
K3 = _mm512_xor_si512( K3, mm512_ror2x512hi_1x32( K1, K2 ) );
K3 = _mm512_xor_si512( K3, _mm512_alignr_epi8( K2, K1, 4 ) );
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K3 ), m512_zero );
P2 = _mm512_xor_si512( P2, X );
K4 = _mm512_xor_si512( K4, mm512_ror2x512hi_1x32( K2, K3 ) );
K4 = _mm512_xor_si512( K4, _mm512_alignr_epi8( K3, K2, 4 ) );
X = _mm512_aesenc_epi128( _mm512_xor_si512( P1, K4 ), m512_zero );
K5 = _mm512_xor_si512( K5, mm512_ror2x512hi_1x32( K3, K4 ) );
K5 = _mm512_xor_si512( K5, _mm512_alignr_epi8( K4, K3, 4 ) );
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K5 ), m512_zero );
K6 = _mm512_xor_si512( K6, mm512_ror2x512hi_1x32( K4, K5 ) );
K6 = _mm512_xor_si512( K6, _mm512_alignr_epi8( K5, K4, 4 ) );
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K6 ), m512_zero );
K7 = _mm512_xor_si512( K7, mm512_ror2x512hi_1x32( K5, K6 ) );
K7 = _mm512_xor_si512( K7, _mm512_alignr_epi8( K6, K5, 4 ) );
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K7 ), m512_zero );
P0 = _mm512_xor_si512( P0, X );
@@ -162,24 +158,24 @@ c512_4way( shavite512_4way_context *ctx, const void *msg )
// round 4, 8, 12
K0 = _mm512_xor_si512( K0, mm512_ror2x512hi_1x32( K6, K7 ) );
K0 = _mm512_xor_si512( K0, _mm512_alignr_epi8( K7, K6, 4 ) );
X = _mm512_aesenc_epi128( _mm512_xor_si512( P1, K0 ), m512_zero );
K1 = _mm512_xor_si512( K1, mm512_ror2x512hi_1x32( K7, K0 ) );
K1 = _mm512_xor_si512( K1, _mm512_alignr_epi8( K0, K7, 4 ) );
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K1 ), m512_zero );
K2 = _mm512_xor_si512( K2, mm512_ror2x512hi_1x32( K0, K1 ) );
K2 = _mm512_xor_si512( K2, _mm512_alignr_epi8( K1, K0, 4 ) );
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K2 ), m512_zero );
K3 = _mm512_xor_si512( K3, mm512_ror2x512hi_1x32( K1, K2 ) );
K3 = _mm512_xor_si512( K3, _mm512_alignr_epi8( K2, K1, 4 ) );
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K3 ), m512_zero );
P0 = _mm512_xor_si512( P0, X );
K4 = _mm512_xor_si512( K4, mm512_ror2x512hi_1x32( K2, K3 ) );
K4 = _mm512_xor_si512( K4, _mm512_alignr_epi8( K3, K2, 4 ) );
X = _mm512_aesenc_epi128( _mm512_xor_si512( P3, K4 ), m512_zero );
K5 = _mm512_xor_si512( K5, mm512_ror2x512hi_1x32( K3, K4 ) );
K5 = _mm512_xor_si512( K5, _mm512_alignr_epi8( K4, K3, 4 ) );
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K5 ), m512_zero );
K6 = _mm512_xor_si512( K6, mm512_ror2x512hi_1x32( K4, K5 ) );
K6 = _mm512_xor_si512( K6, _mm512_alignr_epi8( K5, K4, 4 ) );
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K6 ), m512_zero );
K7 = _mm512_xor_si512( K7, mm512_ror2x512hi_1x32( K5, K6 ) );
K7 = _mm512_xor_si512( K7, _mm512_alignr_epi8( K6, K5, 4 ) );
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K7 ), m512_zero );
P2 = _mm512_xor_si512( P2, X );

View File

@@ -59,30 +59,6 @@ static const sph_u32 IV512[] = {
C32(0xE275EADE), C32(0x502D9FCD), C32(0xB9357178), C32(0x022A4B9A)
};
// Partially rotate elements in two 128 bit vectors a & b as one 256 bit vector
// and return the rotated 128 bit vector a.
// a[3:0] = { b[0], a[3], a[2], a[1] }
#if defined(__SSSE3__)
#define mm128_ror256hi_1x32( a, b ) _mm_alignr_epi8( b, a, 4 )
#else // SSE2
#define mm128_ror256hi_1x32( a, b ) \
_mm_or_si128( _mm_srli_si128( a, 4 ), \
_mm_slli_si128( b, 12 ) )
#endif
/*
#if defined(__AVX2__)
// 2 way version of above
// a[7:0] = { b[4], a[7], a[6], a[5], b[0], a[3], a[2], a[1] }
#define mm256_ror2x256hi_1x32( a, b ) \
_mm256_blend_epi32( mm256_ror256_1x32( a ), \
mm256_rol256_3x32( b ), 0x88 )
#endif
*/
static void
c512( sph_shavite_big_context *sc, const void *msg )
@@ -190,31 +166,31 @@ c512( sph_shavite_big_context *sc, const void *msg )
// round 2, 6, 10
k00 = _mm_xor_si128( k00, mm128_ror256hi_1x32( k12, k13 ) );
k00 = _mm_xor_si128( k00, _mm_alignr_epi8( k13, k12, 4 ) );
x = _mm_xor_si128( p3, k00 );
x = _mm_aesenc_si128( x, zero );
k01 = _mm_xor_si128( k01, mm128_ror256hi_1x32( k13, k00 ) );
k01 = _mm_xor_si128( k01, _mm_alignr_epi8( k00, k13, 4 ) );
x = _mm_xor_si128( x, k01 );
x = _mm_aesenc_si128( x, zero );
k02 = _mm_xor_si128( k02, mm128_ror256hi_1x32( k00, k01 ) );
k02 = _mm_xor_si128( k02, _mm_alignr_epi8( k01, k00, 4 ) );
x = _mm_xor_si128( x, k02 );
x = _mm_aesenc_si128( x, zero );
k03 = _mm_xor_si128( k03, mm128_ror256hi_1x32( k01, k02 ) );
k03 = _mm_xor_si128( k03, _mm_alignr_epi8( k02, k01, 4 ) );
x = _mm_xor_si128( x, k03 );
x = _mm_aesenc_si128( x, zero );
p2 = _mm_xor_si128( p2, x );
k10 = _mm_xor_si128( k10, mm128_ror256hi_1x32( k02, k03 ) );
k10 = _mm_xor_si128( k10, _mm_alignr_epi8( k03, k02, 4 ) );
x = _mm_xor_si128( p1, k10 );
x = _mm_aesenc_si128( x, zero );
k11 = _mm_xor_si128( k11, mm128_ror256hi_1x32( k03, k10 ) );
k11 = _mm_xor_si128( k11, _mm_alignr_epi8( k10, k03, 4 ) );
x = _mm_xor_si128( x, k11 );
x = _mm_aesenc_si128( x, zero );
k12 = _mm_xor_si128( k12, mm128_ror256hi_1x32( k10, k11 ) );
k12 = _mm_xor_si128( k12, _mm_alignr_epi8( k11, k10, 4 ) );
x = _mm_xor_si128( x, k12 );
x = _mm_aesenc_si128( x, zero );
k13 = _mm_xor_si128( k13, mm128_ror256hi_1x32( k11, k12 ) );
k13 = _mm_xor_si128( k13, _mm_alignr_epi8( k12, k11, 4 ) );
x = _mm_xor_si128( x, k13 );
x = _mm_aesenc_si128( x, zero );
@@ -262,31 +238,31 @@ c512( sph_shavite_big_context *sc, const void *msg )
// round 4, 8, 12
k00 = _mm_xor_si128( k00, mm128_ror256hi_1x32( k12, k13 ) );
k00 = _mm_xor_si128( k00, _mm_alignr_epi8( k13, k12, 4 ) );
x = _mm_xor_si128( p1, k00 );
x = _mm_aesenc_si128( x, zero );
k01 = _mm_xor_si128( k01, mm128_ror256hi_1x32( k13, k00 ) );
k01 = _mm_xor_si128( k01, _mm_alignr_epi8( k00, k13, 4 ) );
x = _mm_xor_si128( x, k01 );
x = _mm_aesenc_si128( x, zero );
k02 = _mm_xor_si128( k02, mm128_ror256hi_1x32( k00, k01 ) );
k02 = _mm_xor_si128( k02, _mm_alignr_epi8( k01, k00, 4 ) );
x = _mm_xor_si128( x, k02 );
x = _mm_aesenc_si128( x, zero );
k03 = _mm_xor_si128( k03, mm128_ror256hi_1x32( k01, k02 ) );
k03 = _mm_xor_si128( k03, _mm_alignr_epi8( k02, k01, 4 ) );
x = _mm_xor_si128( x, k03 );
x = _mm_aesenc_si128( x, zero );
p0 = _mm_xor_si128( p0, x );
k10 = _mm_xor_si128( k10, mm128_ror256hi_1x32( k02, k03 ) );
k10 = _mm_xor_si128( k10, _mm_alignr_epi8( k03, k02, 4 ) );
x = _mm_xor_si128( p3, k10 );
x = _mm_aesenc_si128( x, zero );
k11 = _mm_xor_si128( k11, mm128_ror256hi_1x32( k03, k10 ) );
k11 = _mm_xor_si128( k11, _mm_alignr_epi8( k10, k03, 4 ) );
x = _mm_xor_si128( x, k11 );
x = _mm_aesenc_si128( x, zero );
k12 = _mm_xor_si128( k12, mm128_ror256hi_1x32( k10, k11 ) );
k12 = _mm_xor_si128( k12, _mm_alignr_epi8( k11, k10, 4 ) );
x = _mm_xor_si128( x, k12 );
x = _mm_aesenc_si128( x, zero );
k13 = _mm_xor_si128( k13, mm128_ror256hi_1x32( k11, k12 ) );
k13 = _mm_xor_si128( k13, _mm_alignr_epi8( k12, k11, 4 ) );
x = _mm_xor_si128( x, k13 );
x = _mm_aesenc_si128( x, zero );

View File

@@ -35,7 +35,7 @@
#include "sph_shavite.h"
#if !defined(__AES__)
#if !(defined(__AES__) && defined(__SSSE3__))
#ifdef __cplusplus
extern "C"{

View File

@@ -263,7 +263,7 @@ void sph_shavite384_addbits_and_close(
void *cc, unsigned ub, unsigned n, void *dst);
//Don't call these directly from application code, use the macros below.
#ifdef __AES__
#if defined(__AES__) && defined(__SSSE3__)
void sph_shavite512_aesni_init(void *cc);
void sph_shavite512_aesni(void *cc, const void *data, size_t len);

View File

@@ -74,7 +74,7 @@ typedef struct {
void sm3_init(sm3_ctx_t *ctx);
void sm3_update(sm3_ctx_t *ctx, const unsigned char* data, size_t data_len);
void sm3_final(sm3_ctx_t *ctx, unsigned char digest[SM3_DIGEST_LENGTH]);
void sm3_final(sm3_ctx_t *ctx, unsigned char *digest);
void sm3_compress(uint32_t digest[8], const unsigned char block[SM3_BLOCK_SIZE]);
void sm3(const unsigned char *data, size_t datalen,
unsigned char digest[SM3_DIGEST_LENGTH]);

View File

@@ -24,15 +24,15 @@ int scanhash_x16rt_8way( struct work *work, uint32_t max_nonce,
if ( bench ) ptarget[7] = 0x0cff;
static __thread uint32_t s_ntime = UINT32_MAX;
uint32_t ntime = bswap_32( pdata[17] );
if ( s_ntime != ntime )
uint32_t masked_ntime = bswap_32( pdata[17] ) & 0xffffff80;
if ( s_ntime != masked_ntime )
{
x16rt_getTimeHash( ntime, &timeHash );
x16rt_getTimeHash( masked_ntime, &timeHash );
x16rt_getAlgoString( &timeHash[0], x16r_hash_order );
s_ntime = ntime;
s_ntime = masked_ntime;
if ( opt_debug && !thr_id )
applog( LOG_INFO, "hash order: %s time: (%08x) time hash: (%08x)",
x16r_hash_order, ntime, timeHash );
x16r_hash_order, bswap_32( pdata[17] ), timeHash );
}
x16r_8way_prehash( vdata, pdata );
@@ -78,15 +78,15 @@ int scanhash_x16rt_4way( struct work *work, uint32_t max_nonce,
if ( bench ) ptarget[7] = 0x0cff;
static __thread uint32_t s_ntime = UINT32_MAX;
uint32_t ntime = bswap_32( pdata[17] );
if ( s_ntime != ntime )
uint32_t masked_ntime = bswap_32( pdata[17] ) & 0xffffff80;
if ( s_ntime != masked_ntime )
{
x16rt_getTimeHash( ntime, &timeHash );
x16rt_getTimeHash( masked_ntime, &timeHash );
x16rt_getAlgoString( &timeHash[0], x16r_hash_order );
s_ntime = ntime;
s_ntime = masked_ntime;
if ( opt_debug && !thr_id )
applog( LOG_INFO, "hash order: %s time: (%08x) time hash: (%08x)",
x16r_hash_order, ntime, timeHash );
x16r_hash_order, bswap_32( pdata[17] ), timeHash );
}
x16r_4way_prehash( vdata, pdata );

View File

@@ -20,15 +20,15 @@ int scanhash_x16rt( struct work *work, uint32_t max_nonce,
mm128_bswap32_80( edata, pdata );
static __thread uint32_t s_ntime = UINT32_MAX;
uint32_t ntime = swab32( pdata[17] );
if ( s_ntime != ntime )
uint32_t masked_ntime = swab32( pdata[17] ) & 0xffffff80;
if ( s_ntime != masked_ntime )
{
x16rt_getTimeHash( ntime, &timeHash );
x16rt_getTimeHash( masked_ntime, &timeHash );
x16rt_getAlgoString( &timeHash[0], x16r_hash_order );
s_ntime = ntime;
s_ntime = masked_ntime;
if ( opt_debug && !thr_id )
applog( LOG_INFO, "hash order: %s time: (%08x) time hash: (%08x)",
x16r_hash_order, ntime, timeHash );
x16r_hash_order, swab32( pdata[17] ), timeHash );
}
x16r_prehash( edata, pdata );

View File

@@ -36,8 +36,8 @@ mv cpuminer cpuminer-avx2-sha-vaes
# AVX2 SHA AES: AMD Zen1
make clean || echo done
rm -f config.status
CFLAGS="-O3 -march=znver1 -maes -Wall -fno-common" ./configure --with-curl
#CFLAGS="-O3 -maes -mavx2 -msha -Wall -fno-common" ./configure --with-curl
#CFLAGS="-O3 -march=znver1 -maes -Wall -fno-common" ./configure --with-curl
CFLAGS="-O3 -maes -mavx2 -msha -Wall -fno-common" ./configure --with-curl
make -j 8
strip -s cpuminer
mv cpuminer cpuminer-avx2-sha

10
build-msys2.sh Executable file
View File

@@ -0,0 +1,10 @@
#!/bin/bash
#
# Compile on Windows using MSYS2 and MinGW.
make distclean || echo clean
rm -f config.status
./autogen.sh || echo done
CFLAGS="-O3 --param=evrp-mode=legacy -march=native -Wall -D_WIN32_WINNT=0x0601" ./configure --with-curl
make -j 4
strip -s cpuminer

20
configure vendored
View File

@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.19.4.
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.19.6.
#
#
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
@@ -577,8 +577,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='cpuminer-opt'
PACKAGE_TARNAME='cpuminer-opt'
PACKAGE_VERSION='3.19.4'
PACKAGE_STRING='cpuminer-opt 3.19.4'
PACKAGE_VERSION='3.19.6'
PACKAGE_STRING='cpuminer-opt 3.19.6'
PACKAGE_BUGREPORT=''
PACKAGE_URL=''
@@ -1332,7 +1332,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
\`configure' configures cpuminer-opt 3.19.4 to adapt to many kinds of systems.
\`configure' configures cpuminer-opt 3.19.6 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1404,7 +1404,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
short | recursive ) echo "Configuration of cpuminer-opt 3.19.4:";;
short | recursive ) echo "Configuration of cpuminer-opt 3.19.6:";;
esac
cat <<\_ACEOF
@@ -1509,7 +1509,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
cpuminer-opt configure 3.19.4
cpuminer-opt configure 3.19.6
generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2012,7 +2012,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
It was created by cpuminer-opt $as_me 3.19.4, which was
It was created by cpuminer-opt $as_me 3.19.6, which was
generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@
@@ -2993,7 +2993,7 @@ fi
# Define the identity of the package.
PACKAGE='cpuminer-opt'
VERSION='3.19.4'
VERSION='3.19.6'
cat >>confdefs.h <<_ACEOF
@@ -6690,7 +6690,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
This file was extended by cpuminer-opt $as_me 3.19.4, which was
This file was extended by cpuminer-opt $as_me 3.19.6, which was
generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -6756,7 +6756,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
cpuminer-opt config.status 3.19.4
cpuminer-opt config.status 3.19.6
configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\"

View File

@@ -1,4 +1,4 @@
AC_INIT([cpuminer-opt], [3.19.4])
AC_INIT([cpuminer-opt], [3.19.6])
AC_PREREQ([2.59c])
AC_CANONICAL_SYSTEM

View File

@@ -131,6 +131,8 @@ static bool opt_stratum_keepalive = false;
static struct timeval stratum_keepalive_timer;
// Stratum typically times out in 5 minutes or 300 seconds
#define stratum_keepalive_timeout 180 // 3 minutes
static struct timeval stratum_reset_time;
// pk_buffer_size is used as a version selector by b58 code, therefore
// it must be set correctly to work.
@@ -191,7 +193,6 @@ int default_api_listen = 4048;
static struct timeval session_start;
static struct timeval five_min_start;
static uint64_t session_first_block = 0;
static double latency_sum = 0.;
static uint64_t submit_sum = 0;
static uint64_t accept_sum = 0;
static uint64_t stale_sum = 0;
@@ -1147,7 +1148,7 @@ void report_summary_log( bool force )
solved, solved_block_count );
}
if ( stratum_errors )
applog2( LOG_INFO, "Stratum errors %7d", stratum_errors );
applog2( LOG_INFO, "Stratum resets %7d", stratum_errors );
applog2( LOG_INFO, "Hi/Lo Share Diff %.5g / %.5g",
highest_share, lowest_share );
@@ -1278,7 +1279,6 @@ static int share_result( int result, struct work *work,
else reject_sum++;
}
submit_sum++;
latency_sum += latency;
pthread_mutex_unlock( &stats_lock );
@@ -1294,9 +1294,9 @@ static int share_result( int result, struct work *work,
else rcol = CL_LRD;
}
applog( LOG_INFO, "%d %s%s %s%s %s%s %s%s" CL_WHT ", %.3f sec (%dms)",
applog( LOG_INFO, "%d %s%s %s%s %s%s %s%s%s, %.3f sec (%dms)",
my_stats.share_count, acol, ares, scol, sres, rcol, rres, bcol,
bres, share_time, latency );
bres, CL_N, share_time, latency );
if ( unlikely( opt_debug || !result || solved ) )
{
@@ -2114,7 +2114,7 @@ static void stratum_gen_work( struct stratum_ctx *sctx, struct work *g_work )
{
unsigned char *xnonce2str = bebin2hex( g_work->xnonce2,
g_work->xnonce2_len );
applog( LOG_INFO, "Extranonce2 %s, Block %d, Job %s",
applog( LOG_INFO, "Extranonce2 0x%s, Block %d, Job %s",
xnonce2str, sctx->block_height, g_work->job_id );
free( xnonce2str );
}
@@ -2246,7 +2246,7 @@ static void *miner_thread( void *userdata )
if ( !algo_gate.miner_thread_init( thr_id ) )
{
applog( LOG_ERR, "FAIL: thread %u failed to initialize", thr_id );
applog( LOG_ERR, "FAIL: thread %d failed to initialize", thr_id );
exit (1);
}
@@ -2274,10 +2274,24 @@ static void *miner_thread( void *userdata )
{
while ( unlikely( stratum_down ) )
sleep( 1 );
if ( *nonceptr >= end_nonce )
stratum_gen_work( &stratum, &g_work );
if ( unlikely( ( *nonceptr >= end_nonce )
&& !work_restart[thr_id].restart ) )
{
if ( opt_extranonce )
stratum_gen_work( &stratum, &g_work );
else
{
if ( !thr_id )
{
applog( LOG_WARNING, "nonce range exhausted, extranonce not subscribed" );
applog( LOG_WARNING, "waiting for new work...");
}
while ( !work_restart[thr_id].restart )
sleep ( 1 );
}
}
}
else
else // GBT or getwork
{
pthread_rwlock_wrlock( &g_work_lock );
@@ -2288,8 +2302,7 @@ static void *miner_thread( void *userdata )
if ( unlikely( !get_work( mythr, &g_work ) ) )
{
pthread_rwlock_unlock( &g_work_lock );
applog( LOG_ERR, "work retrieval failed, exiting "
"mining thread %d", thr_id );
applog( LOG_ERR, "work retrieval failed, exiting miner thread %d", thr_id );
goto out;
}
g_work_time = time(NULL);
@@ -2733,6 +2746,18 @@ void std_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
sctx->job.final_sapling_hash );
}
// Loop is out of order:
//
// connect/reconnect
// handle message
// get new message
//
// change to
// connect/reconnect
// get new message
// handle message
static void *stratum_thread(void *userdata )
{
struct thr_info *mythr = (struct thr_info *) userdata;
@@ -2750,6 +2775,7 @@ static void *stratum_thread(void *userdata )
if ( unlikely( stratum_need_reset ) )
{
stratum_need_reset = false;
gettimeofday( &stratum_reset_time, NULL );
stratum_down = true;
stratum_errors++;
stratum_disconnect( &stratum );
@@ -2760,7 +2786,7 @@ static void *stratum_thread(void *userdata )
applog(LOG_BLUE, "Connection changed to %s", short_url);
}
else
applog(LOG_WARNING, "Stratum connection reset");
applog(LOG_BLUE, "Stratum connection reset");
// reset stats queue as well
restart_threads();
if ( s_get_ptr != s_put_ptr ) s_get_ptr = s_put_ptr = 0;
@@ -2792,35 +2818,8 @@ static void *stratum_thread(void *userdata )
{
stratum_down = false;
applog(LOG_BLUE,"Stratum connection established" );
}
}
report_summary_log( ( stratum_diff != stratum.job.diff )
&& ( stratum_diff != 0. ) );
if ( stratum.new_job )
stratum_gen_work( &stratum, &g_work );
// is keepalive needed?
if ( opt_stratum_keepalive )
{
struct timeval now, et;
gettimeofday( &now, NULL );
// any shares submitted since last keepalive?
if ( last_submit_time.tv_sec > stratum_keepalive_timer.tv_sec )
memcpy( &stratum_keepalive_timer, &last_submit_time,
sizeof (struct timeval) );
timeval_subtract( &et, &now, &stratum_keepalive_timer );
if ( et.tv_sec > stratum_keepalive_timeout )
{
double diff = stratum.job.diff * 0.5;
stratum_keepalive_timer = now;
if ( !opt_quiet )
applog( LOG_BLUE,
"Stratum keepalive requesting lower difficulty" );
stratum_suggest_difficulty( &stratum, diff );
if ( stratum.new_job ) // prime first job
stratum_gen_work( &stratum, &g_work );
}
}
@@ -2846,6 +2845,54 @@ static void *stratum_thread(void *userdata )
stratum_need_reset = true;
// stratum_disconnect( &stratum );
}
report_summary_log( ( stratum_diff != stratum.job.diff )
&& ( stratum_diff != 0. ) );
if ( !stratum_need_reset )
{
// Is keepalive needed? Mutex would normally be required but that
// would block any attempt to submit a share. A share is more
// important even if it messes up the keepalive.
if ( opt_stratum_keepalive )
{
struct timeval now, et;
gettimeofday( &now, NULL );
// any shares submitted since last keepalive?
if ( last_submit_time.tv_sec > stratum_keepalive_timer.tv_sec )
memcpy( &stratum_keepalive_timer, &last_submit_time,
sizeof (struct timeval) );
timeval_subtract( &et, &now, &stratum_keepalive_timer );
if ( et.tv_sec > stratum_keepalive_timeout )
{
double diff = stratum.job.diff * 0.5;
stratum_keepalive_timer = now;
if ( !opt_quiet )
applog( LOG_BLUE,
"Stratum keepalive requesting lower difficulty" );
stratum_suggest_difficulty( &stratum, diff );
}
if ( last_submit_time.tv_sec > stratum_reset_time.tv_sec )
timeval_subtract( &et, &now, &last_submit_time );
else
timeval_subtract( &et, &now, &stratum_reset_time );
if ( et.tv_sec > stratum_keepalive_timeout + 60 )
{
applog( LOG_NOTICE, "No shares submitted, resetting stratum connection" );
stratum_need_reset = true;
stratum_keepalive_timer = now;
}
} // stratum_keepalive
if ( stratum.new_job && !stratum_need_reset )
stratum_gen_work( &stratum, &g_work );
} // stratum_need_reset
} // loop
out:
return NULL;
@@ -3434,7 +3481,8 @@ void parse_arg(int key, char *arg )
break;
case 1021: // cpu-priority
v = atoi(arg);
if (v < 0 || v > 5) /* sanity check */
applog(LOG_NOTICE,"--cpu-priority is deprecated and will be removed from a future release");
if (v < 0 || v > 5) /* sanity check */
show_usage_and_exit(1);
opt_priority = v;
break;
@@ -3470,7 +3518,8 @@ void parse_arg(int key, char *arg )
break;
case 1024:
opt_randomize = true;
break;
applog(LOG_NOTICE,"--randomize is deprecated and will be removed from a future release");
break;
case 1027: // data-file
opt_data_file = strdup( arg );
break;
@@ -3863,6 +3912,8 @@ int main(int argc, char *argv[])
if ( opt_debug )
applog(LOG_INFO,"Creating stratum thread");
stratum.new_job = false; // just to make sure
/* init stratum thread info */
stratum_thr_id = opt_n_threads + 2;
thr = &thr_info[stratum_thr_id];
@@ -3930,6 +3981,7 @@ int main(int argc, char *argv[])
memcpy( &five_min_start, &last_submit_time, sizeof (struct timeval) );
memcpy( &session_start, &last_submit_time, sizeof (struct timeval) );
memcpy( &stratum_keepalive_timer, &last_submit_time, sizeof (struct timeval) );
memcpy( &stratum_reset_time, &last_submit_time, sizeof (struct timeval) );
memcpy( &total_hashes_time, &last_submit_time, sizeof (struct timeval) );
pthread_mutex_unlock( &stats_lock );

19
miner.h
View File

@@ -824,6 +824,7 @@ Options:\n\
qubit Qubit\n\
scrypt scrypt(1024, 1, 1) (default)\n\
scrypt:N scrypt(N, 1, 1)\n\
scryptn2 scrypt(1048576, 1,1)\n\
sha256d Double SHA-256\n\
sha256q Quad SHA-256, Pyrite (PYE)\n\
sha256t Triple SHA-256, Onecoin (OC)\n\
@@ -886,10 +887,10 @@ Options:\n\
-T, --timeout=N timeout for long poll and stratum (default: 300 seconds)\n\
-s, --scantime=N upper bound on time spent scanning current work when\n\
long polling is unavailable, in seconds (default: 5)\n\
--randomize Randomize scan range start to reduce duplicates\n\
-f, --diff-factor=N Divide req. difficulty by this factor (std is 1.0)\n\
--randomize randomize scan range (deprecated)\n\
-f, --diff-factor=N divide req. difficulty by this factor (std is 1.0)\n\
-m, --diff-multiplier=N Multiply difficulty by this factor (std is 1.0)\n\
--hash-meter Display thread hash rates\n\
--hash-meter display thread hash rates\n\
--coinbase-addr=ADDR payout address for solo mining\n\
--coinbase-sig=TEXT data to insert in the coinbase when possible\n\
--no-longpoll disable long polling support\n\
@@ -910,16 +911,16 @@ Options:\n\
-B, --background run the miner in the background\n\
--benchmark run in offline benchmark mode\n\
--cpu-affinity set process affinity to cpu core(s), mask 0x3 for cores 0 and 1\n\
--cpu-priority set process priority (default: 0 idle, 2 normal to 5 highest)\n\
--cpu-priority set process priority (default: 0 idle, 2 normal to 5 highest) (deprecated)\n\
-b, --api-bind=address[:port] IP address for the miner API, default port is 4048)\n\
--api-remote Allow remote control\n\
--max-temp=N Only mine if cpu temp is less than specified value (linux)\n\
--max-rate=N[KMG] Only mine if net hashrate is less than specified value\n\
--max-diff=N Only mine if net difficulty is less than specified value\n\
--api-remote allow remote control\n\
--max-temp=N only mine if cpu temp is less than specified value (linux)\n\
--max-rate=N[KMG] only mine if net hashrate is less than specified value\n\
--max-diff=N only mine if net difficulty is less than specified value\n\
-c, --config=FILE load a JSON-format configuration file\n\
--data-file=FILE path and name of data file\n\
--verify enable additional time consuming start up tests\n\
--stratum-keepalive Prevent disconnects when difficulty is too high\n\
--stratum-keepalive prevent disconnects when difficulty is too high\n\
-V, --version display version and CPU information and exit\n\
-h, --help display this help text and exit\n\
";

File diff suppressed because it is too large Load Diff

View File

@@ -272,9 +272,19 @@ static inline void memcpy_128( __m128i *dst, const __m128i *src, const int n )
#endif
// Mask making
// Equivalent of AVX512 _mm_movepi64_mask & _mm_movepi32_mask.
// Returns 2 or 4 bit integer mask from MSB of 64 or 32 bit elements.
#define mm_movmask_64( v ) \
_mm_castpd_si128( _mm_movmask_pd( _mm_castsi128_pd( v ) ) )
#define mm_movmask_32( v ) \
_mm_castps_si128( _mm_movmask_ps( _mm_castsi128_ps( v ) ) )
// Diagonal blend: d = s3[3], s2[2], s1[1], s0[0] ||
// Diagonal blend
// Blend 4 32 bit elements from 4 vectors
@@ -284,7 +294,7 @@ static inline void memcpy_128( __m128i *dst, const __m128i *src, const int n )
mm_blend_epi32( _mm_blend_epi32( s3, s2, 0x4 ), \
_mm_blend_epi32( s1, s0, 0x1 ), 0x3 )
#elif defined(__SSE4_1)
#elif defined(__SSE4_1__)
#define mm128_diagonal_32( v3, v2, v1, v0 ) \
mm_blend_epi16( _mm_blend_epi16( s3, s2, 0x30 ), \
@@ -401,6 +411,16 @@ static inline void memcpy_128( __m128i *dst, const __m128i *src, const int n )
#define mm128_rol_16( v, c ) \
_mm_or_si128( _mm_slli_epi16( v, c ), _mm_srli_epi16( v, 16-(c) ) )
// Limited 2 input shuffle
#define mm128_shuffle2_64( a, b, c ) \
_mm_castpd_si128( _mm_shuffle_pd( _mm_castsi128_pd( a ), \
_mm_castsi128_pd( b ), c ) );
#define mm128_shuffle2_32( a, b, c ) \
_mm_castps_si128( _mm_shuffle_ps( _mm_castsi128_ps( a ), \
_mm_castsi128_ps( b ), c ) );
//
// Rotate vector elements accross all lanes
@@ -532,9 +552,8 @@ static inline void mm128_block_bswap_32( __m128i *d, const __m128i *s )
#if defined(__SSSE3__)
// Function macro with two inputs and one output, inputs are preserved.
// Returns modified first arg.
// Two input functions are not available without SSSE3. Use procedure
// belowe instead.
// macros below instead.
#define mm128_shufl2r_64( v1, v2 ) _mm_alignr_epi8( v2, v1, 8 )
#define mm128_shufl2l_64( v1, v2 ) _mm_alignr_epi8( v1, v2, 8 )
@@ -548,12 +567,11 @@ static inline void mm128_block_bswap_32( __m128i *d, const __m128i *s )
#define mm128_shufl2r_8( v1, v2 ) _mm_alignr_epi8( v2, v1, 8 )
#define mm128_shufl2l_8( v1, v2 ) _mm_alignr_epi8( v1, v2, 8 )
// Procedure macroswith 2 inputs and 2 outputs, inputs are destroyed.
// Returns both modified args in place.
// Procedure macros with 2 inputs and 2 outputs, inputs args are overwritten.
// These macros retain the vrol/vror name for now to avoid
// confusion with the shufl2r/shuffle2l function macros above.
// These may be renamed to something like shufl2r2 for 2 1nputs and
// These may be renamed to something like shufl2r2 for 2 nputs and
// 2 outputs, ie SHUFfLe 2 inputs Right with 2 outputs.
#define mm128_vror256_64( v1, v2 ) \

View File

@@ -233,6 +233,18 @@ static inline void memcpy_256( __m256i *dst, const __m256i *src, const int n )
#endif
// Mask making
// Equivalent of AVX512 _mm256_movepi64_mask & _mm256_movepi32_mask.
// Returns 4 or 8 bit integer mask from MSB of 64 or 32 bit elements.
#define mm256_movmask_64( v ) \
_mm256_castpd_si256( _mm256_movmask_pd( _mm256_castsi256_pd( v ) ) )
#define mm256_movmask_32( v ) \
_mm256_castps_si256( _mm256_movmask_ps( _mm256_castsi256_ps( v ) ) )
// Diagonal blending
// Blend 4 64 bit elements from 4 vectors
@@ -405,6 +417,16 @@ static inline void memcpy_256( __m256i *dst, const __m256i *src, const int n )
//
// Rotate elements within each 128 bit lane of 256 bit vector.
// Limited 2 input shuffle
#define mm256_shuffle2_64( a, b, c ) \
_mm256_castpd_si256( _mm256_shuffle_pd( _mm256_castsi256_pd( a ), \
_mm256_castsi256_pd( b ), c ) );
#define mm256_shuffle2_32( a, b, c ) \
_mm256_castps_si256( _mm256_shuffle_ps( _mm256_castsi256_ps( a ), \
_mm256_castsi256_ps( b ), c ) );
#define mm256_swap128_64( v ) _mm256_shuffle_epi32( v, 0x4e )
#define mm256_shuflr128_64 mm256_swap128_64
#define mm256_shufll128_64 mm256_swap128_64
@@ -485,20 +507,6 @@ static inline __m256i mm256_shuflr128_x8( const __m256i v, const int c )
v2 = _mm256_xor_si256( v1, v2 ); \
v1 = _mm256_xor_si256( v1, v2 );
#define mm256_vror512_128( v1, v2 ) \
do { \
__m256i t = _mm256_permute2x128( v1, v2, 0x03 ); \
v1 = _mm256_permute2x128( v2, v1, 0x21 ); \
v2 = t; \
} while(0)
#define mm256_vrol512_128( v1, v2 ) \
do { \
__m256i t = _mm256_permute2x128( v1, v2, 0x03 ); \
v2 = _mm256_permute2x128( v2, v1, 0x21 ); \
v1 = t; \
} while(0)
#endif // __AVX2__
#endif // SIMD_256_H__

View File

@@ -493,7 +493,7 @@ static inline __m512i mm512_shufll_32( const __m512i v )
static inline __m512i mm512_shuflr_x64( const __m512i v, const int n )
{ return _mm512_alignr_epi64( v, v, n ); }
static inline __m512i mm512_shufll_x32( const __m512i v, const int n )
static inline __m512i mm512_shuflr_x32( const __m512i v, const int n )
{ return _mm512_alignr_epi32( v, v, n ); }
#define mm512_shuflr_16( v ) \
@@ -581,8 +581,17 @@ static inline __m512i mm512_shufll_x32( const __m512i v, const int n )
0x0e0d0c0b0a090807, 0x060504030201001f ) )
//
// Shuffle-roate elements within 128 bit lanes of 512 bit vector.
// Shuffle/rotate elements within 128 bit lanes of 512 bit vector.
// Limited 2 input, 1 output shuffle within 128 bit lanes.
#define mm512_shuffle2_64( a, b, c ) \
_mm512_castpd_si512( _mm512_shuffle_pd( _mm512_castsi512_pd( a ), \
_mm512_castsi512_pd( b ), c ) );
#define mm512_shuffle2_32( a, b, c ) \
_mm512_castps_si512( _mm512_shuffle_ps( _mm512_castsi512_ps( a ), \
_mm512_castsi512_ps( b ), c ) );
// Swap 64 bits in each 128 bit lane
#define mm512_swap128_64( v ) _mm512_shuffle_epi32( v, 0x4e )
#define mm512_shuflr128_64 mm512_swap128_64
@@ -610,6 +619,7 @@ static inline __m512i mm512_shuflr128_8( const __m512i v, const int c )
// shufl2r is 2 input ...
// Drop macros? They can easilly be rebuilt using shufl2 functions
// 2 input, 1 output
// Shuffle concatenated { v1, v2 ) right or left by 256 bits and return
// rotated v1
// visually confusing for shif2r because of arg order. First arg is always
@@ -627,76 +637,5 @@ static inline __m512i mm512_shuflr128_8( const __m512i v, const int c )
#define mm512_shufl2r_32( v1, v2 ) _mm512_alignr_epi32( v2, v1, 1 )
#define mm512_shufl2l_32( v1, v2 ) _mm512_alignr_epi32( v1, v2, 1 )
// Rotate elements from 2 512 bit vectors in place, source arguments
// are overwritten.
#define mm512_swap1024_512( v1, v2 ) \
v1 = _mm512_xor_si512( v1, v2 ); \
v2 = _mm512_xor_si512( v1, v2 ); \
v1 = _mm512_xor_si512( v1, v2 );
#define mm512_shufl2l_512 mm512_swap1024_512 \
#define mm512_shufl2r_512 mm512_swap1024_512 \
// Deprecated, will be removed. Use shufl2 functions instead. Leave them as is
// for now.
// Rotate elements from 2 512 bit vectors in place, both source arguments
// are updated.
#define mm512_vror1024_256( v1, v2 ) \
do { \
__m512i t = _mm512_alignr_epi64( v1, v2, 4 ); \
v1 = _mm512_alignr_epi64( v2, v1, 4 ); \
v2 = t; \
} while(0)
#define mm512_vrol1024_256( v1, v2 ) \
do { \
__m512i t = _mm512_alignr_epi64( v1, v2, 4 ); \
v2 = _mm512_alignr_epi64( v2, v1, 4 ); \
v1 = t; \
} while(0)
#define mm512_vror1024_128( v1, v2 ) \
do { \
__m512i t = _mm512_alignr_epi64( v1, v2, 2 ); \
v1 = _mm512_alignr_epi64( v2, v1, 2 ); \
v2 = t; \
} while(0)
#define mm512_vrol1024_128( v1, v2 ) \
do { \
__m512i t = _mm512_alignr_epi64( v1, v2, 6 ); \
v2 = _mm512_alignr_epi64( v2, v1, 6 ); \
v1 = t; \
} while(0)
#define mm512_vror1024_64( v1, v2 ) \
do { \
__m512i t = _mm512_alignr_epi64( v1, v2, 1 ); \
v1 = _mm512_alignr_epi64( v2, v1, 1 ); \
v2 = t; \
} while(0)
#define mm512_vrol1024_64( v1, v2 ) \
do { \
__m512i t = _mm512_alignr_epi64( v1, v2, 7 ); \
v2 = _mm512_alignr_epi64( v2, v1, 7 ); \
v1 = t; \
} while(0)
#define mm512_vror1024_32( v1, v2 ) \
do { \
__m512i t = _mm512_alignr_epi32( v1, v2, 1 ); \
v1 = _mm512_alignr_epi32( v2, v1, 1 ); \
v2 = t; \
} while(0)
#define mm512_vrol1024_32( v1, v2 ) \
do { \
__m512i t = _mm512_alignr_epi32( v1, v2, 15 ); \
v2 = _mm512_alignr_epi32( v2, v1, 15 ); \
v1 = t; \
} while(0)
#endif // AVX512
#endif // SIMD_512_H__

View File

@@ -209,7 +209,7 @@ static inline void cpu_getname(char *outbuf, size_t maxsz)
{
memset(outbuf, 0, maxsz);
#ifdef WIN32
char brand[0xC0] = { 0 };
char brand[256] = { 0 };
int output[4] = { 0 }, ext;
cpuid(0x80000000, output);
ext = output[0];

2
util.c
View File

@@ -1658,7 +1658,7 @@ static bool stratum_parse_extranonce(struct stratum_ctx *sctx, json_t *params, i
pthread_mutex_unlock(&sctx->work_lock);
if ( !opt_quiet ) /* pool dynamic change */
applog( LOG_INFO, "Stratum extranonce1= %s, extranonce2 size= %d",
applog( LOG_INFO, "Stratum extranonce1 0x%s, extranonce2 size %d",
xnonce1, xn2_size);
return true;

View File

@@ -16,8 +16,8 @@ export MINGW_LIB="/usr/x86_64-w64-mingw32/lib"
export GCC_MINGW_LIB="/usr/lib/gcc/x86_64-w64-mingw32/9.3-win32"
# used by GCC
export LDFLAGS="-L$LOCAL_LIB/curl/lib/.libs -L$LOCAL_LIB/gmp/.libs -L$LOCAL_LIB/openssl"
# support for Windows CPU groups, AES sometimes not included in -march
export DEFAULT_CFLAGS="-O3 -maes -Wall -D_WIN32_WINNT=0x0601"
# Support for Windows 7 CPU groups, AES sometimes not included in -march
export DEFAULT_CFLAGS="-maes -O3 -Wall -D_WIN32_WINNT=0x0601"
export DEFAULT_CFLAGS_OLD="-O3 -Wall"
# make link to local gmp header file.
@@ -26,8 +26,8 @@ ln -s $LOCAL_LIB/gmp/gmp.h ./gmp.h
# make release directory and copy selected DLLs.
rm -rf release > /dev/null
mkdir release
cp README.txt release/
cp README.md release/
cp RELEASE_NOTES release/