mirror of
https://github.com/JayDDee/cpuminer-opt.git
synced 2025-09-17 23:44:27 +00:00
Compare commits
4 Commits
Author | SHA1 | Date | |
---|---|---|---|
![]() |
db76d3865f | ||
![]() |
5b678d2481 | ||
![]() |
90137b391e | ||
![]() |
8727d79182 |
@@ -65,6 +65,39 @@ If not what makes it happen or not happen?
|
|||||||
Change Log
|
Change Log
|
||||||
----------
|
----------
|
||||||
|
|
||||||
|
v3.19.7
|
||||||
|
|
||||||
|
#369 Fixed time limited mining, --time-limit.
|
||||||
|
Fixed a potential compile error when using optimization below -O3.
|
||||||
|
|
||||||
|
v3.19.6
|
||||||
|
|
||||||
|
#363 Fixed a stratum bug where the first job may be ignored, delaying the start of hashing.
|
||||||
|
Fixed handling of nonce exhaustion when hashing a fast algo with extranonce disabled.
|
||||||
|
Small optimization to Shavite.
|
||||||
|
|
||||||
|
v3.19.5
|
||||||
|
|
||||||
|
Enhanced stratum-keepalive preemptively resets the stratum connection
|
||||||
|
before the server does, to avoid lost shares.
|
||||||
|
|
||||||
|
Added build-msys2.sh shell script for easier compiling on Windows, see Wiki for details.
|
||||||
|
|
||||||
|
X16RT: eliminate unnecessary recalculations of the hash order.
|
||||||
|
|
||||||
|
Fix a few compiler warnings.
|
||||||
|
|
||||||
|
Fixed log colour error when a block is solved.
|
||||||
|
|
||||||
|
v3.19.4
|
||||||
|
|
||||||
|
#359: Fix verthash memory allocation for non-hugepages, broken in v3.19.3.
|
||||||
|
|
||||||
|
New option stratum-keepalive prevents stratum timeouts when no shares are
|
||||||
|
submitted for several minutes due to high difficulty.
|
||||||
|
|
||||||
|
Fixed a bug displaying optimizations for some algos.
|
||||||
|
|
||||||
v3.19.3
|
v3.19.3
|
||||||
|
|
||||||
Linux: Faster verthash (+25%), scryptn2 (+2%) when huge pages are available.
|
Linux: Faster verthash (+25%), scryptn2 (+2%) when huge pages are available.
|
||||||
|
@@ -344,7 +344,7 @@ static size_t
|
|||||||
detect_cpu(void) {
|
detect_cpu(void) {
|
||||||
//union { uint8_t s[12]; uint32_t i[3]; } vendor_string;
|
//union { uint8_t s[12]; uint32_t i[3]; } vendor_string;
|
||||||
//cpu_vendors_x86 vendor = cpu_nobody;
|
//cpu_vendors_x86 vendor = cpu_nobody;
|
||||||
x86_regs regs;
|
x86_regs regs; regs.eax = regs.ebx = regs.ecx = 0;
|
||||||
uint32_t max_level, max_ext_level;
|
uint32_t max_level, max_ext_level;
|
||||||
size_t cpu_flags = 0;
|
size_t cpu_flags = 0;
|
||||||
#if defined(X86ASM_AVX) || defined(X86_64ASM_AVX)
|
#if defined(X86ASM_AVX) || defined(X86_64ASM_AVX)
|
||||||
@@ -460,4 +460,4 @@ get_top_cpuflag_desc(size_t flag) {
|
|||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#endif /* defined(CPU_X86) || defined(CPU_X86_64) */
|
#endif /* defined(CPU_X86) || defined(CPU_X86_64) */
|
||||||
|
@@ -4,11 +4,12 @@ typedef void (FASTCALL *scrypt_ROMixfn)(scrypt_mix_word_t *X/*[chunkWords]*/, sc
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* romix pre/post nop function */
|
/* romix pre/post nop function */
|
||||||
|
/*
|
||||||
static void asm_calling_convention
|
static void asm_calling_convention
|
||||||
scrypt_romix_nop(scrypt_mix_word_t *blocks, size_t nblocks) {
|
scrypt_romix_nop(scrypt_mix_word_t *blocks, size_t nblocks) {
|
||||||
(void)blocks; (void)nblocks;
|
(void)blocks; (void)nblocks;
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
/* romix pre/post endian conversion function */
|
/* romix pre/post endian conversion function */
|
||||||
static void asm_calling_convention
|
static void asm_calling_convention
|
||||||
scrypt_romix_convert_endian(scrypt_mix_word_t *blocks, size_t nblocks) {
|
scrypt_romix_convert_endian(scrypt_mix_word_t *blocks, size_t nblocks) {
|
||||||
|
@@ -70,7 +70,10 @@ void decred_be_build_stratum_request( char *req, struct work *work,
|
|||||||
rpc_user, work->job_id, xnonce2str, ntimestr, noncestr );
|
rpc_user, work->job_id, xnonce2str, ntimestr, noncestr );
|
||||||
free(xnonce2str);
|
free(xnonce2str);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if !defined(min)
|
||||||
#define min(a,b) (a>b ? (b) :(a))
|
#define min(a,b) (a>b ? (b) :(a))
|
||||||
|
#endif
|
||||||
|
|
||||||
void decred_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
|
void decred_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
|
||||||
{
|
{
|
||||||
|
@@ -594,9 +594,6 @@ void bmw512_2way_close( bmw_2way_big_context *ctx, void *dst )
|
|||||||
#define rb6(x) mm256_rol_64( x, 43 )
|
#define rb6(x) mm256_rol_64( x, 43 )
|
||||||
#define rb7(x) mm256_rol_64( x, 53 )
|
#define rb7(x) mm256_rol_64( x, 53 )
|
||||||
|
|
||||||
#define rol_off_64( M, j ) \
|
|
||||||
mm256_rol_64( M[ (j) & 0xF ], ( (j) & 0xF ) + 1 )
|
|
||||||
|
|
||||||
#define add_elt_b( mj0, mj3, mj10, h, K ) \
|
#define add_elt_b( mj0, mj3, mj10, h, K ) \
|
||||||
_mm256_xor_si256( h, _mm256_add_epi64( K, \
|
_mm256_xor_si256( h, _mm256_add_epi64( K, \
|
||||||
_mm256_sub_epi64( _mm256_add_epi64( mj0, mj3 ), mj10 ) ) )
|
_mm256_sub_epi64( _mm256_add_epi64( mj0, mj3 ), mj10 ) ) )
|
||||||
@@ -732,8 +729,23 @@ void compress_big( const __m256i *M, const __m256i H[16], __m256i dH[16] )
|
|||||||
qt[15] = _mm256_add_epi64( sb0( Wb15), H[ 0] );
|
qt[15] = _mm256_add_epi64( sb0( Wb15), H[ 0] );
|
||||||
|
|
||||||
__m256i mj[16];
|
__m256i mj[16];
|
||||||
for ( i = 0; i < 16; i++ )
|
|
||||||
mj[i] = rol_off_64( M, i );
|
mj[ 0] = mm256_rol_64( M[ 0], 1 );
|
||||||
|
mj[ 1] = mm256_rol_64( M[ 1], 2 );
|
||||||
|
mj[ 2] = mm256_rol_64( M[ 2], 3 );
|
||||||
|
mj[ 3] = mm256_rol_64( M[ 3], 4 );
|
||||||
|
mj[ 4] = mm256_rol_64( M[ 4], 5 );
|
||||||
|
mj[ 5] = mm256_rol_64( M[ 5], 6 );
|
||||||
|
mj[ 6] = mm256_rol_64( M[ 6], 7 );
|
||||||
|
mj[ 7] = mm256_rol_64( M[ 7], 8 );
|
||||||
|
mj[ 8] = mm256_rol_64( M[ 8], 9 );
|
||||||
|
mj[ 9] = mm256_rol_64( M[ 9], 10 );
|
||||||
|
mj[10] = mm256_rol_64( M[10], 11 );
|
||||||
|
mj[11] = mm256_rol_64( M[11], 12 );
|
||||||
|
mj[12] = mm256_rol_64( M[12], 13 );
|
||||||
|
mj[13] = mm256_rol_64( M[13], 14 );
|
||||||
|
mj[14] = mm256_rol_64( M[14], 15 );
|
||||||
|
mj[15] = mm256_rol_64( M[15], 16 );
|
||||||
|
|
||||||
qt[16] = add_elt_b( mj[ 0], mj[ 3], mj[10], H[ 7],
|
qt[16] = add_elt_b( mj[ 0], mj[ 3], mj[10], H[ 7],
|
||||||
(const __m256i)_mm256_set1_epi64x( 16 * 0x0555555555555555ULL ) );
|
(const __m256i)_mm256_set1_epi64x( 16 * 0x0555555555555555ULL ) );
|
||||||
@@ -1034,9 +1046,6 @@ bmw512_4way_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
|
|||||||
#define r8b6(x) mm512_rol_64( x, 43 )
|
#define r8b6(x) mm512_rol_64( x, 43 )
|
||||||
#define r8b7(x) mm512_rol_64( x, 53 )
|
#define r8b7(x) mm512_rol_64( x, 53 )
|
||||||
|
|
||||||
#define rol8w_off_64( M, j ) \
|
|
||||||
mm512_rol_64( M[ (j) & 0xF ], ( (j) & 0xF ) + 1 )
|
|
||||||
|
|
||||||
#define add_elt_b8( mj0, mj3, mj10, h, K ) \
|
#define add_elt_b8( mj0, mj3, mj10, h, K ) \
|
||||||
_mm512_xor_si512( h, _mm512_add_epi64( K, \
|
_mm512_xor_si512( h, _mm512_add_epi64( K, \
|
||||||
_mm512_sub_epi64( _mm512_add_epi64( mj0, mj3 ), mj10 ) ) )
|
_mm512_sub_epi64( _mm512_add_epi64( mj0, mj3 ), mj10 ) ) )
|
||||||
@@ -1171,41 +1180,73 @@ void compress_big_8way( const __m512i *M, const __m512i H[16],
|
|||||||
qt[15] = _mm512_add_epi64( s8b0( W8b15), H[ 0] );
|
qt[15] = _mm512_add_epi64( s8b0( W8b15), H[ 0] );
|
||||||
|
|
||||||
__m512i mj[16];
|
__m512i mj[16];
|
||||||
for ( i = 0; i < 16; i++ )
|
uint64_t K = 16 * 0x0555555555555555ULL;
|
||||||
mj[i] = rol8w_off_64( M, i );
|
|
||||||
|
mj[ 0] = mm512_rol_64( M[ 0], 1 );
|
||||||
|
mj[ 1] = mm512_rol_64( M[ 1], 2 );
|
||||||
|
mj[ 2] = mm512_rol_64( M[ 2], 3 );
|
||||||
|
mj[ 3] = mm512_rol_64( M[ 3], 4 );
|
||||||
|
mj[ 4] = mm512_rol_64( M[ 4], 5 );
|
||||||
|
mj[ 5] = mm512_rol_64( M[ 5], 6 );
|
||||||
|
mj[ 6] = mm512_rol_64( M[ 6], 7 );
|
||||||
|
mj[ 7] = mm512_rol_64( M[ 7], 8 );
|
||||||
|
mj[ 8] = mm512_rol_64( M[ 8], 9 );
|
||||||
|
mj[ 9] = mm512_rol_64( M[ 9], 10 );
|
||||||
|
mj[10] = mm512_rol_64( M[10], 11 );
|
||||||
|
mj[11] = mm512_rol_64( M[11], 12 );
|
||||||
|
mj[12] = mm512_rol_64( M[12], 13 );
|
||||||
|
mj[13] = mm512_rol_64( M[13], 14 );
|
||||||
|
mj[14] = mm512_rol_64( M[14], 15 );
|
||||||
|
mj[15] = mm512_rol_64( M[15], 16 );
|
||||||
|
|
||||||
qt[16] = add_elt_b8( mj[ 0], mj[ 3], mj[10], H[ 7],
|
qt[16] = add_elt_b8( mj[ 0], mj[ 3], mj[10], H[ 7],
|
||||||
(const __m512i)_mm512_set1_epi64( 16 * 0x0555555555555555ULL ) );
|
(const __m512i)_mm512_set1_epi64( K ) );
|
||||||
|
K += 0x0555555555555555ULL;
|
||||||
qt[17] = add_elt_b8( mj[ 1], mj[ 4], mj[11], H[ 8],
|
qt[17] = add_elt_b8( mj[ 1], mj[ 4], mj[11], H[ 8],
|
||||||
(const __m512i)_mm512_set1_epi64( 17 * 0x0555555555555555ULL ) );
|
(const __m512i)_mm512_set1_epi64( K ) );
|
||||||
|
K += 0x0555555555555555ULL;
|
||||||
qt[18] = add_elt_b8( mj[ 2], mj[ 5], mj[12], H[ 9],
|
qt[18] = add_elt_b8( mj[ 2], mj[ 5], mj[12], H[ 9],
|
||||||
(const __m512i)_mm512_set1_epi64( 18 * 0x0555555555555555ULL ) );
|
(const __m512i)_mm512_set1_epi64( K ) );
|
||||||
|
K += 0x0555555555555555ULL;
|
||||||
qt[19] = add_elt_b8( mj[ 3], mj[ 6], mj[13], H[10],
|
qt[19] = add_elt_b8( mj[ 3], mj[ 6], mj[13], H[10],
|
||||||
(const __m512i)_mm512_set1_epi64( 19 * 0x0555555555555555ULL ) );
|
(const __m512i)_mm512_set1_epi64( K ) );
|
||||||
|
K += 0x0555555555555555ULL;
|
||||||
qt[20] = add_elt_b8( mj[ 4], mj[ 7], mj[14], H[11],
|
qt[20] = add_elt_b8( mj[ 4], mj[ 7], mj[14], H[11],
|
||||||
(const __m512i)_mm512_set1_epi64( 20 * 0x0555555555555555ULL ) );
|
(const __m512i)_mm512_set1_epi64( K ) );
|
||||||
|
K += 0x0555555555555555ULL;
|
||||||
qt[21] = add_elt_b8( mj[ 5], mj[ 8], mj[15], H[12],
|
qt[21] = add_elt_b8( mj[ 5], mj[ 8], mj[15], H[12],
|
||||||
(const __m512i)_mm512_set1_epi64( 21 * 0x0555555555555555ULL ) );
|
(const __m512i)_mm512_set1_epi64( K ) );
|
||||||
|
K += 0x0555555555555555ULL;
|
||||||
qt[22] = add_elt_b8( mj[ 6], mj[ 9], mj[ 0], H[13],
|
qt[22] = add_elt_b8( mj[ 6], mj[ 9], mj[ 0], H[13],
|
||||||
(const __m512i)_mm512_set1_epi64( 22 * 0x0555555555555555ULL ) );
|
(const __m512i)_mm512_set1_epi64( K ) );
|
||||||
|
K += 0x0555555555555555ULL;
|
||||||
qt[23] = add_elt_b8( mj[ 7], mj[10], mj[ 1], H[14],
|
qt[23] = add_elt_b8( mj[ 7], mj[10], mj[ 1], H[14],
|
||||||
(const __m512i)_mm512_set1_epi64( 23 * 0x0555555555555555ULL ) );
|
(const __m512i)_mm512_set1_epi64( K ) );
|
||||||
|
K += 0x0555555555555555ULL;
|
||||||
qt[24] = add_elt_b8( mj[ 8], mj[11], mj[ 2], H[15],
|
qt[24] = add_elt_b8( mj[ 8], mj[11], mj[ 2], H[15],
|
||||||
(const __m512i)_mm512_set1_epi64( 24 * 0x0555555555555555ULL ) );
|
(const __m512i)_mm512_set1_epi64( K ) );
|
||||||
|
K += 0x0555555555555555ULL;
|
||||||
qt[25] = add_elt_b8( mj[ 9], mj[12], mj[ 3], H[ 0],
|
qt[25] = add_elt_b8( mj[ 9], mj[12], mj[ 3], H[ 0],
|
||||||
(const __m512i)_mm512_set1_epi64( 25 * 0x0555555555555555ULL ) );
|
(const __m512i)_mm512_set1_epi64( K ) );
|
||||||
|
K += 0x0555555555555555ULL;
|
||||||
qt[26] = add_elt_b8( mj[10], mj[13], mj[ 4], H[ 1],
|
qt[26] = add_elt_b8( mj[10], mj[13], mj[ 4], H[ 1],
|
||||||
(const __m512i)_mm512_set1_epi64( 26 * 0x0555555555555555ULL ) );
|
(const __m512i)_mm512_set1_epi64( K ) );
|
||||||
|
K += 0x0555555555555555ULL;
|
||||||
qt[27] = add_elt_b8( mj[11], mj[14], mj[ 5], H[ 2],
|
qt[27] = add_elt_b8( mj[11], mj[14], mj[ 5], H[ 2],
|
||||||
(const __m512i)_mm512_set1_epi64( 27 * 0x0555555555555555ULL ) );
|
(const __m512i)_mm512_set1_epi64( K ) );
|
||||||
|
K += 0x0555555555555555ULL;
|
||||||
qt[28] = add_elt_b8( mj[12], mj[15], mj[ 6], H[ 3],
|
qt[28] = add_elt_b8( mj[12], mj[15], mj[ 6], H[ 3],
|
||||||
(const __m512i)_mm512_set1_epi64( 28 * 0x0555555555555555ULL ) );
|
(const __m512i)_mm512_set1_epi64( K ) );
|
||||||
|
K += 0x0555555555555555ULL;
|
||||||
qt[29] = add_elt_b8( mj[13], mj[ 0], mj[ 7], H[ 4],
|
qt[29] = add_elt_b8( mj[13], mj[ 0], mj[ 7], H[ 4],
|
||||||
(const __m512i)_mm512_set1_epi64( 29 * 0x0555555555555555ULL ) );
|
(const __m512i)_mm512_set1_epi64( K ) );
|
||||||
|
K += 0x0555555555555555ULL;
|
||||||
qt[30] = add_elt_b8( mj[14], mj[ 1], mj[ 8], H[ 5],
|
qt[30] = add_elt_b8( mj[14], mj[ 1], mj[ 8], H[ 5],
|
||||||
(const __m512i)_mm512_set1_epi64( 30 * 0x0555555555555555ULL ) );
|
(const __m512i)_mm512_set1_epi64( K ) );
|
||||||
|
K += 0x0555555555555555ULL;
|
||||||
qt[31] = add_elt_b8( mj[15], mj[ 2], mj[ 9], H[ 6],
|
qt[31] = add_elt_b8( mj[15], mj[ 2], mj[ 9], H[ 6],
|
||||||
(const __m512i)_mm512_set1_epi64( 31 * 0x0555555555555555ULL ) );
|
(const __m512i)_mm512_set1_epi64( K ) );
|
||||||
|
|
||||||
|
|
||||||
qt[16] = _mm512_add_epi64( qt[16], expand1_b8( qt, 16 ) );
|
qt[16] = _mm512_add_epi64( qt[16], expand1_b8( qt, 16 ) );
|
||||||
qt[17] = _mm512_add_epi64( qt[17], expand1_b8( qt, 17 ) );
|
qt[17] = _mm512_add_epi64( qt[17], expand1_b8( qt, 17 ) );
|
||||||
|
@@ -45,6 +45,6 @@ void sha512Compute32b_parallel(
|
|||||||
uint64_t *data[SHA512_PARALLEL_N],
|
uint64_t *data[SHA512_PARALLEL_N],
|
||||||
uint64_t *digest[SHA512_PARALLEL_N]);
|
uint64_t *digest[SHA512_PARALLEL_N]);
|
||||||
|
|
||||||
void sha512ProcessBlock(Sha512Context *context);
|
void sha512ProcessBlock(Sha512Context contexti[2] );
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@@ -69,7 +69,6 @@ void allium_16way_hash( void *state, const void *input )
|
|||||||
intrlv_8x64( vhashB, hash8, hash9, hash10, hash11, hash12, hash13, hash14,
|
intrlv_8x64( vhashB, hash8, hash9, hash10, hash11, hash12, hash13, hash14,
|
||||||
hash15, 256 );
|
hash15, 256 );
|
||||||
|
|
||||||
// rintrlv_8x32_8x64( vhashA, vhash, 256 );
|
|
||||||
keccak256_8way_update( &ctx.keccak, vhashA, 32 );
|
keccak256_8way_update( &ctx.keccak, vhashA, 32 );
|
||||||
keccak256_8way_close( &ctx.keccak, vhashA);
|
keccak256_8way_close( &ctx.keccak, vhashA);
|
||||||
keccak256_8way_init( &ctx.keccak );
|
keccak256_8way_init( &ctx.keccak );
|
||||||
@@ -284,7 +283,7 @@ void allium_8way_hash( void *hash, const void *input )
|
|||||||
blake256_8way_close( &ctx.blake, vhashA );
|
blake256_8way_close( &ctx.blake, vhashA );
|
||||||
|
|
||||||
dintrlv_8x32( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
|
dintrlv_8x32( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
|
||||||
vhashA, 256 );
|
vhashA, 256 );
|
||||||
intrlv_4x64( vhashA, hash0, hash1, hash2, hash3, 256 );
|
intrlv_4x64( vhashA, hash0, hash1, hash2, hash3, 256 );
|
||||||
intrlv_4x64( vhashB, hash4, hash5, hash6, hash7, 256 );
|
intrlv_4x64( vhashB, hash4, hash5, hash6, hash7, 256 );
|
||||||
|
|
||||||
|
@@ -49,7 +49,7 @@ void lyra2z_16way_hash( void *state, const void *input )
|
|||||||
|
|
||||||
dintrlv_16x32( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
|
dintrlv_16x32( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
|
||||||
hash8, hash9, hash10, hash11 ,hash12, hash13, hash14, hash15,
|
hash8, hash9, hash10, hash11 ,hash12, hash13, hash14, hash15,
|
||||||
vhash, 256 );
|
vhash, 256 );
|
||||||
|
|
||||||
intrlv_2x256( vhash, hash0, hash1, 256 );
|
intrlv_2x256( vhash, hash0, hash1, 256 );
|
||||||
LYRA2Z_2WAY( lyra2z_16way_matrix, vhash, 32, vhash, 32, 8, 8, 8 );
|
LYRA2Z_2WAY( lyra2z_16way_matrix, vhash, 32, vhash, 32, 8, 8, 8 );
|
||||||
|
@@ -261,7 +261,7 @@ inline void reducedDuplexRowSetup_2way( uint64_t *State, uint64_t *rowIn,
|
|||||||
// overlap it's unified.
|
// overlap it's unified.
|
||||||
// As a result normal is Nrows-2 / Nrows.
|
// As a result normal is Nrows-2 / Nrows.
|
||||||
// for 4 rows: 1 unified, 2 overlap, 1 normal.
|
// for 4 rows: 1 unified, 2 overlap, 1 normal.
|
||||||
// for 8 rows: 1 unified, 2 overlap, 56 normal.
|
// for 8 rows: 1 unified, 2 overlap, 5 normal.
|
||||||
|
|
||||||
static inline void reducedDuplexRow_2way_normal( uint64_t *State,
|
static inline void reducedDuplexRow_2way_normal( uint64_t *State,
|
||||||
uint64_t *rowIn, uint64_t *rowInOut0, uint64_t *rowInOut1,
|
uint64_t *rowIn, uint64_t *rowInOut0, uint64_t *rowInOut1,
|
||||||
@@ -283,6 +283,15 @@ static inline void reducedDuplexRow_2way_normal( uint64_t *State,
|
|||||||
for ( i = 0; i < nCols; i++ )
|
for ( i = 0; i < nCols; i++ )
|
||||||
{
|
{
|
||||||
//Absorbing "M[prev] [+] M[row*]"
|
//Absorbing "M[prev] [+] M[row*]"
|
||||||
|
io0 = _mm512_load_si512( inout0 );
|
||||||
|
io1 = _mm512_load_si512( inout0 +1 );
|
||||||
|
io2 = _mm512_load_si512( inout0 +2 );
|
||||||
|
|
||||||
|
io0 = _mm512_mask_load_epi64( io0, 0xf0, inout1 );
|
||||||
|
io1 = _mm512_mask_load_epi64( io1, 0xf0, inout1 +1 );
|
||||||
|
io2 = _mm512_mask_load_epi64( io2, 0xf0, inout1 +2 );
|
||||||
|
|
||||||
|
/*
|
||||||
io0 = _mm512_mask_blend_epi64( 0xf0,
|
io0 = _mm512_mask_blend_epi64( 0xf0,
|
||||||
_mm512_load_si512( (__m512i*)inout0 ),
|
_mm512_load_si512( (__m512i*)inout0 ),
|
||||||
_mm512_load_si512( (__m512i*)inout1 ) );
|
_mm512_load_si512( (__m512i*)inout1 ) );
|
||||||
@@ -292,6 +301,7 @@ static inline void reducedDuplexRow_2way_normal( uint64_t *State,
|
|||||||
io2 = _mm512_mask_blend_epi64( 0xf0,
|
io2 = _mm512_mask_blend_epi64( 0xf0,
|
||||||
_mm512_load_si512( (__m512i*)inout0 +2 ),
|
_mm512_load_si512( (__m512i*)inout0 +2 ),
|
||||||
_mm512_load_si512( (__m512i*)inout1 +2 ) );
|
_mm512_load_si512( (__m512i*)inout1 +2 ) );
|
||||||
|
*/
|
||||||
|
|
||||||
state0 = _mm512_xor_si512( state0, _mm512_add_epi64( in[0], io0 ) );
|
state0 = _mm512_xor_si512( state0, _mm512_add_epi64( in[0], io0 ) );
|
||||||
state1 = _mm512_xor_si512( state1, _mm512_add_epi64( in[1], io1 ) );
|
state1 = _mm512_xor_si512( state1, _mm512_add_epi64( in[1], io1 ) );
|
||||||
@@ -359,6 +369,15 @@ static inline void reducedDuplexRow_2way_overlap( uint64_t *State,
|
|||||||
for ( i = 0; i < nCols; i++ )
|
for ( i = 0; i < nCols; i++ )
|
||||||
{
|
{
|
||||||
//Absorbing "M[prev] [+] M[row*]"
|
//Absorbing "M[prev] [+] M[row*]"
|
||||||
|
io0.v512 = _mm512_load_si512( inout0 );
|
||||||
|
io1.v512 = _mm512_load_si512( inout0 +1 );
|
||||||
|
io2.v512 = _mm512_load_si512( inout0 +2 );
|
||||||
|
|
||||||
|
io0.v512 = _mm512_mask_load_epi64( io0.v512, 0xf0, inout1 );
|
||||||
|
io1.v512 = _mm512_mask_load_epi64( io1.v512, 0xf0, inout1 +1 );
|
||||||
|
io2.v512 = _mm512_mask_load_epi64( io2.v512, 0xf0, inout1 +2 );
|
||||||
|
|
||||||
|
/*
|
||||||
io0.v512 = _mm512_mask_blend_epi64( 0xf0,
|
io0.v512 = _mm512_mask_blend_epi64( 0xf0,
|
||||||
_mm512_load_si512( (__m512i*)inout0 ),
|
_mm512_load_si512( (__m512i*)inout0 ),
|
||||||
_mm512_load_si512( (__m512i*)inout1 ) );
|
_mm512_load_si512( (__m512i*)inout1 ) );
|
||||||
@@ -368,27 +387,12 @@ static inline void reducedDuplexRow_2way_overlap( uint64_t *State,
|
|||||||
io2.v512 = _mm512_mask_blend_epi64( 0xf0,
|
io2.v512 = _mm512_mask_blend_epi64( 0xf0,
|
||||||
_mm512_load_si512( (__m512i*)inout0 +2 ),
|
_mm512_load_si512( (__m512i*)inout0 +2 ),
|
||||||
_mm512_load_si512( (__m512i*)inout1 +2 ) );
|
_mm512_load_si512( (__m512i*)inout1 +2 ) );
|
||||||
|
*/
|
||||||
|
|
||||||
state0 = _mm512_xor_si512( state0, _mm512_add_epi64( in[0], io0.v512 ) );
|
state0 = _mm512_xor_si512( state0, _mm512_add_epi64( in[0], io0.v512 ) );
|
||||||
state1 = _mm512_xor_si512( state1, _mm512_add_epi64( in[1], io1.v512 ) );
|
state1 = _mm512_xor_si512( state1, _mm512_add_epi64( in[1], io1.v512 ) );
|
||||||
state2 = _mm512_xor_si512( state2, _mm512_add_epi64( in[2], io2.v512 ) );
|
state2 = _mm512_xor_si512( state2, _mm512_add_epi64( in[2], io2.v512 ) );
|
||||||
|
|
||||||
/*
|
|
||||||
io.v512[0] = _mm512_mask_blend_epi64( 0xf0,
|
|
||||||
_mm512_load_si512( (__m512i*)inout0 ),
|
|
||||||
_mm512_load_si512( (__m512i*)inout1 ) );
|
|
||||||
io.v512[1] = _mm512_mask_blend_epi64( 0xf0,
|
|
||||||
_mm512_load_si512( (__m512i*)inout0 +1 ),
|
|
||||||
_mm512_load_si512( (__m512i*)inout1 +1 ) );
|
|
||||||
io.v512[2] = _mm512_mask_blend_epi64( 0xf0,
|
|
||||||
_mm512_load_si512( (__m512i*)inout0 +2 ),
|
|
||||||
_mm512_load_si512( (__m512i*)inout1 +2 ) );
|
|
||||||
|
|
||||||
state0 = _mm512_xor_si512( state0, _mm512_add_epi64( in[0], io.v512[0] ) );
|
|
||||||
state1 = _mm512_xor_si512( state1, _mm512_add_epi64( in[1], io.v512[1] ) );
|
|
||||||
state2 = _mm512_xor_si512( state2, _mm512_add_epi64( in[2], io.v512[2] ) );
|
|
||||||
*/
|
|
||||||
|
|
||||||
//Applies the reduced-round transformation f to the sponge's state
|
//Applies the reduced-round transformation f to the sponge's state
|
||||||
LYRA_ROUND_2WAY_AVX512( state0, state1, state2, state3 );
|
LYRA_ROUND_2WAY_AVX512( state0, state1, state2, state3 );
|
||||||
|
|
||||||
@@ -415,22 +419,6 @@ static inline void reducedDuplexRow_2way_overlap( uint64_t *State,
|
|||||||
io2.v512 = _mm512_mask_blend_epi64( 0xf0, io2.v512, out[2] );
|
io2.v512 = _mm512_mask_blend_epi64( 0xf0, io2.v512, out[2] );
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
if ( rowOut == rowInOut0 )
|
|
||||||
{
|
|
||||||
io.v512[0] = _mm512_mask_blend_epi64( 0x0f, io.v512[0], out[0] );
|
|
||||||
io.v512[1] = _mm512_mask_blend_epi64( 0x0f, io.v512[1], out[1] );
|
|
||||||
io.v512[2] = _mm512_mask_blend_epi64( 0x0f, io.v512[2], out[2] );
|
|
||||||
|
|
||||||
}
|
|
||||||
if ( rowOut == rowInOut1 )
|
|
||||||
{
|
|
||||||
io.v512[0] = _mm512_mask_blend_epi64( 0xf0, io.v512[0], out[0] );
|
|
||||||
io.v512[1] = _mm512_mask_blend_epi64( 0xf0, io.v512[1], out[1] );
|
|
||||||
io.v512[2] = _mm512_mask_blend_epi64( 0xf0, io.v512[2], out[2] );
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
|
|
||||||
//M[rowInOut][col] = M[rowInOut][col] XOR rotW(rand)
|
//M[rowInOut][col] = M[rowInOut][col] XOR rotW(rand)
|
||||||
t0 = _mm512_permutex_epi64( state0, 0x93 );
|
t0 = _mm512_permutex_epi64( state0, 0x93 );
|
||||||
t1 = _mm512_permutex_epi64( state1, 0x93 );
|
t1 = _mm512_permutex_epi64( state1, 0x93 );
|
||||||
@@ -444,12 +432,23 @@ static inline void reducedDuplexRow_2way_overlap( uint64_t *State,
|
|||||||
_mm512_mask_blend_epi64( 0x11, t2, t1 ) );
|
_mm512_mask_blend_epi64( 0x11, t2, t1 ) );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
casti_m256i( inout0, 0 ) = _mm512_castsi512_si256( io0.v512 );
|
||||||
|
casti_m256i( inout0, 2 ) = _mm512_castsi512_si256( io1.v512 );
|
||||||
|
casti_m256i( inout0, 4 ) = _mm512_castsi512_si256( io2.v512 );
|
||||||
|
_mm512_mask_store_epi64( inout1, 0xf0, io0.v512 );
|
||||||
|
_mm512_mask_store_epi64( inout1 +1, 0xf0, io1.v512 );
|
||||||
|
_mm512_mask_store_epi64( inout1 +2, 0xf0, io2.v512 );
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
casti_m256i( inout0, 0 ) = io0.v256lo;
|
casti_m256i( inout0, 0 ) = io0.v256lo;
|
||||||
casti_m256i( inout1, 1 ) = io0.v256hi;
|
casti_m256i( inout1, 1 ) = io0.v256hi;
|
||||||
casti_m256i( inout0, 2 ) = io1.v256lo;
|
casti_m256i( inout0, 2 ) = io1.v256lo;
|
||||||
casti_m256i( inout1, 3 ) = io1.v256hi;
|
casti_m256i( inout1, 3 ) = io1.v256hi;
|
||||||
casti_m256i( inout0, 4 ) = io2.v256lo;
|
casti_m256i( inout0, 4 ) = io2.v256lo;
|
||||||
casti_m256i( inout1, 5 ) = io2.v256hi;
|
casti_m256i( inout1, 5 ) = io2.v256hi;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
_mm512_mask_store_epi64( inout0, 0x0f, io.v512[0] );
|
_mm512_mask_store_epi64( inout0, 0x0f, io.v512[0] );
|
||||||
_mm512_mask_store_epi64( inout1, 0xf0, io.v512[0] );
|
_mm512_mask_store_epi64( inout1, 0xf0, io.v512[0] );
|
||||||
|
@@ -35,6 +35,7 @@
|
|||||||
|
|
||||||
#include "sph_ripemd.h"
|
#include "sph_ripemd.h"
|
||||||
|
|
||||||
|
#if 0
|
||||||
/*
|
/*
|
||||||
* Round functions for RIPEMD (original).
|
* Round functions for RIPEMD (original).
|
||||||
*/
|
*/
|
||||||
@@ -46,6 +47,7 @@ static const sph_u32 oIV[5] = {
|
|||||||
SPH_C32(0x67452301), SPH_C32(0xEFCDAB89),
|
SPH_C32(0x67452301), SPH_C32(0xEFCDAB89),
|
||||||
SPH_C32(0x98BADCFE), SPH_C32(0x10325476)
|
SPH_C32(0x98BADCFE), SPH_C32(0x10325476)
|
||||||
};
|
};
|
||||||
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Round functions for RIPEMD-128 and RIPEMD-160.
|
* Round functions for RIPEMD-128 and RIPEMD-160.
|
||||||
@@ -63,6 +65,8 @@ static const sph_u32 IV[5] = {
|
|||||||
|
|
||||||
#define ROTL SPH_ROTL32
|
#define ROTL SPH_ROTL32
|
||||||
|
|
||||||
|
#if 0
|
||||||
|
|
||||||
/* ===================================================================== */
|
/* ===================================================================== */
|
||||||
/*
|
/*
|
||||||
* RIPEMD (original hash, deprecated).
|
* RIPEMD (original hash, deprecated).
|
||||||
@@ -539,6 +543,8 @@ sph_ripemd128_comp(const sph_u32 msg[16], sph_u32 val[4])
|
|||||||
#undef RIPEMD128_IN
|
#undef RIPEMD128_IN
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
/* ===================================================================== */
|
/* ===================================================================== */
|
||||||
/*
|
/*
|
||||||
* RIPEMD-160.
|
* RIPEMD-160.
|
||||||
|
@@ -84,6 +84,7 @@
|
|||||||
* can be cloned by copying the context (e.g. with a simple
|
* can be cloned by copying the context (e.g. with a simple
|
||||||
* <code>memcpy()</code>).
|
* <code>memcpy()</code>).
|
||||||
*/
|
*/
|
||||||
|
#if 0
|
||||||
typedef struct {
|
typedef struct {
|
||||||
#ifndef DOXYGEN_IGNORE
|
#ifndef DOXYGEN_IGNORE
|
||||||
unsigned char buf[64]; /* first field, for alignment */
|
unsigned char buf[64]; /* first field, for alignment */
|
||||||
@@ -204,6 +205,8 @@ void sph_ripemd128_close(void *cc, void *dst);
|
|||||||
*/
|
*/
|
||||||
void sph_ripemd128_comp(const sph_u32 msg[16], sph_u32 val[4]);
|
void sph_ripemd128_comp(const sph_u32 msg[16], sph_u32 val[4]);
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
/* ===================================================================== */
|
/* ===================================================================== */
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@@ -18,10 +18,13 @@ static const uint32_t IV512[] =
|
|||||||
0xE275EADE, 0x502D9FCD, 0xB9357178, 0x022A4B9A
|
0xE275EADE, 0x502D9FCD, 0xB9357178, 0x022A4B9A
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
#define mm256_ror2x256hi_1x32( a, b ) \
|
#define mm256_ror2x256hi_1x32( a, b ) \
|
||||||
_mm256_blend_epi32( mm256_shuflr128_32( a ), \
|
_mm256_blend_epi32( mm256_shuflr128_32( a ), \
|
||||||
mm256_shuflr128_32( b ), 0x88 )
|
mm256_shuflr128_32( b ), 0x88 )
|
||||||
|
*/
|
||||||
|
|
||||||
|
//#define mm256_ror2x256hi_1x32( a, b ) _mm256_alignr_epi8( b, a, 4 )
|
||||||
|
|
||||||
#if defined(__VAES__)
|
#if defined(__VAES__)
|
||||||
|
|
||||||
@@ -127,24 +130,24 @@ c512_2way( shavite512_2way_context *ctx, const void *msg )
|
|||||||
|
|
||||||
// round 2, 6, 10
|
// round 2, 6, 10
|
||||||
|
|
||||||
k00 = _mm256_xor_si256( k00, mm256_ror2x256hi_1x32( k12, k13 ) );
|
k00 = _mm256_xor_si256( k00, _mm256_alignr_epi8( k13, k12, 4 ) );
|
||||||
x = mm256_aesenc_2x128( _mm256_xor_si256( p3, k00 ), zero );
|
x = mm256_aesenc_2x128( _mm256_xor_si256( p3, k00 ), zero );
|
||||||
k01 = _mm256_xor_si256( k01, mm256_ror2x256hi_1x32( k13, k00 ) );
|
k01 = _mm256_xor_si256( k01, _mm256_alignr_epi8( k00, k13, 4 ) );
|
||||||
x = mm256_aesenc_2x128( _mm256_xor_si256( x, k01 ), zero );
|
x = mm256_aesenc_2x128( _mm256_xor_si256( x, k01 ), zero );
|
||||||
k02 = _mm256_xor_si256( k02, mm256_ror2x256hi_1x32( k00, k01 ) );
|
k02 = _mm256_xor_si256( k02, _mm256_alignr_epi8( k01, k00, 4 ) );
|
||||||
x = mm256_aesenc_2x128( _mm256_xor_si256( x, k02 ), zero );
|
x = mm256_aesenc_2x128( _mm256_xor_si256( x, k02 ), zero );
|
||||||
k03 = _mm256_xor_si256( k03, mm256_ror2x256hi_1x32( k01, k02 ) );
|
k03 = _mm256_xor_si256( k03, _mm256_alignr_epi8( k02, k01, 4 ) );
|
||||||
x = mm256_aesenc_2x128( _mm256_xor_si256( x, k03 ), zero );
|
x = mm256_aesenc_2x128( _mm256_xor_si256( x, k03 ), zero );
|
||||||
|
|
||||||
p2 = _mm256_xor_si256( p2, x );
|
p2 = _mm256_xor_si256( p2, x );
|
||||||
|
|
||||||
k10 = _mm256_xor_si256( k10, mm256_ror2x256hi_1x32( k02, k03 ) );
|
k10 = _mm256_xor_si256( k10, _mm256_alignr_epi8( k03, k02, 4 ) );
|
||||||
x = mm256_aesenc_2x128( _mm256_xor_si256( p1, k10 ), zero );
|
x = mm256_aesenc_2x128( _mm256_xor_si256( p1, k10 ), zero );
|
||||||
k11 = _mm256_xor_si256( k11, mm256_ror2x256hi_1x32( k03, k10 ) );
|
k11 = _mm256_xor_si256( k11, _mm256_alignr_epi8( k10, k03, 4 ) );
|
||||||
x = mm256_aesenc_2x128( _mm256_xor_si256( x, k11 ), zero );
|
x = mm256_aesenc_2x128( _mm256_xor_si256( x, k11 ), zero );
|
||||||
k12 = _mm256_xor_si256( k12, mm256_ror2x256hi_1x32( k10, k11 ) );
|
k12 = _mm256_xor_si256( k12, _mm256_alignr_epi8( k11, k10, 4 ) );
|
||||||
x = mm256_aesenc_2x128( _mm256_xor_si256( x, k12 ), zero );
|
x = mm256_aesenc_2x128( _mm256_xor_si256( x, k12 ), zero );
|
||||||
k13 = _mm256_xor_si256( k13, mm256_ror2x256hi_1x32( k11, k12 ) );
|
k13 = _mm256_xor_si256( k13, _mm256_alignr_epi8( k12, k11, 4 ) );
|
||||||
x = mm256_aesenc_2x128( _mm256_xor_si256( x, k13 ), zero );
|
x = mm256_aesenc_2x128( _mm256_xor_si256( x, k13 ), zero );
|
||||||
|
|
||||||
p0 = _mm256_xor_si256( p0, x );
|
p0 = _mm256_xor_si256( p0, x );
|
||||||
@@ -183,24 +186,24 @@ c512_2way( shavite512_2way_context *ctx, const void *msg )
|
|||||||
|
|
||||||
// round 4, 8, 12
|
// round 4, 8, 12
|
||||||
|
|
||||||
k00 = _mm256_xor_si256( k00, mm256_ror2x256hi_1x32( k12, k13 ) );
|
k00 = _mm256_xor_si256( k00, _mm256_alignr_epi8( k13, k12, 4 ) );
|
||||||
x = mm256_aesenc_2x128( _mm256_xor_si256( p1, k00 ), zero );
|
x = mm256_aesenc_2x128( _mm256_xor_si256( p1, k00 ), zero );
|
||||||
k01 = _mm256_xor_si256( k01, mm256_ror2x256hi_1x32( k13, k00 ) );
|
k01 = _mm256_xor_si256( k01, _mm256_alignr_epi8( k00, k13, 4 ) );
|
||||||
x = mm256_aesenc_2x128( _mm256_xor_si256( x, k01 ), zero );
|
x = mm256_aesenc_2x128( _mm256_xor_si256( x, k01 ), zero );
|
||||||
k02 = _mm256_xor_si256( k02, mm256_ror2x256hi_1x32( k00, k01 ) );
|
k02 = _mm256_xor_si256( k02, _mm256_alignr_epi8( k01, k00, 4 ) );
|
||||||
x = mm256_aesenc_2x128( _mm256_xor_si256( x, k02 ), zero );
|
x = mm256_aesenc_2x128( _mm256_xor_si256( x, k02 ), zero );
|
||||||
k03 = _mm256_xor_si256( k03, mm256_ror2x256hi_1x32( k01, k02 ) );
|
k03 = _mm256_xor_si256( k03, _mm256_alignr_epi8( k02, k01, 4 ) );
|
||||||
x = mm256_aesenc_2x128( _mm256_xor_si256( x, k03 ), zero );
|
x = mm256_aesenc_2x128( _mm256_xor_si256( x, k03 ), zero );
|
||||||
|
|
||||||
p0 = _mm256_xor_si256( p0, x );
|
p0 = _mm256_xor_si256( p0, x );
|
||||||
|
|
||||||
k10 = _mm256_xor_si256( k10, mm256_ror2x256hi_1x32( k02, k03 ) );
|
k10 = _mm256_xor_si256( k10, _mm256_alignr_epi8( k03, k02, 4 ) );
|
||||||
x = mm256_aesenc_2x128( _mm256_xor_si256( p3, k10 ), zero );
|
x = mm256_aesenc_2x128( _mm256_xor_si256( p3, k10 ), zero );
|
||||||
k11 = _mm256_xor_si256( k11, mm256_ror2x256hi_1x32( k03, k10 ) );
|
k11 = _mm256_xor_si256( k11, _mm256_alignr_epi8( k10, k03, 4 ) );
|
||||||
x = mm256_aesenc_2x128( _mm256_xor_si256( x, k11 ), zero );
|
x = mm256_aesenc_2x128( _mm256_xor_si256( x, k11 ), zero );
|
||||||
k12 = _mm256_xor_si256( k12, mm256_ror2x256hi_1x32( k10, k11 ) );
|
k12 = _mm256_xor_si256( k12, _mm256_alignr_epi8( k11, k10, 4 ) );
|
||||||
x = mm256_aesenc_2x128( _mm256_xor_si256( x, k12 ), zero );
|
x = mm256_aesenc_2x128( _mm256_xor_si256( x, k12 ), zero );
|
||||||
k13 = _mm256_xor_si256( k13, mm256_ror2x256hi_1x32( k11, k12 ) );
|
k13 = _mm256_xor_si256( k13, _mm256_alignr_epi8( k12, k11, 4 ) );
|
||||||
x = mm256_aesenc_2x128( _mm256_xor_si256( x, k13 ), zero );
|
x = mm256_aesenc_2x128( _mm256_xor_si256( x, k13 ), zero );
|
||||||
|
|
||||||
p2 = _mm256_xor_si256( p2, x );
|
p2 = _mm256_xor_si256( p2, x );
|
||||||
|
@@ -11,10 +11,6 @@ static const uint32_t IV512[] =
|
|||||||
0xE275EADE, 0x502D9FCD, 0xB9357178, 0x022A4B9A
|
0xE275EADE, 0x502D9FCD, 0xB9357178, 0x022A4B9A
|
||||||
};
|
};
|
||||||
|
|
||||||
#define mm512_ror2x512hi_1x32( a, b ) \
|
|
||||||
_mm512_mask_blend_epi32( 0x8888, mm512_shuflr128_32( a ), \
|
|
||||||
mm512_shuflr128_32( b ) )
|
|
||||||
|
|
||||||
static void
|
static void
|
||||||
c512_4way( shavite512_4way_context *ctx, const void *msg )
|
c512_4way( shavite512_4way_context *ctx, const void *msg )
|
||||||
{
|
{
|
||||||
@@ -106,24 +102,24 @@ c512_4way( shavite512_4way_context *ctx, const void *msg )
|
|||||||
|
|
||||||
// round 2, 6, 10
|
// round 2, 6, 10
|
||||||
|
|
||||||
K0 = _mm512_xor_si512( K0, mm512_ror2x512hi_1x32( K6, K7 ) );
|
K0 = _mm512_xor_si512( K0, _mm512_alignr_epi8( K7, K6, 4 ) );
|
||||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( P3, K0 ), m512_zero );
|
X = _mm512_aesenc_epi128( _mm512_xor_si512( P3, K0 ), m512_zero );
|
||||||
K1 = _mm512_xor_si512( K1, mm512_ror2x512hi_1x32( K7, K0 ) );
|
K1 = _mm512_xor_si512( K1, _mm512_alignr_epi8( K0, K7, 4 ) );
|
||||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K1 ), m512_zero );
|
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K1 ), m512_zero );
|
||||||
K2 = _mm512_xor_si512( K2, mm512_ror2x512hi_1x32( K0, K1 ) );
|
K2 = _mm512_xor_si512( K2, _mm512_alignr_epi8( K1, K0, 4 ) );
|
||||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K2 ), m512_zero );
|
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K2 ), m512_zero );
|
||||||
K3 = _mm512_xor_si512( K3, mm512_ror2x512hi_1x32( K1, K2 ) );
|
K3 = _mm512_xor_si512( K3, _mm512_alignr_epi8( K2, K1, 4 ) );
|
||||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K3 ), m512_zero );
|
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K3 ), m512_zero );
|
||||||
|
|
||||||
P2 = _mm512_xor_si512( P2, X );
|
P2 = _mm512_xor_si512( P2, X );
|
||||||
|
|
||||||
K4 = _mm512_xor_si512( K4, mm512_ror2x512hi_1x32( K2, K3 ) );
|
K4 = _mm512_xor_si512( K4, _mm512_alignr_epi8( K3, K2, 4 ) );
|
||||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( P1, K4 ), m512_zero );
|
X = _mm512_aesenc_epi128( _mm512_xor_si512( P1, K4 ), m512_zero );
|
||||||
K5 = _mm512_xor_si512( K5, mm512_ror2x512hi_1x32( K3, K4 ) );
|
K5 = _mm512_xor_si512( K5, _mm512_alignr_epi8( K4, K3, 4 ) );
|
||||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K5 ), m512_zero );
|
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K5 ), m512_zero );
|
||||||
K6 = _mm512_xor_si512( K6, mm512_ror2x512hi_1x32( K4, K5 ) );
|
K6 = _mm512_xor_si512( K6, _mm512_alignr_epi8( K5, K4, 4 ) );
|
||||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K6 ), m512_zero );
|
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K6 ), m512_zero );
|
||||||
K7 = _mm512_xor_si512( K7, mm512_ror2x512hi_1x32( K5, K6 ) );
|
K7 = _mm512_xor_si512( K7, _mm512_alignr_epi8( K6, K5, 4 ) );
|
||||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K7 ), m512_zero );
|
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K7 ), m512_zero );
|
||||||
|
|
||||||
P0 = _mm512_xor_si512( P0, X );
|
P0 = _mm512_xor_si512( P0, X );
|
||||||
@@ -162,24 +158,24 @@ c512_4way( shavite512_4way_context *ctx, const void *msg )
|
|||||||
|
|
||||||
// round 4, 8, 12
|
// round 4, 8, 12
|
||||||
|
|
||||||
K0 = _mm512_xor_si512( K0, mm512_ror2x512hi_1x32( K6, K7 ) );
|
K0 = _mm512_xor_si512( K0, _mm512_alignr_epi8( K7, K6, 4 ) );
|
||||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( P1, K0 ), m512_zero );
|
X = _mm512_aesenc_epi128( _mm512_xor_si512( P1, K0 ), m512_zero );
|
||||||
K1 = _mm512_xor_si512( K1, mm512_ror2x512hi_1x32( K7, K0 ) );
|
K1 = _mm512_xor_si512( K1, _mm512_alignr_epi8( K0, K7, 4 ) );
|
||||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K1 ), m512_zero );
|
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K1 ), m512_zero );
|
||||||
K2 = _mm512_xor_si512( K2, mm512_ror2x512hi_1x32( K0, K1 ) );
|
K2 = _mm512_xor_si512( K2, _mm512_alignr_epi8( K1, K0, 4 ) );
|
||||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K2 ), m512_zero );
|
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K2 ), m512_zero );
|
||||||
K3 = _mm512_xor_si512( K3, mm512_ror2x512hi_1x32( K1, K2 ) );
|
K3 = _mm512_xor_si512( K3, _mm512_alignr_epi8( K2, K1, 4 ) );
|
||||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K3 ), m512_zero );
|
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K3 ), m512_zero );
|
||||||
|
|
||||||
P0 = _mm512_xor_si512( P0, X );
|
P0 = _mm512_xor_si512( P0, X );
|
||||||
|
|
||||||
K4 = _mm512_xor_si512( K4, mm512_ror2x512hi_1x32( K2, K3 ) );
|
K4 = _mm512_xor_si512( K4, _mm512_alignr_epi8( K3, K2, 4 ) );
|
||||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( P3, K4 ), m512_zero );
|
X = _mm512_aesenc_epi128( _mm512_xor_si512( P3, K4 ), m512_zero );
|
||||||
K5 = _mm512_xor_si512( K5, mm512_ror2x512hi_1x32( K3, K4 ) );
|
K5 = _mm512_xor_si512( K5, _mm512_alignr_epi8( K4, K3, 4 ) );
|
||||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K5 ), m512_zero );
|
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K5 ), m512_zero );
|
||||||
K6 = _mm512_xor_si512( K6, mm512_ror2x512hi_1x32( K4, K5 ) );
|
K6 = _mm512_xor_si512( K6, _mm512_alignr_epi8( K5, K4, 4 ) );
|
||||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K6 ), m512_zero );
|
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K6 ), m512_zero );
|
||||||
K7 = _mm512_xor_si512( K7, mm512_ror2x512hi_1x32( K5, K6 ) );
|
K7 = _mm512_xor_si512( K7, _mm512_alignr_epi8( K6, K5, 4 ) );
|
||||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K7 ), m512_zero );
|
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K7 ), m512_zero );
|
||||||
|
|
||||||
P2 = _mm512_xor_si512( P2, X );
|
P2 = _mm512_xor_si512( P2, X );
|
||||||
|
@@ -59,30 +59,6 @@ static const sph_u32 IV512[] = {
|
|||||||
C32(0xE275EADE), C32(0x502D9FCD), C32(0xB9357178), C32(0x022A4B9A)
|
C32(0xE275EADE), C32(0x502D9FCD), C32(0xB9357178), C32(0x022A4B9A)
|
||||||
};
|
};
|
||||||
|
|
||||||
// Partially rotate elements in two 128 bit vectors a & b as one 256 bit vector
|
|
||||||
// and return the rotated 128 bit vector a.
|
|
||||||
// a[3:0] = { b[0], a[3], a[2], a[1] }
|
|
||||||
#if defined(__SSSE3__)
|
|
||||||
|
|
||||||
#define mm128_ror256hi_1x32( a, b ) _mm_alignr_epi8( b, a, 4 )
|
|
||||||
|
|
||||||
#else // SSE2
|
|
||||||
|
|
||||||
#define mm128_ror256hi_1x32( a, b ) \
|
|
||||||
_mm_or_si128( _mm_srli_si128( a, 4 ), \
|
|
||||||
_mm_slli_si128( b, 12 ) )
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/*
|
|
||||||
#if defined(__AVX2__)
|
|
||||||
// 2 way version of above
|
|
||||||
// a[7:0] = { b[4], a[7], a[6], a[5], b[0], a[3], a[2], a[1] }
|
|
||||||
#define mm256_ror2x256hi_1x32( a, b ) \
|
|
||||||
_mm256_blend_epi32( mm256_ror256_1x32( a ), \
|
|
||||||
mm256_rol256_3x32( b ), 0x88 )
|
|
||||||
#endif
|
|
||||||
*/
|
|
||||||
|
|
||||||
static void
|
static void
|
||||||
c512( sph_shavite_big_context *sc, const void *msg )
|
c512( sph_shavite_big_context *sc, const void *msg )
|
||||||
@@ -190,31 +166,31 @@ c512( sph_shavite_big_context *sc, const void *msg )
|
|||||||
|
|
||||||
// round 2, 6, 10
|
// round 2, 6, 10
|
||||||
|
|
||||||
k00 = _mm_xor_si128( k00, mm128_ror256hi_1x32( k12, k13 ) );
|
k00 = _mm_xor_si128( k00, _mm_alignr_epi8( k13, k12, 4 ) );
|
||||||
x = _mm_xor_si128( p3, k00 );
|
x = _mm_xor_si128( p3, k00 );
|
||||||
x = _mm_aesenc_si128( x, zero );
|
x = _mm_aesenc_si128( x, zero );
|
||||||
k01 = _mm_xor_si128( k01, mm128_ror256hi_1x32( k13, k00 ) );
|
k01 = _mm_xor_si128( k01, _mm_alignr_epi8( k00, k13, 4 ) );
|
||||||
x = _mm_xor_si128( x, k01 );
|
x = _mm_xor_si128( x, k01 );
|
||||||
x = _mm_aesenc_si128( x, zero );
|
x = _mm_aesenc_si128( x, zero );
|
||||||
k02 = _mm_xor_si128( k02, mm128_ror256hi_1x32( k00, k01 ) );
|
k02 = _mm_xor_si128( k02, _mm_alignr_epi8( k01, k00, 4 ) );
|
||||||
x = _mm_xor_si128( x, k02 );
|
x = _mm_xor_si128( x, k02 );
|
||||||
x = _mm_aesenc_si128( x, zero );
|
x = _mm_aesenc_si128( x, zero );
|
||||||
k03 = _mm_xor_si128( k03, mm128_ror256hi_1x32( k01, k02 ) );
|
k03 = _mm_xor_si128( k03, _mm_alignr_epi8( k02, k01, 4 ) );
|
||||||
x = _mm_xor_si128( x, k03 );
|
x = _mm_xor_si128( x, k03 );
|
||||||
x = _mm_aesenc_si128( x, zero );
|
x = _mm_aesenc_si128( x, zero );
|
||||||
|
|
||||||
p2 = _mm_xor_si128( p2, x );
|
p2 = _mm_xor_si128( p2, x );
|
||||||
|
|
||||||
k10 = _mm_xor_si128( k10, mm128_ror256hi_1x32( k02, k03 ) );
|
k10 = _mm_xor_si128( k10, _mm_alignr_epi8( k03, k02, 4 ) );
|
||||||
x = _mm_xor_si128( p1, k10 );
|
x = _mm_xor_si128( p1, k10 );
|
||||||
x = _mm_aesenc_si128( x, zero );
|
x = _mm_aesenc_si128( x, zero );
|
||||||
k11 = _mm_xor_si128( k11, mm128_ror256hi_1x32( k03, k10 ) );
|
k11 = _mm_xor_si128( k11, _mm_alignr_epi8( k10, k03, 4 ) );
|
||||||
x = _mm_xor_si128( x, k11 );
|
x = _mm_xor_si128( x, k11 );
|
||||||
x = _mm_aesenc_si128( x, zero );
|
x = _mm_aesenc_si128( x, zero );
|
||||||
k12 = _mm_xor_si128( k12, mm128_ror256hi_1x32( k10, k11 ) );
|
k12 = _mm_xor_si128( k12, _mm_alignr_epi8( k11, k10, 4 ) );
|
||||||
x = _mm_xor_si128( x, k12 );
|
x = _mm_xor_si128( x, k12 );
|
||||||
x = _mm_aesenc_si128( x, zero );
|
x = _mm_aesenc_si128( x, zero );
|
||||||
k13 = _mm_xor_si128( k13, mm128_ror256hi_1x32( k11, k12 ) );
|
k13 = _mm_xor_si128( k13, _mm_alignr_epi8( k12, k11, 4 ) );
|
||||||
x = _mm_xor_si128( x, k13 );
|
x = _mm_xor_si128( x, k13 );
|
||||||
x = _mm_aesenc_si128( x, zero );
|
x = _mm_aesenc_si128( x, zero );
|
||||||
|
|
||||||
@@ -262,31 +238,31 @@ c512( sph_shavite_big_context *sc, const void *msg )
|
|||||||
|
|
||||||
// round 4, 8, 12
|
// round 4, 8, 12
|
||||||
|
|
||||||
k00 = _mm_xor_si128( k00, mm128_ror256hi_1x32( k12, k13 ) );
|
k00 = _mm_xor_si128( k00, _mm_alignr_epi8( k13, k12, 4 ) );
|
||||||
x = _mm_xor_si128( p1, k00 );
|
x = _mm_xor_si128( p1, k00 );
|
||||||
x = _mm_aesenc_si128( x, zero );
|
x = _mm_aesenc_si128( x, zero );
|
||||||
k01 = _mm_xor_si128( k01, mm128_ror256hi_1x32( k13, k00 ) );
|
k01 = _mm_xor_si128( k01, _mm_alignr_epi8( k00, k13, 4 ) );
|
||||||
x = _mm_xor_si128( x, k01 );
|
x = _mm_xor_si128( x, k01 );
|
||||||
x = _mm_aesenc_si128( x, zero );
|
x = _mm_aesenc_si128( x, zero );
|
||||||
k02 = _mm_xor_si128( k02, mm128_ror256hi_1x32( k00, k01 ) );
|
k02 = _mm_xor_si128( k02, _mm_alignr_epi8( k01, k00, 4 ) );
|
||||||
x = _mm_xor_si128( x, k02 );
|
x = _mm_xor_si128( x, k02 );
|
||||||
x = _mm_aesenc_si128( x, zero );
|
x = _mm_aesenc_si128( x, zero );
|
||||||
k03 = _mm_xor_si128( k03, mm128_ror256hi_1x32( k01, k02 ) );
|
k03 = _mm_xor_si128( k03, _mm_alignr_epi8( k02, k01, 4 ) );
|
||||||
x = _mm_xor_si128( x, k03 );
|
x = _mm_xor_si128( x, k03 );
|
||||||
x = _mm_aesenc_si128( x, zero );
|
x = _mm_aesenc_si128( x, zero );
|
||||||
|
|
||||||
p0 = _mm_xor_si128( p0, x );
|
p0 = _mm_xor_si128( p0, x );
|
||||||
|
|
||||||
k10 = _mm_xor_si128( k10, mm128_ror256hi_1x32( k02, k03 ) );
|
k10 = _mm_xor_si128( k10, _mm_alignr_epi8( k03, k02, 4 ) );
|
||||||
x = _mm_xor_si128( p3, k10 );
|
x = _mm_xor_si128( p3, k10 );
|
||||||
x = _mm_aesenc_si128( x, zero );
|
x = _mm_aesenc_si128( x, zero );
|
||||||
k11 = _mm_xor_si128( k11, mm128_ror256hi_1x32( k03, k10 ) );
|
k11 = _mm_xor_si128( k11, _mm_alignr_epi8( k10, k03, 4 ) );
|
||||||
x = _mm_xor_si128( x, k11 );
|
x = _mm_xor_si128( x, k11 );
|
||||||
x = _mm_aesenc_si128( x, zero );
|
x = _mm_aesenc_si128( x, zero );
|
||||||
k12 = _mm_xor_si128( k12, mm128_ror256hi_1x32( k10, k11 ) );
|
k12 = _mm_xor_si128( k12, _mm_alignr_epi8( k11, k10, 4 ) );
|
||||||
x = _mm_xor_si128( x, k12 );
|
x = _mm_xor_si128( x, k12 );
|
||||||
x = _mm_aesenc_si128( x, zero );
|
x = _mm_aesenc_si128( x, zero );
|
||||||
k13 = _mm_xor_si128( k13, mm128_ror256hi_1x32( k11, k12 ) );
|
k13 = _mm_xor_si128( k13, _mm_alignr_epi8( k12, k11, 4 ) );
|
||||||
x = _mm_xor_si128( x, k13 );
|
x = _mm_xor_si128( x, k13 );
|
||||||
x = _mm_aesenc_si128( x, zero );
|
x = _mm_aesenc_si128( x, zero );
|
||||||
|
|
||||||
|
@@ -35,7 +35,7 @@
|
|||||||
|
|
||||||
#include "sph_shavite.h"
|
#include "sph_shavite.h"
|
||||||
|
|
||||||
#if !defined(__AES__)
|
#if !(defined(__AES__) && defined(__SSSE3__))
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
extern "C"{
|
extern "C"{
|
||||||
|
@@ -263,7 +263,7 @@ void sph_shavite384_addbits_and_close(
|
|||||||
void *cc, unsigned ub, unsigned n, void *dst);
|
void *cc, unsigned ub, unsigned n, void *dst);
|
||||||
|
|
||||||
//Don't call these directly from application code, use the macros below.
|
//Don't call these directly from application code, use the macros below.
|
||||||
#ifdef __AES__
|
#if defined(__AES__) && defined(__SSSE3__)
|
||||||
|
|
||||||
void sph_shavite512_aesni_init(void *cc);
|
void sph_shavite512_aesni_init(void *cc);
|
||||||
void sph_shavite512_aesni(void *cc, const void *data, size_t len);
|
void sph_shavite512_aesni(void *cc, const void *data, size_t len);
|
||||||
|
@@ -74,7 +74,7 @@ typedef struct {
|
|||||||
|
|
||||||
void sm3_init(sm3_ctx_t *ctx);
|
void sm3_init(sm3_ctx_t *ctx);
|
||||||
void sm3_update(sm3_ctx_t *ctx, const unsigned char* data, size_t data_len);
|
void sm3_update(sm3_ctx_t *ctx, const unsigned char* data, size_t data_len);
|
||||||
void sm3_final(sm3_ctx_t *ctx, unsigned char digest[SM3_DIGEST_LENGTH]);
|
void sm3_final(sm3_ctx_t *ctx, unsigned char *digest);
|
||||||
void sm3_compress(uint32_t digest[8], const unsigned char block[SM3_BLOCK_SIZE]);
|
void sm3_compress(uint32_t digest[8], const unsigned char block[SM3_BLOCK_SIZE]);
|
||||||
void sm3(const unsigned char *data, size_t datalen,
|
void sm3(const unsigned char *data, size_t datalen,
|
||||||
unsigned char digest[SM3_DIGEST_LENGTH]);
|
unsigned char digest[SM3_DIGEST_LENGTH]);
|
||||||
|
@@ -87,16 +87,17 @@ int verthash_info_init(verthash_info_t* info, const char* file_name)
|
|||||||
// Allocate data
|
// Allocate data
|
||||||
info->data = (uint8_t *)malloc_hugepages( fileSize );
|
info->data = (uint8_t *)malloc_hugepages( fileSize );
|
||||||
if ( info->data )
|
if ( info->data )
|
||||||
if ( !opt_quiet ) applog( LOG_INFO, "Verthash data is using huge pages");
|
|
||||||
else
|
|
||||||
{
|
{
|
||||||
|
if ( !opt_quiet ) applog( LOG_INFO, "Verthash data is using huge pages");
|
||||||
|
}
|
||||||
|
else
|
||||||
info->data = (uint8_t *)_mm_malloc( fileSize, 64 );
|
info->data = (uint8_t *)_mm_malloc( fileSize, 64 );
|
||||||
if (!info->data)
|
|
||||||
{
|
if ( !info->data )
|
||||||
fclose(fileMiningData);
|
{
|
||||||
// Memory allocation fatal error.
|
fclose( fileMiningData );
|
||||||
return 2;
|
// Memory allocation fatal error.
|
||||||
}
|
return 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Load data
|
// Load data
|
||||||
|
@@ -24,15 +24,15 @@ int scanhash_x16rt_8way( struct work *work, uint32_t max_nonce,
|
|||||||
if ( bench ) ptarget[7] = 0x0cff;
|
if ( bench ) ptarget[7] = 0x0cff;
|
||||||
|
|
||||||
static __thread uint32_t s_ntime = UINT32_MAX;
|
static __thread uint32_t s_ntime = UINT32_MAX;
|
||||||
uint32_t ntime = bswap_32( pdata[17] );
|
uint32_t masked_ntime = bswap_32( pdata[17] ) & 0xffffff80;
|
||||||
if ( s_ntime != ntime )
|
if ( s_ntime != masked_ntime )
|
||||||
{
|
{
|
||||||
x16rt_getTimeHash( ntime, &timeHash );
|
x16rt_getTimeHash( masked_ntime, &timeHash );
|
||||||
x16rt_getAlgoString( &timeHash[0], x16r_hash_order );
|
x16rt_getAlgoString( &timeHash[0], x16r_hash_order );
|
||||||
s_ntime = ntime;
|
s_ntime = masked_ntime;
|
||||||
if ( opt_debug && !thr_id )
|
if ( opt_debug && !thr_id )
|
||||||
applog( LOG_INFO, "hash order: %s time: (%08x) time hash: (%08x)",
|
applog( LOG_INFO, "hash order: %s time: (%08x) time hash: (%08x)",
|
||||||
x16r_hash_order, ntime, timeHash );
|
x16r_hash_order, bswap_32( pdata[17] ), timeHash );
|
||||||
}
|
}
|
||||||
|
|
||||||
x16r_8way_prehash( vdata, pdata );
|
x16r_8way_prehash( vdata, pdata );
|
||||||
@@ -78,15 +78,15 @@ int scanhash_x16rt_4way( struct work *work, uint32_t max_nonce,
|
|||||||
if ( bench ) ptarget[7] = 0x0cff;
|
if ( bench ) ptarget[7] = 0x0cff;
|
||||||
|
|
||||||
static __thread uint32_t s_ntime = UINT32_MAX;
|
static __thread uint32_t s_ntime = UINT32_MAX;
|
||||||
uint32_t ntime = bswap_32( pdata[17] );
|
uint32_t masked_ntime = bswap_32( pdata[17] ) & 0xffffff80;
|
||||||
if ( s_ntime != ntime )
|
if ( s_ntime != masked_ntime )
|
||||||
{
|
{
|
||||||
x16rt_getTimeHash( ntime, &timeHash );
|
x16rt_getTimeHash( masked_ntime, &timeHash );
|
||||||
x16rt_getAlgoString( &timeHash[0], x16r_hash_order );
|
x16rt_getAlgoString( &timeHash[0], x16r_hash_order );
|
||||||
s_ntime = ntime;
|
s_ntime = masked_ntime;
|
||||||
if ( opt_debug && !thr_id )
|
if ( opt_debug && !thr_id )
|
||||||
applog( LOG_INFO, "hash order: %s time: (%08x) time hash: (%08x)",
|
applog( LOG_INFO, "hash order: %s time: (%08x) time hash: (%08x)",
|
||||||
x16r_hash_order, ntime, timeHash );
|
x16r_hash_order, bswap_32( pdata[17] ), timeHash );
|
||||||
}
|
}
|
||||||
|
|
||||||
x16r_4way_prehash( vdata, pdata );
|
x16r_4way_prehash( vdata, pdata );
|
||||||
|
@@ -20,15 +20,15 @@ int scanhash_x16rt( struct work *work, uint32_t max_nonce,
|
|||||||
mm128_bswap32_80( edata, pdata );
|
mm128_bswap32_80( edata, pdata );
|
||||||
|
|
||||||
static __thread uint32_t s_ntime = UINT32_MAX;
|
static __thread uint32_t s_ntime = UINT32_MAX;
|
||||||
uint32_t ntime = swab32( pdata[17] );
|
uint32_t masked_ntime = swab32( pdata[17] ) & 0xffffff80;
|
||||||
if ( s_ntime != ntime )
|
if ( s_ntime != masked_ntime )
|
||||||
{
|
{
|
||||||
x16rt_getTimeHash( ntime, &timeHash );
|
x16rt_getTimeHash( masked_ntime, &timeHash );
|
||||||
x16rt_getAlgoString( &timeHash[0], x16r_hash_order );
|
x16rt_getAlgoString( &timeHash[0], x16r_hash_order );
|
||||||
s_ntime = ntime;
|
s_ntime = masked_ntime;
|
||||||
if ( opt_debug && !thr_id )
|
if ( opt_debug && !thr_id )
|
||||||
applog( LOG_INFO, "hash order: %s time: (%08x) time hash: (%08x)",
|
applog( LOG_INFO, "hash order: %s time: (%08x) time hash: (%08x)",
|
||||||
x16r_hash_order, ntime, timeHash );
|
x16r_hash_order, swab32( pdata[17] ), timeHash );
|
||||||
}
|
}
|
||||||
|
|
||||||
x16r_prehash( edata, pdata );
|
x16r_prehash( edata, pdata );
|
||||||
|
@@ -36,8 +36,8 @@ mv cpuminer cpuminer-avx2-sha-vaes
|
|||||||
# AVX2 SHA AES: AMD Zen1
|
# AVX2 SHA AES: AMD Zen1
|
||||||
make clean || echo done
|
make clean || echo done
|
||||||
rm -f config.status
|
rm -f config.status
|
||||||
CFLAGS="-O3 -march=znver1 -maes -Wall -fno-common" ./configure --with-curl
|
#CFLAGS="-O3 -march=znver1 -maes -Wall -fno-common" ./configure --with-curl
|
||||||
#CFLAGS="-O3 -maes -mavx2 -msha -Wall -fno-common" ./configure --with-curl
|
CFLAGS="-O3 -maes -mavx2 -msha -Wall -fno-common" ./configure --with-curl
|
||||||
make -j 8
|
make -j 8
|
||||||
strip -s cpuminer
|
strip -s cpuminer
|
||||||
mv cpuminer cpuminer-avx2-sha
|
mv cpuminer cpuminer-avx2-sha
|
||||||
|
10
build-msys2.sh
Executable file
10
build-msys2.sh
Executable file
@@ -0,0 +1,10 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
#
|
||||||
|
# Compile on Windows using MSYS2 and MinGW.
|
||||||
|
|
||||||
|
make distclean || echo clean
|
||||||
|
rm -f config.status
|
||||||
|
./autogen.sh || echo done
|
||||||
|
CFLAGS="-O3 --param=evrp-mode=legacy -march=native -Wall -D_WIN32_WINNT=0x0601" ./configure --with-curl
|
||||||
|
make -j 4
|
||||||
|
strip -s cpuminer
|
20
configure
vendored
20
configure
vendored
@@ -1,6 +1,6 @@
|
|||||||
#! /bin/sh
|
#! /bin/sh
|
||||||
# Guess values for system-dependent variables and create Makefiles.
|
# Guess values for system-dependent variables and create Makefiles.
|
||||||
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.19.3.
|
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.19.7.
|
||||||
#
|
#
|
||||||
#
|
#
|
||||||
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
|
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
|
||||||
@@ -577,8 +577,8 @@ MAKEFLAGS=
|
|||||||
# Identity of this package.
|
# Identity of this package.
|
||||||
PACKAGE_NAME='cpuminer-opt'
|
PACKAGE_NAME='cpuminer-opt'
|
||||||
PACKAGE_TARNAME='cpuminer-opt'
|
PACKAGE_TARNAME='cpuminer-opt'
|
||||||
PACKAGE_VERSION='3.19.3'
|
PACKAGE_VERSION='3.19.7'
|
||||||
PACKAGE_STRING='cpuminer-opt 3.19.3'
|
PACKAGE_STRING='cpuminer-opt 3.19.7'
|
||||||
PACKAGE_BUGREPORT=''
|
PACKAGE_BUGREPORT=''
|
||||||
PACKAGE_URL=''
|
PACKAGE_URL=''
|
||||||
|
|
||||||
@@ -1332,7 +1332,7 @@ if test "$ac_init_help" = "long"; then
|
|||||||
# Omit some internal or obsolete options to make the list less imposing.
|
# Omit some internal or obsolete options to make the list less imposing.
|
||||||
# This message is too long to be a string in the A/UX 3.1 sh.
|
# This message is too long to be a string in the A/UX 3.1 sh.
|
||||||
cat <<_ACEOF
|
cat <<_ACEOF
|
||||||
\`configure' configures cpuminer-opt 3.19.3 to adapt to many kinds of systems.
|
\`configure' configures cpuminer-opt 3.19.7 to adapt to many kinds of systems.
|
||||||
|
|
||||||
Usage: $0 [OPTION]... [VAR=VALUE]...
|
Usage: $0 [OPTION]... [VAR=VALUE]...
|
||||||
|
|
||||||
@@ -1404,7 +1404,7 @@ fi
|
|||||||
|
|
||||||
if test -n "$ac_init_help"; then
|
if test -n "$ac_init_help"; then
|
||||||
case $ac_init_help in
|
case $ac_init_help in
|
||||||
short | recursive ) echo "Configuration of cpuminer-opt 3.19.3:";;
|
short | recursive ) echo "Configuration of cpuminer-opt 3.19.7:";;
|
||||||
esac
|
esac
|
||||||
cat <<\_ACEOF
|
cat <<\_ACEOF
|
||||||
|
|
||||||
@@ -1509,7 +1509,7 @@ fi
|
|||||||
test -n "$ac_init_help" && exit $ac_status
|
test -n "$ac_init_help" && exit $ac_status
|
||||||
if $ac_init_version; then
|
if $ac_init_version; then
|
||||||
cat <<\_ACEOF
|
cat <<\_ACEOF
|
||||||
cpuminer-opt configure 3.19.3
|
cpuminer-opt configure 3.19.7
|
||||||
generated by GNU Autoconf 2.69
|
generated by GNU Autoconf 2.69
|
||||||
|
|
||||||
Copyright (C) 2012 Free Software Foundation, Inc.
|
Copyright (C) 2012 Free Software Foundation, Inc.
|
||||||
@@ -2012,7 +2012,7 @@ cat >config.log <<_ACEOF
|
|||||||
This file contains any messages produced by compilers while
|
This file contains any messages produced by compilers while
|
||||||
running configure, to aid debugging if configure makes a mistake.
|
running configure, to aid debugging if configure makes a mistake.
|
||||||
|
|
||||||
It was created by cpuminer-opt $as_me 3.19.3, which was
|
It was created by cpuminer-opt $as_me 3.19.7, which was
|
||||||
generated by GNU Autoconf 2.69. Invocation command line was
|
generated by GNU Autoconf 2.69. Invocation command line was
|
||||||
|
|
||||||
$ $0 $@
|
$ $0 $@
|
||||||
@@ -2993,7 +2993,7 @@ fi
|
|||||||
|
|
||||||
# Define the identity of the package.
|
# Define the identity of the package.
|
||||||
PACKAGE='cpuminer-opt'
|
PACKAGE='cpuminer-opt'
|
||||||
VERSION='3.19.3'
|
VERSION='3.19.7'
|
||||||
|
|
||||||
|
|
||||||
cat >>confdefs.h <<_ACEOF
|
cat >>confdefs.h <<_ACEOF
|
||||||
@@ -6690,7 +6690,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
|
|||||||
# report actual input values of CONFIG_FILES etc. instead of their
|
# report actual input values of CONFIG_FILES etc. instead of their
|
||||||
# values after options handling.
|
# values after options handling.
|
||||||
ac_log="
|
ac_log="
|
||||||
This file was extended by cpuminer-opt $as_me 3.19.3, which was
|
This file was extended by cpuminer-opt $as_me 3.19.7, which was
|
||||||
generated by GNU Autoconf 2.69. Invocation command line was
|
generated by GNU Autoconf 2.69. Invocation command line was
|
||||||
|
|
||||||
CONFIG_FILES = $CONFIG_FILES
|
CONFIG_FILES = $CONFIG_FILES
|
||||||
@@ -6756,7 +6756,7 @@ _ACEOF
|
|||||||
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
|
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
|
||||||
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
|
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
|
||||||
ac_cs_version="\\
|
ac_cs_version="\\
|
||||||
cpuminer-opt config.status 3.19.3
|
cpuminer-opt config.status 3.19.7
|
||||||
configured by $0, generated by GNU Autoconf 2.69,
|
configured by $0, generated by GNU Autoconf 2.69,
|
||||||
with options \\"\$ac_cs_config\\"
|
with options \\"\$ac_cs_config\\"
|
||||||
|
|
||||||
|
@@ -1,4 +1,4 @@
|
|||||||
AC_INIT([cpuminer-opt], [3.19.3])
|
AC_INIT([cpuminer-opt], [3.19.7])
|
||||||
|
|
||||||
AC_PREREQ([2.59c])
|
AC_PREREQ([2.59c])
|
||||||
AC_CANONICAL_SYSTEM
|
AC_CANONICAL_SYSTEM
|
||||||
|
188
cpu-miner.c
188
cpu-miner.c
@@ -105,8 +105,9 @@ bool opt_randomize = false;
|
|||||||
static int opt_retries = -1;
|
static int opt_retries = -1;
|
||||||
static int opt_fail_pause = 10;
|
static int opt_fail_pause = 10;
|
||||||
static int opt_time_limit = 0;
|
static int opt_time_limit = 0;
|
||||||
|
static unsigned int time_limit_stop = 0;
|
||||||
int opt_timeout = 300;
|
int opt_timeout = 300;
|
||||||
static int opt_scantime = 5;
|
static int opt_scantime = 0;
|
||||||
const int min_scantime = 1;
|
const int min_scantime = 1;
|
||||||
//static const bool opt_time = true;
|
//static const bool opt_time = true;
|
||||||
enum algos opt_algo = ALGO_NULL;
|
enum algos opt_algo = ALGO_NULL;
|
||||||
@@ -127,6 +128,12 @@ char *short_url = NULL;
|
|||||||
char *coinbase_address;
|
char *coinbase_address;
|
||||||
char *opt_data_file = NULL;
|
char *opt_data_file = NULL;
|
||||||
bool opt_verify = false;
|
bool opt_verify = false;
|
||||||
|
static bool opt_stratum_keepalive = false;
|
||||||
|
static struct timeval stratum_keepalive_timer;
|
||||||
|
// Stratum typically times out in 5 minutes or 300 seconds
|
||||||
|
#define stratum_keepalive_timeout 180 // 3 minutes
|
||||||
|
static struct timeval stratum_reset_time;
|
||||||
|
|
||||||
|
|
||||||
// pk_buffer_size is used as a version selector by b58 code, therefore
|
// pk_buffer_size is used as a version selector by b58 code, therefore
|
||||||
// it must be set correctly to work.
|
// it must be set correctly to work.
|
||||||
@@ -187,7 +194,6 @@ int default_api_listen = 4048;
|
|||||||
static struct timeval session_start;
|
static struct timeval session_start;
|
||||||
static struct timeval five_min_start;
|
static struct timeval five_min_start;
|
||||||
static uint64_t session_first_block = 0;
|
static uint64_t session_first_block = 0;
|
||||||
static double latency_sum = 0.;
|
|
||||||
static uint64_t submit_sum = 0;
|
static uint64_t submit_sum = 0;
|
||||||
static uint64_t accept_sum = 0;
|
static uint64_t accept_sum = 0;
|
||||||
static uint64_t stale_sum = 0;
|
static uint64_t stale_sum = 0;
|
||||||
@@ -336,6 +342,7 @@ void get_currentalgo(char* buf, int sz)
|
|||||||
|
|
||||||
void proper_exit(int reason)
|
void proper_exit(int reason)
|
||||||
{
|
{
|
||||||
|
if (opt_debug) applog(LOG_INFO,"Program exit");
|
||||||
#ifdef WIN32
|
#ifdef WIN32
|
||||||
if (opt_background) {
|
if (opt_background) {
|
||||||
HWND hcon = GetConsoleWindow();
|
HWND hcon = GetConsoleWindow();
|
||||||
@@ -1143,7 +1150,7 @@ void report_summary_log( bool force )
|
|||||||
solved, solved_block_count );
|
solved, solved_block_count );
|
||||||
}
|
}
|
||||||
if ( stratum_errors )
|
if ( stratum_errors )
|
||||||
applog2( LOG_INFO, "Stratum errors %7d", stratum_errors );
|
applog2( LOG_INFO, "Stratum resets %7d", stratum_errors );
|
||||||
|
|
||||||
applog2( LOG_INFO, "Hi/Lo Share Diff %.5g / %.5g",
|
applog2( LOG_INFO, "Hi/Lo Share Diff %.5g / %.5g",
|
||||||
highest_share, lowest_share );
|
highest_share, lowest_share );
|
||||||
@@ -1274,7 +1281,6 @@ static int share_result( int result, struct work *work,
|
|||||||
else reject_sum++;
|
else reject_sum++;
|
||||||
}
|
}
|
||||||
submit_sum++;
|
submit_sum++;
|
||||||
latency_sum += latency;
|
|
||||||
|
|
||||||
pthread_mutex_unlock( &stats_lock );
|
pthread_mutex_unlock( &stats_lock );
|
||||||
|
|
||||||
@@ -1290,9 +1296,9 @@ static int share_result( int result, struct work *work,
|
|||||||
else rcol = CL_LRD;
|
else rcol = CL_LRD;
|
||||||
}
|
}
|
||||||
|
|
||||||
applog( LOG_INFO, "%d %s%s %s%s %s%s %s%s" CL_WHT ", %.3f sec (%dms)",
|
applog( LOG_INFO, "%d %s%s %s%s %s%s %s%s%s, %.3f sec (%dms)",
|
||||||
my_stats.share_count, acol, ares, scol, sres, rcol, rres, bcol,
|
my_stats.share_count, acol, ares, scol, sres, rcol, rres, bcol,
|
||||||
bres, share_time, latency );
|
bres, CL_N, share_time, latency );
|
||||||
|
|
||||||
if ( unlikely( opt_debug || !result || solved ) )
|
if ( unlikely( opt_debug || !result || solved ) )
|
||||||
{
|
{
|
||||||
@@ -2110,7 +2116,7 @@ static void stratum_gen_work( struct stratum_ctx *sctx, struct work *g_work )
|
|||||||
{
|
{
|
||||||
unsigned char *xnonce2str = bebin2hex( g_work->xnonce2,
|
unsigned char *xnonce2str = bebin2hex( g_work->xnonce2,
|
||||||
g_work->xnonce2_len );
|
g_work->xnonce2_len );
|
||||||
applog( LOG_INFO, "Extranonce2 %s, Block %d, Job %s",
|
applog( LOG_INFO, "Extranonce2 0x%s, Block %d, Job %s",
|
||||||
xnonce2str, sctx->block_height, g_work->job_id );
|
xnonce2str, sctx->block_height, g_work->job_id );
|
||||||
free( xnonce2str );
|
free( xnonce2str );
|
||||||
}
|
}
|
||||||
@@ -2197,8 +2203,6 @@ static void *miner_thread( void *userdata )
|
|||||||
// : 0;
|
// : 0;
|
||||||
uint32_t end_nonce = 0xffffffffU / opt_n_threads * (thr_id + 1) - 0x20;
|
uint32_t end_nonce = 0xffffffffU / opt_n_threads * (thr_id + 1) - 0x20;
|
||||||
|
|
||||||
time_t firstwork_time = 0;
|
|
||||||
int i;
|
|
||||||
memset( &work, 0, sizeof(work) );
|
memset( &work, 0, sizeof(work) );
|
||||||
|
|
||||||
/* Set worker threads to nice 19 and then preferentially to SCHED_IDLE
|
/* Set worker threads to nice 19 and then preferentially to SCHED_IDLE
|
||||||
@@ -2242,7 +2246,7 @@ static void *miner_thread( void *userdata )
|
|||||||
|
|
||||||
if ( !algo_gate.miner_thread_init( thr_id ) )
|
if ( !algo_gate.miner_thread_init( thr_id ) )
|
||||||
{
|
{
|
||||||
applog( LOG_ERR, "FAIL: thread %u failed to initialize", thr_id );
|
applog( LOG_ERR, "FAIL: thread %d failed to initialize", thr_id );
|
||||||
exit (1);
|
exit (1);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2270,22 +2274,34 @@ static void *miner_thread( void *userdata )
|
|||||||
{
|
{
|
||||||
while ( unlikely( stratum_down ) )
|
while ( unlikely( stratum_down ) )
|
||||||
sleep( 1 );
|
sleep( 1 );
|
||||||
if ( *nonceptr >= end_nonce )
|
if ( unlikely( ( *nonceptr >= end_nonce )
|
||||||
stratum_gen_work( &stratum, &g_work );
|
&& !work_restart[thr_id].restart ) )
|
||||||
|
{
|
||||||
|
if ( opt_extranonce )
|
||||||
|
stratum_gen_work( &stratum, &g_work );
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if ( !thr_id )
|
||||||
|
{
|
||||||
|
applog( LOG_WARNING, "nonce range exhausted, extranonce not subscribed" );
|
||||||
|
applog( LOG_WARNING, "waiting for new work...");
|
||||||
|
}
|
||||||
|
while ( !work_restart[thr_id].restart )
|
||||||
|
sleep ( 1 );
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else
|
else if ( !opt_benchmark ) // GBT or getwork
|
||||||
{
|
{
|
||||||
pthread_rwlock_wrlock( &g_work_lock );
|
pthread_rwlock_wrlock( &g_work_lock );
|
||||||
|
|
||||||
if ( ( ( time(NULL) - g_work_time )
|
if ( ( ( time(NULL) - g_work_time ) >= opt_scantime )
|
||||||
>= ( have_longpoll ? LP_SCANTIME : opt_scantime ) )
|
|
||||||
|| ( *nonceptr >= end_nonce ) )
|
|| ( *nonceptr >= end_nonce ) )
|
||||||
{
|
{
|
||||||
if ( unlikely( !get_work( mythr, &g_work ) ) )
|
if ( unlikely( !get_work( mythr, &g_work ) ) )
|
||||||
{
|
{
|
||||||
pthread_rwlock_unlock( &g_work_lock );
|
pthread_rwlock_unlock( &g_work_lock );
|
||||||
applog( LOG_ERR, "work retrieval failed, exiting "
|
applog( LOG_ERR, "work retrieval failed, exiting miner thread %d", thr_id );
|
||||||
"mining thread %d", thr_id );
|
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
g_work_time = time(NULL);
|
g_work_time = time(NULL);
|
||||||
@@ -2308,25 +2324,14 @@ static void *miner_thread( void *userdata )
|
|||||||
if ( unlikely( !algo_gate.ready_to_mine( &work, &stratum, thr_id ) ) )
|
if ( unlikely( !algo_gate.ready_to_mine( &work, &stratum, thr_id ) ) )
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
// LP_SCANTIME overrides opt_scantime option, is this right?
|
// opt_scantime expressed in hashes
|
||||||
|
max64 = opt_scantime * thr_hashrates[thr_id];
|
||||||
// adjust max_nonce to meet target scan time. Stratum and longpoll
|
|
||||||
// can go longer because they can rely on restart_threads to signal
|
|
||||||
// an early abort. get_work on the other hand can't rely on
|
|
||||||
// restart_threads so need a much shorter scantime
|
|
||||||
if ( have_stratum )
|
|
||||||
max64 = 60 * thr_hashrates[thr_id];
|
|
||||||
else if ( have_longpoll )
|
|
||||||
max64 = LP_SCANTIME * thr_hashrates[thr_id];
|
|
||||||
else // getwork inline
|
|
||||||
max64 = opt_scantime * thr_hashrates[thr_id];
|
|
||||||
|
|
||||||
// time limit
|
// time limit
|
||||||
if ( unlikely( opt_time_limit && firstwork_time ) )
|
if ( unlikely( opt_time_limit ) )
|
||||||
{
|
{
|
||||||
int passed = (int)( time(NULL) - firstwork_time );
|
unsigned int now = (unsigned int)time(NULL);
|
||||||
int remain = (int)( opt_time_limit - passed );
|
if ( now >= time_limit_stop )
|
||||||
if ( remain < 0 )
|
|
||||||
{
|
{
|
||||||
if ( thr_id != 0 )
|
if ( thr_id != 0 )
|
||||||
{
|
{
|
||||||
@@ -2338,14 +2343,16 @@ static void *miner_thread( void *userdata )
|
|||||||
char rate[32];
|
char rate[32];
|
||||||
format_hashrate( global_hashrate, rate );
|
format_hashrate( global_hashrate, rate );
|
||||||
applog( LOG_NOTICE, "Benchmark: %s", rate );
|
applog( LOG_NOTICE, "Benchmark: %s", rate );
|
||||||
fprintf(stderr, "%llu\n", (unsigned long long)global_hashrate);
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
applog( LOG_NOTICE,
|
applog( LOG_NOTICE, "Mining timeout of %ds reached, exiting...",
|
||||||
"Mining timeout of %ds reached, exiting...", opt_time_limit);
|
opt_time_limit);
|
||||||
proper_exit(0);
|
|
||||||
|
proper_exit(0);
|
||||||
}
|
}
|
||||||
if ( remain < max64 ) max64 = remain;
|
// else
|
||||||
|
if ( time_limit_stop - now < opt_scantime )
|
||||||
|
max64 = ( time_limit_stop - now ) * thr_hashrates[thr_id] ;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Select nonce range based on max64, the estimated number of hashes
|
// Select nonce range based on max64, the estimated number of hashes
|
||||||
@@ -2361,8 +2368,6 @@ static void *miner_thread( void *userdata )
|
|||||||
max_nonce = work_nonce + (uint32_t)max64;
|
max_nonce = work_nonce + (uint32_t)max64;
|
||||||
|
|
||||||
// init time
|
// init time
|
||||||
if ( firstwork_time == 0 )
|
|
||||||
firstwork_time = time(NULL);
|
|
||||||
hashes_done = 0;
|
hashes_done = 0;
|
||||||
gettimeofday( (struct timeval *) &tv_start, NULL );
|
gettimeofday( (struct timeval *) &tv_start, NULL );
|
||||||
|
|
||||||
@@ -2435,7 +2440,7 @@ static void *miner_thread( void *userdata )
|
|||||||
{
|
{
|
||||||
double hashrate = 0.;
|
double hashrate = 0.;
|
||||||
pthread_mutex_lock( &stats_lock );
|
pthread_mutex_lock( &stats_lock );
|
||||||
for ( i = 0; i < opt_n_threads; i++ )
|
for ( int i = 0; i < opt_n_threads; i++ )
|
||||||
hashrate += thr_hashrates[i];
|
hashrate += thr_hashrates[i];
|
||||||
global_hashrate = hashrate;
|
global_hashrate = hashrate;
|
||||||
pthread_mutex_unlock( &stats_lock );
|
pthread_mutex_unlock( &stats_lock );
|
||||||
@@ -2729,6 +2734,18 @@ void std_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
|
|||||||
sctx->job.final_sapling_hash );
|
sctx->job.final_sapling_hash );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Loop is out of order:
|
||||||
|
//
|
||||||
|
// connect/reconnect
|
||||||
|
// handle message
|
||||||
|
// get new message
|
||||||
|
//
|
||||||
|
// change to
|
||||||
|
// connect/reconnect
|
||||||
|
// get new message
|
||||||
|
// handle message
|
||||||
|
|
||||||
|
|
||||||
static void *stratum_thread(void *userdata )
|
static void *stratum_thread(void *userdata )
|
||||||
{
|
{
|
||||||
struct thr_info *mythr = (struct thr_info *) userdata;
|
struct thr_info *mythr = (struct thr_info *) userdata;
|
||||||
@@ -2746,6 +2763,7 @@ static void *stratum_thread(void *userdata )
|
|||||||
if ( unlikely( stratum_need_reset ) )
|
if ( unlikely( stratum_need_reset ) )
|
||||||
{
|
{
|
||||||
stratum_need_reset = false;
|
stratum_need_reset = false;
|
||||||
|
gettimeofday( &stratum_reset_time, NULL );
|
||||||
stratum_down = true;
|
stratum_down = true;
|
||||||
stratum_errors++;
|
stratum_errors++;
|
||||||
stratum_disconnect( &stratum );
|
stratum_disconnect( &stratum );
|
||||||
@@ -2756,7 +2774,7 @@ static void *stratum_thread(void *userdata )
|
|||||||
applog(LOG_BLUE, "Connection changed to %s", short_url);
|
applog(LOG_BLUE, "Connection changed to %s", short_url);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
applog(LOG_WARNING, "Stratum connection reset");
|
applog(LOG_BLUE, "Stratum connection reset");
|
||||||
// reset stats queue as well
|
// reset stats queue as well
|
||||||
restart_threads();
|
restart_threads();
|
||||||
if ( s_get_ptr != s_put_ptr ) s_get_ptr = s_put_ptr = 0;
|
if ( s_get_ptr != s_put_ptr ) s_get_ptr = s_put_ptr = 0;
|
||||||
@@ -2788,15 +2806,12 @@ static void *stratum_thread(void *userdata )
|
|||||||
{
|
{
|
||||||
stratum_down = false;
|
stratum_down = false;
|
||||||
applog(LOG_BLUE,"Stratum connection established" );
|
applog(LOG_BLUE,"Stratum connection established" );
|
||||||
|
if ( stratum.new_job ) // prime first job
|
||||||
|
stratum_gen_work( &stratum, &g_work );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
report_summary_log( ( stratum_diff != stratum.job.diff )
|
// Wait for new message from server
|
||||||
&& ( stratum_diff != 0. ) );
|
|
||||||
|
|
||||||
if ( stratum.new_job )
|
|
||||||
stratum_gen_work( &stratum, &g_work );
|
|
||||||
|
|
||||||
if ( likely( stratum_socket_full( &stratum, opt_timeout ) ) )
|
if ( likely( stratum_socket_full( &stratum, opt_timeout ) ) )
|
||||||
{
|
{
|
||||||
if ( likely( s = stratum_recv_line( &stratum ) ) )
|
if ( likely( s = stratum_recv_line( &stratum ) ) )
|
||||||
@@ -2819,6 +2834,53 @@ static void *stratum_thread(void *userdata )
|
|||||||
// stratum_disconnect( &stratum );
|
// stratum_disconnect( &stratum );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
report_summary_log( ( stratum_diff != stratum.job.diff )
|
||||||
|
&& ( stratum_diff != 0. ) );
|
||||||
|
|
||||||
|
if ( !stratum_need_reset )
|
||||||
|
{
|
||||||
|
// Is keepalive needed? Mutex would normally be required but that
|
||||||
|
// would block any attempt to submit a share. A share is more
|
||||||
|
// important even if it messes up the keepalive.
|
||||||
|
|
||||||
|
if ( opt_stratum_keepalive )
|
||||||
|
{
|
||||||
|
struct timeval now, et;
|
||||||
|
gettimeofday( &now, NULL );
|
||||||
|
// any shares submitted since last keepalive?
|
||||||
|
if ( last_submit_time.tv_sec > stratum_keepalive_timer.tv_sec )
|
||||||
|
memcpy( &stratum_keepalive_timer, &last_submit_time,
|
||||||
|
sizeof (struct timeval) );
|
||||||
|
|
||||||
|
timeval_subtract( &et, &now, &stratum_keepalive_timer );
|
||||||
|
|
||||||
|
if ( et.tv_sec > stratum_keepalive_timeout )
|
||||||
|
{
|
||||||
|
double diff = stratum.job.diff * 0.5;
|
||||||
|
stratum_keepalive_timer = now;
|
||||||
|
if ( !opt_quiet )
|
||||||
|
applog( LOG_BLUE,
|
||||||
|
"Stratum keepalive requesting lower difficulty" );
|
||||||
|
stratum_suggest_difficulty( &stratum, diff );
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( last_submit_time.tv_sec > stratum_reset_time.tv_sec )
|
||||||
|
timeval_subtract( &et, &now, &last_submit_time );
|
||||||
|
else
|
||||||
|
timeval_subtract( &et, &now, &stratum_reset_time );
|
||||||
|
|
||||||
|
if ( et.tv_sec > stratum_keepalive_timeout + 60 )
|
||||||
|
{
|
||||||
|
applog( LOG_NOTICE, "No shares submitted, resetting stratum connection" );
|
||||||
|
stratum_need_reset = true;
|
||||||
|
stratum_keepalive_timer = now;
|
||||||
|
}
|
||||||
|
} // stratum_keepalive
|
||||||
|
|
||||||
|
if ( stratum.new_job && !stratum_need_reset )
|
||||||
|
stratum_gen_work( &stratum, &g_work );
|
||||||
|
|
||||||
|
} // stratum_need_reset
|
||||||
} // loop
|
} // loop
|
||||||
out:
|
out:
|
||||||
return NULL;
|
return NULL;
|
||||||
@@ -2990,8 +3052,8 @@ static bool cpu_capability( bool display_only )
|
|||||||
use_avx512 = cpu_has_avx512 && sw_has_avx512 && algo_has_avx512;
|
use_avx512 = cpu_has_avx512 && sw_has_avx512 && algo_has_avx512;
|
||||||
use_sha = cpu_has_sha && sw_has_sha && algo_has_sha;
|
use_sha = cpu_has_sha && sw_has_sha && algo_has_sha;
|
||||||
use_vaes = cpu_has_vaes && sw_has_vaes && algo_has_vaes;
|
use_vaes = cpu_has_vaes && sw_has_vaes && algo_has_vaes;
|
||||||
use_none = !( use_sse2 || use_aes || use_avx512 || use_avx2 ||
|
use_none = !( use_sse2 || use_sse42 || use_avx || use_aes || use_avx512
|
||||||
use_sha || use_vaes );
|
|| use_avx2 || use_sha || use_vaes );
|
||||||
|
|
||||||
// Display best options
|
// Display best options
|
||||||
printf( "\nStarting miner with" );
|
printf( "\nStarting miner with" );
|
||||||
@@ -3407,7 +3469,8 @@ void parse_arg(int key, char *arg )
|
|||||||
break;
|
break;
|
||||||
case 1021: // cpu-priority
|
case 1021: // cpu-priority
|
||||||
v = atoi(arg);
|
v = atoi(arg);
|
||||||
if (v < 0 || v > 5) /* sanity check */
|
applog(LOG_NOTICE,"--cpu-priority is deprecated and will be removed from a future release");
|
||||||
|
if (v < 0 || v > 5) /* sanity check */
|
||||||
show_usage_and_exit(1);
|
show_usage_and_exit(1);
|
||||||
opt_priority = v;
|
opt_priority = v;
|
||||||
break;
|
break;
|
||||||
@@ -3443,14 +3506,18 @@ void parse_arg(int key, char *arg )
|
|||||||
break;
|
break;
|
||||||
case 1024:
|
case 1024:
|
||||||
opt_randomize = true;
|
opt_randomize = true;
|
||||||
break;
|
applog(LOG_NOTICE,"--randomize is deprecated and will be removed from a future release");
|
||||||
|
break;
|
||||||
case 1027: // data-file
|
case 1027: // data-file
|
||||||
opt_data_file = strdup( arg );
|
opt_data_file = strdup( arg );
|
||||||
break;
|
break;
|
||||||
case 1028: // verify
|
case 1028: // verify
|
||||||
opt_verify = true;
|
opt_verify = true;
|
||||||
break;
|
break;
|
||||||
case 'V':
|
case 1029: // stratum-keepalive
|
||||||
|
opt_stratum_keepalive = true;
|
||||||
|
break;
|
||||||
|
case 'V':
|
||||||
display_cpu_capability();
|
display_cpu_capability();
|
||||||
exit(0);
|
exit(0);
|
||||||
case 'h':
|
case 'h':
|
||||||
@@ -3625,6 +3692,17 @@ int main(int argc, char *argv[])
|
|||||||
show_usage_and_exit(1);
|
show_usage_and_exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ( !opt_scantime )
|
||||||
|
{
|
||||||
|
if ( have_stratum ) opt_scantime = 30;
|
||||||
|
else if ( have_longpoll ) opt_scantime = LP_SCANTIME;
|
||||||
|
else opt_scantime = 5;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( opt_time_limit )
|
||||||
|
time_limit_stop = (unsigned int)time(NULL) + opt_time_limit;
|
||||||
|
|
||||||
|
|
||||||
// need to register to get algo optimizations for cpu capabilities
|
// need to register to get algo optimizations for cpu capabilities
|
||||||
// but that causes registration logs before cpu capabilities is output.
|
// but that causes registration logs before cpu capabilities is output.
|
||||||
// Would need to split register function into 2 parts. First part sets algo
|
// Would need to split register function into 2 parts. First part sets algo
|
||||||
@@ -3833,6 +3911,8 @@ int main(int argc, char *argv[])
|
|||||||
if ( opt_debug )
|
if ( opt_debug )
|
||||||
applog(LOG_INFO,"Creating stratum thread");
|
applog(LOG_INFO,"Creating stratum thread");
|
||||||
|
|
||||||
|
stratum.new_job = false; // just to make sure
|
||||||
|
|
||||||
/* init stratum thread info */
|
/* init stratum thread info */
|
||||||
stratum_thr_id = opt_n_threads + 2;
|
stratum_thr_id = opt_n_threads + 2;
|
||||||
thr = &thr_info[stratum_thr_id];
|
thr = &thr_info[stratum_thr_id];
|
||||||
@@ -3899,6 +3979,8 @@ int main(int argc, char *argv[])
|
|||||||
gettimeofday( &last_submit_time, NULL );
|
gettimeofday( &last_submit_time, NULL );
|
||||||
memcpy( &five_min_start, &last_submit_time, sizeof (struct timeval) );
|
memcpy( &five_min_start, &last_submit_time, sizeof (struct timeval) );
|
||||||
memcpy( &session_start, &last_submit_time, sizeof (struct timeval) );
|
memcpy( &session_start, &last_submit_time, sizeof (struct timeval) );
|
||||||
|
memcpy( &stratum_keepalive_timer, &last_submit_time, sizeof (struct timeval) );
|
||||||
|
memcpy( &stratum_reset_time, &last_submit_time, sizeof (struct timeval) );
|
||||||
memcpy( &total_hashes_time, &last_submit_time, sizeof (struct timeval) );
|
memcpy( &total_hashes_time, &last_submit_time, sizeof (struct timeval) );
|
||||||
pthread_mutex_unlock( &stats_lock );
|
pthread_mutex_unlock( &stats_lock );
|
||||||
|
|
||||||
|
20
miner.h
20
miner.h
@@ -466,6 +466,7 @@ void stratum_disconnect(struct stratum_ctx *sctx);
|
|||||||
bool stratum_subscribe(struct stratum_ctx *sctx);
|
bool stratum_subscribe(struct stratum_ctx *sctx);
|
||||||
bool stratum_authorize(struct stratum_ctx *sctx, const char *user, const char *pass);
|
bool stratum_authorize(struct stratum_ctx *sctx, const char *user, const char *pass);
|
||||||
bool stratum_handle_method(struct stratum_ctx *sctx, const char *s);
|
bool stratum_handle_method(struct stratum_ctx *sctx, const char *s);
|
||||||
|
bool stratum_suggest_difficulty( struct stratum_ctx *sctx, double diff );
|
||||||
|
|
||||||
|
|
||||||
extern bool aes_ni_supported;
|
extern bool aes_ni_supported;
|
||||||
@@ -823,6 +824,7 @@ Options:\n\
|
|||||||
qubit Qubit\n\
|
qubit Qubit\n\
|
||||||
scrypt scrypt(1024, 1, 1) (default)\n\
|
scrypt scrypt(1024, 1, 1) (default)\n\
|
||||||
scrypt:N scrypt(N, 1, 1)\n\
|
scrypt:N scrypt(N, 1, 1)\n\
|
||||||
|
scryptn2 scrypt(1048576, 1,1)\n\
|
||||||
sha256d Double SHA-256\n\
|
sha256d Double SHA-256\n\
|
||||||
sha256q Quad SHA-256, Pyrite (PYE)\n\
|
sha256q Quad SHA-256, Pyrite (PYE)\n\
|
||||||
sha256t Triple SHA-256, Onecoin (OC)\n\
|
sha256t Triple SHA-256, Onecoin (OC)\n\
|
||||||
@@ -885,10 +887,10 @@ Options:\n\
|
|||||||
-T, --timeout=N timeout for long poll and stratum (default: 300 seconds)\n\
|
-T, --timeout=N timeout for long poll and stratum (default: 300 seconds)\n\
|
||||||
-s, --scantime=N upper bound on time spent scanning current work when\n\
|
-s, --scantime=N upper bound on time spent scanning current work when\n\
|
||||||
long polling is unavailable, in seconds (default: 5)\n\
|
long polling is unavailable, in seconds (default: 5)\n\
|
||||||
--randomize Randomize scan range start to reduce duplicates\n\
|
--randomize randomize scan range (deprecated)\n\
|
||||||
-f, --diff-factor=N Divide req. difficulty by this factor (std is 1.0)\n\
|
-f, --diff-factor=N divide req. difficulty by this factor (std is 1.0)\n\
|
||||||
-m, --diff-multiplier=N Multiply difficulty by this factor (std is 1.0)\n\
|
-m, --diff-multiplier=N Multiply difficulty by this factor (std is 1.0)\n\
|
||||||
--hash-meter Display thread hash rates\n\
|
--hash-meter display thread hash rates\n\
|
||||||
--coinbase-addr=ADDR payout address for solo mining\n\
|
--coinbase-addr=ADDR payout address for solo mining\n\
|
||||||
--coinbase-sig=TEXT data to insert in the coinbase when possible\n\
|
--coinbase-sig=TEXT data to insert in the coinbase when possible\n\
|
||||||
--no-longpoll disable long polling support\n\
|
--no-longpoll disable long polling support\n\
|
||||||
@@ -909,15 +911,16 @@ Options:\n\
|
|||||||
-B, --background run the miner in the background\n\
|
-B, --background run the miner in the background\n\
|
||||||
--benchmark run in offline benchmark mode\n\
|
--benchmark run in offline benchmark mode\n\
|
||||||
--cpu-affinity set process affinity to cpu core(s), mask 0x3 for cores 0 and 1\n\
|
--cpu-affinity set process affinity to cpu core(s), mask 0x3 for cores 0 and 1\n\
|
||||||
--cpu-priority set process priority (default: 0 idle, 2 normal to 5 highest)\n\
|
--cpu-priority set process priority (default: 0 idle, 2 normal to 5 highest) (deprecated)\n\
|
||||||
-b, --api-bind=address[:port] IP address for the miner API, default port is 4048)\n\
|
-b, --api-bind=address[:port] IP address for the miner API, default port is 4048)\n\
|
||||||
--api-remote Allow remote control\n\
|
--api-remote allow remote control\n\
|
||||||
--max-temp=N Only mine if cpu temp is less than specified value (linux)\n\
|
--max-temp=N only mine if cpu temp is less than specified value (linux)\n\
|
||||||
--max-rate=N[KMG] Only mine if net hashrate is less than specified value\n\
|
--max-rate=N[KMG] only mine if net hashrate is less than specified value\n\
|
||||||
--max-diff=N Only mine if net difficulty is less than specified value\n\
|
--max-diff=N only mine if net difficulty is less than specified value\n\
|
||||||
-c, --config=FILE load a JSON-format configuration file\n\
|
-c, --config=FILE load a JSON-format configuration file\n\
|
||||||
--data-file=FILE path and name of data file\n\
|
--data-file=FILE path and name of data file\n\
|
||||||
--verify enable additional time consuming start up tests\n\
|
--verify enable additional time consuming start up tests\n\
|
||||||
|
--stratum-keepalive prevent disconnects when difficulty is too high\n\
|
||||||
-V, --version display version and CPU information and exit\n\
|
-V, --version display version and CPU information and exit\n\
|
||||||
-h, --help display this help text and exit\n\
|
-h, --help display this help text and exit\n\
|
||||||
";
|
";
|
||||||
@@ -987,6 +990,7 @@ static struct option const options[] = {
|
|||||||
{ "userpass", 1, NULL, 'O' },
|
{ "userpass", 1, NULL, 'O' },
|
||||||
{ "data-file", 1, NULL, 1027 },
|
{ "data-file", 1, NULL, 1027 },
|
||||||
{ "verify", 0, NULL, 1028 },
|
{ "verify", 0, NULL, 1028 },
|
||||||
|
{ "stratum-keepalive", 0, NULL, 1029 },
|
||||||
{ "version", 0, NULL, 'V' },
|
{ "version", 0, NULL, 'V' },
|
||||||
{ 0, 0, 0, 0 }
|
{ 0, 0, 0, 0 }
|
||||||
};
|
};
|
||||||
|
1226
simd-utils/intrlv.h
1226
simd-utils/intrlv.h
File diff suppressed because it is too large
Load Diff
@@ -272,9 +272,19 @@ static inline void memcpy_128( __m128i *dst, const __m128i *src, const int n )
|
|||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// Mask making
|
||||||
|
|
||||||
|
// Equivalent of AVX512 _mm_movepi64_mask & _mm_movepi32_mask.
|
||||||
|
// Returns 2 or 4 bit integer mask from MSB of 64 or 32 bit elements.
|
||||||
|
|
||||||
|
// Collect the most significant bit of each 64 bit element of a 128 bit
// integer vector into an int: bit i of the result is the MSB of element i,
// giving a 2 bit mask. The vector is only reinterpreted as double for the
// sake of the intrinsic's argument type; no conversion takes place.
#define mm_movmask_64( v ) \
   _mm_movemask_pd( _mm_castsi128_pd( v ) )

// Same for 32 bit elements, giving a 4 bit int mask.
#define mm_movmask_32( v ) \
   _mm_movemask_ps( _mm_castsi128_ps( v ) )
|
||||||
|
|
||||||
|
|
||||||
// Diagonal blend: d = s3[3], s2[2], s1[1], s0[0] ||
|
// Diagonal blend
|
||||||
|
|
||||||
// Blend 4 32 bit elements from 4 vectors
|
// Blend 4 32 bit elements from 4 vectors
|
||||||
|
|
||||||
@@ -284,7 +294,7 @@ static inline void memcpy_128( __m128i *dst, const __m128i *src, const int n )
|
|||||||
mm_blend_epi32( _mm_blend_epi32( s3, s2, 0x4 ), \
|
mm_blend_epi32( _mm_blend_epi32( s3, s2, 0x4 ), \
|
||||||
_mm_blend_epi32( s1, s0, 0x1 ), 0x3 )
|
_mm_blend_epi32( s1, s0, 0x1 ), 0x3 )
|
||||||
|
|
||||||
#elif defined(__SSE4_1)
|
#elif defined(__SSE4_1__)
|
||||||
|
|
||||||
#define mm128_diagonal_32( v3, v2, v1, v0 ) \
|
#define mm128_diagonal_32( v3, v2, v1, v0 ) \
|
||||||
mm_blend_epi16( _mm_blend_epi16( s3, s2, 0x30 ), \
|
mm_blend_epi16( _mm_blend_epi16( s3, s2, 0x30 ), \
|
||||||
@@ -401,6 +411,16 @@ static inline void memcpy_128( __m128i *dst, const __m128i *src, const int n )
|
|||||||
#define mm128_rol_16( v, c ) \
|
#define mm128_rol_16( v, c ) \
|
||||||
_mm_or_si128( _mm_slli_epi16( v, c ), _mm_srli_epi16( v, 16-(c) ) )
|
_mm_or_si128( _mm_slli_epi16( v, c ), _mm_srli_epi16( v, 16-(c) ) )
|
||||||
|
|
||||||
|
// Limited 2 input shuffle
|
||||||
|
// Two input shuffle of 64 bit elements: the low result element is selected
// from a, the high result element from b, as directed by the 2 bit
// immediate c (must be a compile time constant).
// Expands to a pure expression with no trailing semicolon so it can be
// nested in other expressions or passed as an argument.
#define mm128_shuffle2_64( a, b, c ) \
   _mm_castpd_si128( _mm_shuffle_pd( _mm_castsi128_pd( a ), \
                                     _mm_castsi128_pd( b ), c ) )

// Two input shuffle of 32 bit elements: the two low result elements are
// selected from a, the two high result elements from b, as directed by the
// 8 bit immediate c (must be a compile time constant).
#define mm128_shuffle2_32( a, b, c ) \
   _mm_castps_si128( _mm_shuffle_ps( _mm_castsi128_ps( a ), \
                                     _mm_castsi128_ps( b ), c ) )
|
||||||
|
|
||||||
|
|
||||||
//
|
//
|
||||||
// Rotate vector elements accross all lanes
|
// Rotate vector elements accross all lanes
|
||||||
|
|
||||||
@@ -532,9 +552,8 @@ static inline void mm128_block_bswap_32( __m128i *d, const __m128i *s )
|
|||||||
#if defined(__SSSE3__)
|
#if defined(__SSSE3__)
|
||||||
|
|
||||||
// Function macro with two inputs and one output, inputs are preserved.
|
// Function macro with two inputs and one output, inputs are preserved.
|
||||||
// Returns modified first arg.
|
|
||||||
// Two input functions are not available without SSSE3. Use procedure
|
// Two input functions are not available without SSSE3. Use procedure
|
||||||
// belowe instead.
|
// macros below instead.
|
||||||
|
|
||||||
#define mm128_shufl2r_64( v1, v2 ) _mm_alignr_epi8( v2, v1, 8 )
|
#define mm128_shufl2r_64( v1, v2 ) _mm_alignr_epi8( v2, v1, 8 )
|
||||||
#define mm128_shufl2l_64( v1, v2 ) _mm_alignr_epi8( v1, v2, 8 )
|
#define mm128_shufl2l_64( v1, v2 ) _mm_alignr_epi8( v1, v2, 8 )
|
||||||
@@ -548,12 +567,11 @@ static inline void mm128_block_bswap_32( __m128i *d, const __m128i *s )
|
|||||||
#define mm128_shufl2r_8( v1, v2 ) _mm_alignr_epi8( v2, v1, 8 )
|
#define mm128_shufl2r_8( v1, v2 ) _mm_alignr_epi8( v2, v1, 8 )
|
||||||
#define mm128_shufl2l_8( v1, v2 ) _mm_alignr_epi8( v1, v2, 8 )
|
#define mm128_shufl2l_8( v1, v2 ) _mm_alignr_epi8( v1, v2, 8 )
|
||||||
|
|
||||||
// Procedure macroswith 2 inputs and 2 outputs, inputs are destroyed.
|
// Procedure macros with 2 inputs and 2 outputs, inputs args are overwritten.
|
||||||
// Returns both modified args in place.
|
|
||||||
|
|
||||||
// These macros retain the vrol/vror name for now to avoid
|
// These macros retain the vrol/vror name for now to avoid
|
||||||
// confusion with the shufl2r/shuffle2l function macros above.
|
// confusion with the shufl2r/shuffle2l function macros above.
|
||||||
// These may be renamed to something like shufl2r2 for 2 1nputs and
|
// These may be renamed to something like shufl2r2 for 2 inputs and
|
||||||
// 2 outputs, ie SHUFfLe 2 inputs Right with 2 outputs.
|
// 2 outputs, ie SHUFfLe 2 inputs Right with 2 outputs.
|
||||||
|
|
||||||
#define mm128_vror256_64( v1, v2 ) \
|
#define mm128_vror256_64( v1, v2 ) \
|
||||||
|
@@ -233,6 +233,18 @@ static inline void memcpy_256( __m256i *dst, const __m256i *src, const int n )
|
|||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// Mask making
|
||||||
|
|
||||||
|
// Equivalent of AVX512 _mm256_movepi64_mask & _mm256_movepi32_mask.
|
||||||
|
// Returns 4 or 8 bit integer mask from MSB of 64 or 32 bit elements.
|
||||||
|
|
||||||
|
// Collect the most significant bit of each 64 bit element of a 256 bit
// integer vector into an int: bit i of the result is the MSB of element i,
// giving a 4 bit mask. The vector is only reinterpreted as double for the
// sake of the intrinsic's argument type; no conversion takes place.
#define mm256_movmask_64( v ) \
   _mm256_movemask_pd( _mm256_castsi256_pd( v ) )

// Same for 32 bit elements, giving an 8 bit int mask.
#define mm256_movmask_32( v ) \
   _mm256_movemask_ps( _mm256_castsi256_ps( v ) )
|
||||||
|
|
||||||
|
|
||||||
// Diagonal blending
|
// Diagonal blending
|
||||||
|
|
||||||
// Blend 4 64 bit elements from 4 vectors
|
// Blend 4 64 bit elements from 4 vectors
|
||||||
@@ -405,6 +417,16 @@ static inline void memcpy_256( __m256i *dst, const __m256i *src, const int n )
|
|||||||
//
|
//
|
||||||
// Rotate elements within each 128 bit lane of 256 bit vector.
|
// Rotate elements within each 128 bit lane of 256 bit vector.
|
||||||
|
|
||||||
|
// Limited 2 input shuffle
|
||||||
|
// Two input shuffle of 64 bit elements within each 128 bit lane: even
// result elements are selected from a, odd result elements from b, as
// directed by the 4 bit immediate c (must be a compile time constant).
// Expands to a pure expression with no trailing semicolon so it can be
// nested in other expressions or passed as an argument.
#define mm256_shuffle2_64( a, b, c ) \
   _mm256_castpd_si256( _mm256_shuffle_pd( _mm256_castsi256_pd( a ), \
                                           _mm256_castsi256_pd( b ), c ) )

// Two input shuffle of 32 bit elements within each 128 bit lane: the two
// low result elements of a lane are selected from a, the two high from b,
// as directed by the 8 bit immediate c (must be a compile time constant).
#define mm256_shuffle2_32( a, b, c ) \
   _mm256_castps_si256( _mm256_shuffle_ps( _mm256_castsi256_ps( a ), \
                                           _mm256_castsi256_ps( b ), c ) )
|
||||||
|
|
||||||
|
|
||||||
#define mm256_swap128_64( v ) _mm256_shuffle_epi32( v, 0x4e )
|
#define mm256_swap128_64( v ) _mm256_shuffle_epi32( v, 0x4e )
|
||||||
#define mm256_shuflr128_64 mm256_swap128_64
|
#define mm256_shuflr128_64 mm256_swap128_64
|
||||||
#define mm256_shufll128_64 mm256_swap128_64
|
#define mm256_shufll128_64 mm256_swap128_64
|
||||||
@@ -485,20 +507,6 @@ static inline __m256i mm256_shuflr128_x8( const __m256i v, const int c )
|
|||||||
v2 = _mm256_xor_si256( v1, v2 ); \
|
v2 = _mm256_xor_si256( v1, v2 ); \
|
||||||
v1 = _mm256_xor_si256( v1, v2 );
|
v1 = _mm256_xor_si256( v1, v2 );
|
||||||
|
|
||||||
#define mm256_vror512_128( v1, v2 ) \
|
|
||||||
do { \
|
|
||||||
__m256i t = _mm256_permute2x128( v1, v2, 0x03 ); \
|
|
||||||
v1 = _mm256_permute2x128( v2, v1, 0x21 ); \
|
|
||||||
v2 = t; \
|
|
||||||
} while(0)
|
|
||||||
|
|
||||||
#define mm256_vrol512_128( v1, v2 ) \
|
|
||||||
do { \
|
|
||||||
__m256i t = _mm256_permute2x128( v1, v2, 0x03 ); \
|
|
||||||
v2 = _mm256_permute2x128( v2, v1, 0x21 ); \
|
|
||||||
v1 = t; \
|
|
||||||
} while(0)
|
|
||||||
|
|
||||||
#endif // __AVX2__
|
#endif // __AVX2__
|
||||||
#endif // SIMD_256_H__
|
#endif // SIMD_256_H__
|
||||||
|
|
||||||
|
@@ -493,7 +493,7 @@ static inline __m512i mm512_shufll_32( const __m512i v )
|
|||||||
static inline __m512i mm512_shuflr_x64( const __m512i v, const int n )
|
static inline __m512i mm512_shuflr_x64( const __m512i v, const int n )
|
||||||
{ return _mm512_alignr_epi64( v, v, n ); }
|
{ return _mm512_alignr_epi64( v, v, n ); }
|
||||||
|
|
||||||
static inline __m512i mm512_shufll_x32( const __m512i v, const int n )
|
static inline __m512i mm512_shuflr_x32( const __m512i v, const int n )
|
||||||
{ return _mm512_alignr_epi32( v, v, n ); }
|
{ return _mm512_alignr_epi32( v, v, n ); }
|
||||||
|
|
||||||
#define mm512_shuflr_16( v ) \
|
#define mm512_shuflr_16( v ) \
|
||||||
@@ -581,8 +581,17 @@ static inline __m512i mm512_shufll_x32( const __m512i v, const int n )
|
|||||||
0x0e0d0c0b0a090807, 0x060504030201001f ) )
|
0x0e0d0c0b0a090807, 0x060504030201001f ) )
|
||||||
|
|
||||||
//
|
//
|
||||||
// Shuffle-roate elements within 128 bit lanes of 512 bit vector.
|
// Shuffle/rotate elements within 128 bit lanes of 512 bit vector.
|
||||||
|
|
||||||
|
// Limited 2 input, 1 output shuffle within 128 bit lanes.
|
||||||
|
// Two input shuffle of 64 bit elements within each 128 bit lane: even
// result elements are selected from a, odd result elements from b, as
// directed by the 8 bit immediate c (must be a compile time constant).
// Expands to a pure expression with no trailing semicolon so it can be
// nested in other expressions or passed as an argument.
#define mm512_shuffle2_64( a, b, c ) \
   _mm512_castpd_si512( _mm512_shuffle_pd( _mm512_castsi512_pd( a ), \
                                           _mm512_castsi512_pd( b ), c ) )

// Two input shuffle of 32 bit elements within each 128 bit lane: the two
// low result elements of a lane are selected from a, the two high from b,
// as directed by the 8 bit immediate c (must be a compile time constant).
#define mm512_shuffle2_32( a, b, c ) \
   _mm512_castps_si512( _mm512_shuffle_ps( _mm512_castsi512_ps( a ), \
                                           _mm512_castsi512_ps( b ), c ) )
|
||||||
|
|
||||||
// Swap 64 bits in each 128 bit lane
|
// Swap 64 bits in each 128 bit lane
|
||||||
#define mm512_swap128_64( v ) _mm512_shuffle_epi32( v, 0x4e )
|
#define mm512_swap128_64( v ) _mm512_shuffle_epi32( v, 0x4e )
|
||||||
#define mm512_shuflr128_64 mm512_swap128_64
|
#define mm512_shuflr128_64 mm512_swap128_64
|
||||||
@@ -610,6 +619,7 @@ static inline __m512i mm512_shuflr128_8( const __m512i v, const int c )
|
|||||||
// shufl2r is 2 input ...
|
// shufl2r is 2 input ...
|
||||||
// Drop macros? They can easilly be rebuilt using shufl2 functions
|
// Drop macros? They can easilly be rebuilt using shufl2 functions
|
||||||
|
|
||||||
|
// 2 input, 1 output
|
||||||
// Shuffle concatenated { v1, v2 ) right or left by 256 bits and return
|
// Shuffle concatenated { v1, v2 ) right or left by 256 bits and return
|
||||||
// rotated v1
|
// rotated v1
|
||||||
// visually confusing for shif2r because of arg order. First arg is always
|
// visually confusing for shif2r because of arg order. First arg is always
|
||||||
@@ -627,76 +637,5 @@ static inline __m512i mm512_shuflr128_8( const __m512i v, const int c )
|
|||||||
#define mm512_shufl2r_32( v1, v2 ) _mm512_alignr_epi32( v2, v1, 1 )
|
#define mm512_shufl2r_32( v1, v2 ) _mm512_alignr_epi32( v2, v1, 1 )
|
||||||
#define mm512_shufl2l_32( v1, v2 ) _mm512_alignr_epi32( v1, v2, 1 )
|
#define mm512_shufl2l_32( v1, v2 ) _mm512_alignr_epi32( v1, v2, 1 )
|
||||||
|
|
||||||
// Rotate elements from 2 512 bit vectors in place, source arguments
|
|
||||||
// are overwritten.
|
|
||||||
|
|
||||||
#define mm512_swap1024_512( v1, v2 ) \
|
|
||||||
v1 = _mm512_xor_si512( v1, v2 ); \
|
|
||||||
v2 = _mm512_xor_si512( v1, v2 ); \
|
|
||||||
v1 = _mm512_xor_si512( v1, v2 );
|
|
||||||
#define mm512_shufl2l_512 mm512_swap1024_512 \
|
|
||||||
#define mm512_shufl2r_512 mm512_swap1024_512 \
|
|
||||||
|
|
||||||
// Deprecated, will be removed. Use shufl2 functions instead. Leave them as is
|
|
||||||
// for now.
|
|
||||||
// Rotate elements from 2 512 bit vectors in place, both source arguments
|
|
||||||
// are updated.
|
|
||||||
|
|
||||||
#define mm512_vror1024_256( v1, v2 ) \
|
|
||||||
do { \
|
|
||||||
__m512i t = _mm512_alignr_epi64( v1, v2, 4 ); \
|
|
||||||
v1 = _mm512_alignr_epi64( v2, v1, 4 ); \
|
|
||||||
v2 = t; \
|
|
||||||
} while(0)
|
|
||||||
|
|
||||||
#define mm512_vrol1024_256( v1, v2 ) \
|
|
||||||
do { \
|
|
||||||
__m512i t = _mm512_alignr_epi64( v1, v2, 4 ); \
|
|
||||||
v2 = _mm512_alignr_epi64( v2, v1, 4 ); \
|
|
||||||
v1 = t; \
|
|
||||||
} while(0)
|
|
||||||
|
|
||||||
#define mm512_vror1024_128( v1, v2 ) \
|
|
||||||
do { \
|
|
||||||
__m512i t = _mm512_alignr_epi64( v1, v2, 2 ); \
|
|
||||||
v1 = _mm512_alignr_epi64( v2, v1, 2 ); \
|
|
||||||
v2 = t; \
|
|
||||||
} while(0)
|
|
||||||
|
|
||||||
#define mm512_vrol1024_128( v1, v2 ) \
|
|
||||||
do { \
|
|
||||||
__m512i t = _mm512_alignr_epi64( v1, v2, 6 ); \
|
|
||||||
v2 = _mm512_alignr_epi64( v2, v1, 6 ); \
|
|
||||||
v1 = t; \
|
|
||||||
} while(0)
|
|
||||||
|
|
||||||
#define mm512_vror1024_64( v1, v2 ) \
|
|
||||||
do { \
|
|
||||||
__m512i t = _mm512_alignr_epi64( v1, v2, 1 ); \
|
|
||||||
v1 = _mm512_alignr_epi64( v2, v1, 1 ); \
|
|
||||||
v2 = t; \
|
|
||||||
} while(0)
|
|
||||||
|
|
||||||
#define mm512_vrol1024_64( v1, v2 ) \
|
|
||||||
do { \
|
|
||||||
__m512i t = _mm512_alignr_epi64( v1, v2, 7 ); \
|
|
||||||
v2 = _mm512_alignr_epi64( v2, v1, 7 ); \
|
|
||||||
v1 = t; \
|
|
||||||
} while(0)
|
|
||||||
|
|
||||||
#define mm512_vror1024_32( v1, v2 ) \
|
|
||||||
do { \
|
|
||||||
__m512i t = _mm512_alignr_epi32( v1, v2, 1 ); \
|
|
||||||
v1 = _mm512_alignr_epi32( v2, v1, 1 ); \
|
|
||||||
v2 = t; \
|
|
||||||
} while(0)
|
|
||||||
|
|
||||||
#define mm512_vrol1024_32( v1, v2 ) \
|
|
||||||
do { \
|
|
||||||
__m512i t = _mm512_alignr_epi32( v1, v2, 15 ); \
|
|
||||||
v2 = _mm512_alignr_epi32( v2, v1, 15 ); \
|
|
||||||
v1 = t; \
|
|
||||||
} while(0)
|
|
||||||
|
|
||||||
#endif // AVX512
|
#endif // AVX512
|
||||||
#endif // SIMD_512_H__
|
#endif // SIMD_512_H__
|
||||||
|
@@ -209,7 +209,7 @@ static inline void cpu_getname(char *outbuf, size_t maxsz)
|
|||||||
{
|
{
|
||||||
memset(outbuf, 0, maxsz);
|
memset(outbuf, 0, maxsz);
|
||||||
#ifdef WIN32
|
#ifdef WIN32
|
||||||
char brand[0xC0] = { 0 };
|
char brand[256] = { 0 };
|
||||||
int output[4] = { 0 }, ext;
|
int output[4] = { 0 }, ext;
|
||||||
cpuid(0x80000000, output);
|
cpuid(0x80000000, output);
|
||||||
ext = output[0];
|
ext = output[0];
|
||||||
|
21
util.c
21
util.c
@@ -1658,7 +1658,7 @@ static bool stratum_parse_extranonce(struct stratum_ctx *sctx, json_t *params, i
|
|||||||
pthread_mutex_unlock(&sctx->work_lock);
|
pthread_mutex_unlock(&sctx->work_lock);
|
||||||
|
|
||||||
if ( !opt_quiet ) /* pool dynamic change */
|
if ( !opt_quiet ) /* pool dynamic change */
|
||||||
applog( LOG_INFO, "Stratum extranonce1= %s, extranonce2 size= %d",
|
applog( LOG_INFO, "Stratum extranonce1 0x%s, extranonce2 size %d",
|
||||||
xnonce1, xn2_size);
|
xnonce1, xn2_size);
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
@@ -1846,6 +1846,25 @@ out:
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool stratum_suggest_difficulty( struct stratum_ctx *sctx, double diff )
|
||||||
|
{
|
||||||
|
char *s;
|
||||||
|
s = (char*) malloc( 80 );
|
||||||
|
bool rc = true;
|
||||||
|
|
||||||
|
// response is handled seperately, what ID?
|
||||||
|
sprintf( s, "{\"id\": 1, \"method\": \"mining.suggest_difficulty\", \"params\": [\"%f\"]}", diff );
|
||||||
|
if ( !stratum_send_line( sctx, s ) )
|
||||||
|
{
|
||||||
|
applog(LOG_WARNING,"stratum.suggest_difficulty send failed");
|
||||||
|
rc = false;
|
||||||
|
}
|
||||||
|
free ( s );
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Extract bloc height L H... here len=3, height=0x1333e8
|
* Extract bloc height L H... here len=3, height=0x1333e8
|
||||||
* "...0000000000ffffffff2703e83313062f503253482f043d61105408"
|
* "...0000000000ffffffff2703e83313062f503253482f043d61105408"
|
||||||
|
@@ -16,8 +16,8 @@ export MINGW_LIB="/usr/x86_64-w64-mingw32/lib"
|
|||||||
export GCC_MINGW_LIB="/usr/lib/gcc/x86_64-w64-mingw32/9.3-win32"
|
export GCC_MINGW_LIB="/usr/lib/gcc/x86_64-w64-mingw32/9.3-win32"
|
||||||
# used by GCC
|
# used by GCC
|
||||||
export LDFLAGS="-L$LOCAL_LIB/curl/lib/.libs -L$LOCAL_LIB/gmp/.libs -L$LOCAL_LIB/openssl"
|
export LDFLAGS="-L$LOCAL_LIB/curl/lib/.libs -L$LOCAL_LIB/gmp/.libs -L$LOCAL_LIB/openssl"
|
||||||
# support for Windows CPU groups, AES sometimes not included in -march
|
# Support for Windows 7 CPU groups, AES sometimes not included in -march
|
||||||
export DEFAULT_CFLAGS="-O3 -maes -Wall -D_WIN32_WINNT=0x0601"
|
export DEFAULT_CFLAGS="-maes -O3 -Wall -D_WIN32_WINNT=0x0601"
|
||||||
export DEFAULT_CFLAGS_OLD="-O3 -Wall"
|
export DEFAULT_CFLAGS_OLD="-O3 -Wall"
|
||||||
|
|
||||||
# make link to local gmp header file.
|
# make link to local gmp header file.
|
||||||
@@ -26,8 +26,8 @@ ln -s $LOCAL_LIB/gmp/gmp.h ./gmp.h
|
|||||||
# make release directory and copy selected DLLs.
|
# make release directory and copy selected DLLs.
|
||||||
|
|
||||||
rm -rf release > /dev/null
|
rm -rf release > /dev/null
|
||||||
|
|
||||||
mkdir release
|
mkdir release
|
||||||
|
|
||||||
cp README.txt release/
|
cp README.txt release/
|
||||||
cp README.md release/
|
cp README.md release/
|
||||||
cp RELEASE_NOTES release/
|
cp RELEASE_NOTES release/
|
||||||
|
Reference in New Issue
Block a user