mirror of
https://github.com/JayDDee/cpuminer-opt.git
synced 2025-09-17 23:44:27 +00:00
Compare commits
1 Commits
Author | SHA1 | Date | |
---|---|---|---|
![]() |
82c2605d77 |
@@ -8,10 +8,9 @@ Security warning
|
||||
|
||||
Miner programs are often flagged as malware by antivirus programs. This is
|
||||
usually a false positive, they are flagged simply because they are
|
||||
cryptocurrency miners. However, some malware masquerading as a miner has
|
||||
been spread using the cover that miners are known to be subject to false
|
||||
positives and users will dismiss the AV alert. Always be on alert.
|
||||
The source code of cpuminer-opt is open for anyone to inspect.
|
||||
cryptocurrency miners. However, some malware has been spread using the
|
||||
cover that miners are known to be subject to false positives. Always be on
|
||||
alert. The source code of cpuminer-opt is open for anyone to inspect.
|
||||
If you don't trust the software, don't download it.
|
||||
|
||||
The cryptographic hashing code has been taken from trusted sources but has been
|
||||
@@ -30,31 +29,12 @@ Requirements
|
||||
Intel Core2 or newer, or AMD Steamroller or newer CPU. ARM CPUs are not
|
||||
supported.
|
||||
|
||||
64 bit Linux or Windows operating system. Apple, Android and Raspberry Pi
|
||||
are not supported. FreeBSD YMMV.
|
||||
64 bit Linux or Windows operating system. Apple, Android and Rpi are
|
||||
not supported. FreeBSD YMMV.
|
||||
|
||||
Change Log
|
||||
----------
|
||||
|
||||
v3.11.2
|
||||
|
||||
Fixed x11gost (sib) AVX2 invalid shares.
|
||||
|
||||
Fixed x16r, x16rv2, x16s, x16rt, x16rt-veil (veil), x21s.
|
||||
No shares were submitted when cube, shavite or echo were the first function
|
||||
in the hash order.
|
||||
|
||||
Fixed all algos reporting stats problems when mining with SSE2.
|
||||
|
||||
Faster Lyra2 AVX512: lyra2z +47%, lyra2rev3 +11%, allium +13%, x21s +6%
|
||||
|
||||
Other minor performance improvements.
|
||||
|
||||
Known issue:
|
||||
|
||||
Lyra2 AVX512 improvements paradoxically reduced performance on x22i and x25x.
|
||||
https://github.com/JayDDee/cpuminer-opt/issues/225
|
||||
|
||||
v3.11.1
|
||||
|
||||
Faster panama for x25x AVX2 & AVX512.
|
||||
|
@@ -62,7 +62,9 @@ int scanhash_argon2( struct work* work, uint32_t max_nonce,
|
||||
argon2hash(hash, endiandata);
|
||||
if (hash[7] <= Htarg && fulltest(hash, ptarget)) {
|
||||
pdata[19] = nonce;
|
||||
submit_solution( work, hash, mythr );
|
||||
*hashes_done = pdata[19] - first_nonce;
|
||||
work_set_target_ratio(work, hash);
|
||||
return 1;
|
||||
}
|
||||
nonce++;
|
||||
} while (nonce < max_nonce && !work_restart[thr_id].restart);
|
||||
|
@@ -43,14 +43,17 @@ int scanhash_blake2b( struct work *work, uint32_t max_nonce,
|
||||
|
||||
do {
|
||||
be32enc(&endiandata[19], n);
|
||||
//blake2b_hash_end(vhashcpu, endiandata);
|
||||
blake2b_hash(vhashcpu, endiandata);
|
||||
|
||||
if (vhashcpu[7] < Htarg && fulltest(vhashcpu, ptarget))
|
||||
{
|
||||
if (vhashcpu[7] < Htarg && fulltest(vhashcpu, ptarget)) {
|
||||
work_set_target_ratio(work, vhashcpu);
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
pdata[19] = n;
|
||||
submit_solution( work, vhashcpu, mythr );
|
||||
}
|
||||
n++;
|
||||
return 1;
|
||||
}
|
||||
n++;
|
||||
|
||||
} while (n < max_nonce && !work_restart[thr_id].restart);
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
pdata[19] = n;
|
||||
|
@@ -77,15 +77,25 @@ int scanhash_decred( struct work *work, uint32_t max_nonce,
|
||||
be32enc(&endiandata[k], pdata[k]);
|
||||
#endif
|
||||
|
||||
#ifdef DEBUG_ALGO
|
||||
if (!thr_id) applog(LOG_DEBUG,"[%d] Target=%08x %08x", thr_id, ptarget[6], ptarget[7]);
|
||||
#endif
|
||||
|
||||
do {
|
||||
//be32enc(&endiandata[DCR_NONCE_OFT32], n);
|
||||
endiandata[DECRED_NONCE_INDEX] = n;
|
||||
decred_hash(hash32, endiandata);
|
||||
|
||||
if (hash32[7] <= HTarget && fulltest(hash32, ptarget))
|
||||
{
|
||||
pdata[DECRED_NONCE_INDEX] = n;
|
||||
submit_solution( work, hash32, mythr );
|
||||
if (hash32[7] <= HTarget && fulltest(hash32, ptarget)) {
|
||||
work_set_target_ratio(work, hash32);
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
#ifdef DEBUG_ALGO
|
||||
applog(LOG_BLUE, "Nonce : %08x %08x", n, swab32(n));
|
||||
applog_hash(ptarget);
|
||||
applog_compare_hash(hash32, ptarget);
|
||||
#endif
|
||||
pdata[DECRED_NONCE_INDEX] = n;
|
||||
return 1;
|
||||
}
|
||||
|
||||
n++;
|
||||
|
@@ -4,7 +4,7 @@
|
||||
#include <stdlib.h>
|
||||
#include <memory.h>
|
||||
#include <math.h>
|
||||
#include "simd-utils.h"
|
||||
|
||||
#include "sph_gost.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
@@ -696,26 +696,9 @@ static void AddModulo512(const void *a,const void *b,void *c)
|
||||
|
||||
static void AddXor512(const void *a,const void *b,void *c)
|
||||
{
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
casti_m512i( c, 0 ) = _mm512_xor_si512( casti_m512i( a, 0 ),
|
||||
casti_m512i( b, 0 ) );
|
||||
#elif defined(__AVX2__)
|
||||
casti_m256i( c, 0 ) = _mm256_xor_si256( casti_m256i( a, 0 ),
|
||||
casti_m256i( b, 0 ) );
|
||||
casti_m256i( c, 1 ) = _mm256_xor_si256( casti_m256i( a, 1 ),
|
||||
casti_m256i( b, 1 ) );
|
||||
#elif defined(__SSE2__)
|
||||
casti_m128i( c, 0 ) = _mm_xor_si128( casti_m128i( a, 0 ),
|
||||
casti_m128i( b, 0 ) );
|
||||
casti_m128i( c, 1 ) = _mm_xor_si128( casti_m128i( a, 1 ),
|
||||
casti_m128i( b, 1 ) );
|
||||
casti_m128i( c, 2 ) = _mm_xor_si128( casti_m128i( a, 2 ),
|
||||
casti_m128i( b, 2 ) );
|
||||
casti_m128i( c, 3 ) = _mm_xor_si128( casti_m128i( a, 3 ),
|
||||
casti_m128i( b, 3 ) );
|
||||
#else
|
||||
const unsigned long long *A=a, *B=b;
|
||||
const unsigned long long *A=a, *B=b;
|
||||
unsigned long long *C=c;
|
||||
#ifdef FULL_UNROLL
|
||||
C[0] = A[0] ^ B[0];
|
||||
C[1] = A[1] ^ B[1];
|
||||
C[2] = A[2] ^ B[2];
|
||||
@@ -724,6 +707,12 @@ static void AddXor512(const void *a,const void *b,void *c)
|
||||
C[5] = A[5] ^ B[5];
|
||||
C[6] = A[6] ^ B[6];
|
||||
C[7] = A[7] ^ B[7];
|
||||
#else
|
||||
int i = 0;
|
||||
|
||||
for(i=0; i<8; i++) {
|
||||
C[i] = A[i] ^ B[i];
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -904,32 +893,31 @@ static void g_N(const unsigned char *N,unsigned char *h,const unsigned char *m)
|
||||
|
||||
static void hash_X(unsigned char *IV,const unsigned char *message,unsigned long long length,unsigned char *out)
|
||||
{
|
||||
unsigned char v512[64] __attribute__((aligned(64))) = {
|
||||
unsigned char v512[64] = {
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x02,0x00
|
||||
};
|
||||
unsigned char v0[64] __attribute__((aligned(64))) = {
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
|
||||
};
|
||||
unsigned char Sigma[64] __attribute__((aligned(64))) = {
|
||||
};
|
||||
unsigned char v0[64] = {
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
|
||||
};
|
||||
unsigned char N[64] __attribute__((aligned(64))) = {
|
||||
unsigned char Sigma[64] = {
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
|
||||
};
|
||||
unsigned char m[64] __attribute__((aligned(64)));
|
||||
unsigned char *hash = IV;
|
||||
unsigned char N[64] = {
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
|
||||
};
|
||||
unsigned char m[64], *hash = IV;
|
||||
unsigned long long len = length;
|
||||
|
||||
// Stage 2
|
||||
@@ -964,7 +952,7 @@ static void hash_X(unsigned char *IV,const unsigned char *message,unsigned long
|
||||
|
||||
static void hash_512(const unsigned char *message, unsigned long long length, unsigned char *out)
|
||||
{
|
||||
unsigned char IV[64] __attribute__((aligned(64))) = {
|
||||
unsigned char IV[64] = {
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
@@ -81,9 +81,9 @@ typedef struct {
|
||||
*/
|
||||
typedef struct {
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
unsigned char buf[64] __attribute__((aligned(64)));
|
||||
sph_u32 V[5][8] __attribute__((aligned(64)));
|
||||
unsigned char buf[64]; /* first field, for alignment */
|
||||
size_t ptr;
|
||||
sph_u32 V[5][8];
|
||||
#endif
|
||||
} sph_gost512_context;
|
||||
|
||||
|
@@ -67,12 +67,8 @@ HashReturn_gr init_groestl( hashState_groestl* ctx, int hashlen )
|
||||
ctx->chaining[i] = _mm_setzero_si128();
|
||||
ctx->buffer[i] = _mm_setzero_si128();
|
||||
}
|
||||
|
||||
// The only non-zero in the IV is len. It can be hard coded.
|
||||
ctx->chaining[ 6 ] = m128_const_64( 0x0200000000000000, 0 );
|
||||
// ((u64*)ctx->chaining)[COLS-1] = U64BIG((u64)LENGTH);
|
||||
// INIT(ctx->chaining);
|
||||
|
||||
((u64*)ctx->chaining)[COLS-1] = U64BIG((u64)LENGTH);
|
||||
INIT(ctx->chaining);
|
||||
ctx->buf_ptr = 0;
|
||||
ctx->rem_ptr = 0;
|
||||
|
||||
|
@@ -42,12 +42,9 @@ int groestl512_4way_init( groestl512_4way_context* ctx, uint64_t hashlen )
|
||||
ctx->buffer[i] = m512_zero;
|
||||
}
|
||||
|
||||
// The only non-zero in the IV is len. It can be hard coded.
|
||||
ctx->chaining[ 6 ] = m512_const2_64( 0x0200000000000000, 0 );
|
||||
// uint64_t len = U64BIG((uint64_t)LENGTH);
|
||||
// ctx->chaining[ COLS/2 -1 ] = _mm512_set4_epi64( len, 0, len, 0 );
|
||||
// INIT_4way(ctx->chaining);
|
||||
|
||||
uint64_t len = U64BIG((uint64_t)LENGTH);
|
||||
ctx->chaining[ COLS/2 -1 ] = _mm512_set4_epi64( len, 0, len, 0 );
|
||||
INIT_4way(ctx->chaining);
|
||||
ctx->buf_ptr = 0;
|
||||
ctx->rem_ptr = 0;
|
||||
|
||||
|
@@ -115,7 +115,7 @@ __m512i ALL_FF;
|
||||
\
|
||||
/* compute z_i : double x_i using temp xmm8 and 1B xmm9 */\
|
||||
/* compute w_i : add y_{i+4} */\
|
||||
b1 = m512_const1_64( 0x1b1b1b1b1b1b1b1b );\
|
||||
b1 = ALL_1B;\
|
||||
MUL2(a0, b0, b1);\
|
||||
a0 = _mm512_xor_si512(a0, TEMP0);\
|
||||
MUL2(a1, b0, b1);\
|
||||
@@ -276,7 +276,7 @@ __m512i ALL_FF;
|
||||
for ( round_counter = 0; round_counter < 14; round_counter += 2) \
|
||||
{ \
|
||||
/* AddRoundConstant Q1024 */\
|
||||
xmm1 = m512_neg1;\
|
||||
xmm1 = ALL_FF;\
|
||||
xmm8 = _mm512_xor_si512( xmm8, xmm1 );\
|
||||
xmm9 = _mm512_xor_si512( xmm9, xmm1 );\
|
||||
xmm10 = _mm512_xor_si512( xmm10, xmm1 );\
|
||||
@@ -298,7 +298,7 @@ __m512i ALL_FF;
|
||||
SUBMIX(xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);\
|
||||
\
|
||||
/* AddRoundConstant Q1024 */\
|
||||
xmm9 = m512_neg1;\
|
||||
xmm9 = ALL_FF;\
|
||||
xmm0 = _mm512_xor_si512( xmm0, xmm9 );\
|
||||
xmm1 = _mm512_xor_si512( xmm1, xmm9 );\
|
||||
xmm2 = _mm512_xor_si512( xmm2, xmm9 );\
|
||||
|
@@ -150,8 +150,10 @@ int scanhash_bastion( struct work *work, uint32_t max_nonce,
|
||||
be32enc(&endiandata[19], n);
|
||||
bastionhash(hash32, endiandata);
|
||||
if (hash32[7] < Htarg && fulltest(hash32, ptarget)) {
|
||||
work_set_target_ratio(work, hash32);
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
pdata[19] = n;
|
||||
submit_solution( work, hash32, mythr );
|
||||
return true;
|
||||
}
|
||||
n++;
|
||||
|
||||
|
@@ -117,6 +117,9 @@ int scanhash_jha( struct work *work, uint32_t max_nonce,
|
||||
|
||||
jha_kec_midstate( endiandata );
|
||||
|
||||
#ifdef DEBUG_ALGO
|
||||
printf("[%d] Htarg=%X\n", thr_id, Htarg);
|
||||
#endif
|
||||
for (int m=0; m < 6; m++) {
|
||||
if (Htarg <= htmax[m]) {
|
||||
uint32_t mask = masks[m];
|
||||
@@ -124,9 +127,25 @@ int scanhash_jha( struct work *work, uint32_t max_nonce,
|
||||
pdata[19] = ++n;
|
||||
be32enc(&endiandata[19], n);
|
||||
jha_hash(hash32, endiandata);
|
||||
if ((!(hash32[7] & mask)) && fulltest(hash32, ptarget))
|
||||
submit_solution( work, hash32, mythr );
|
||||
#ifndef DEBUG_ALGO
|
||||
if ((!(hash32[7] & mask)) && fulltest(hash32, ptarget)) {
|
||||
work_set_target_ratio(work, hash32);
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return 1;
|
||||
}
|
||||
#else
|
||||
if (!(n % 0x1000) && !thr_id) printf(".");
|
||||
if (!(hash32[7] & mask)) {
|
||||
printf("[%d]",thr_id);
|
||||
if (fulltest(hash32, ptarget)) {
|
||||
work_set_target_ratio(work, hash32);
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
} while (n < max_nonce && !work_restart[thr_id].restart);
|
||||
// see blake.c if else to understand the loop on htmax => mask
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@@ -196,6 +196,7 @@ inline void reducedDuplexRowSetup_2way( uint64_t *State, uint64_t *rowIn,
|
||||
__m512i* in = (__m512i*)rowIn;
|
||||
__m512i* inout = (__m512i*)rowInOut;
|
||||
__m512i* out = (__m512i*)rowOut + ( (nCols-1) * BLOCK_LEN_M256I );
|
||||
__m512i t0, t1, t2;
|
||||
|
||||
state0 = _mm512_load_si512( (__m512i*)State );
|
||||
state1 = _mm512_load_si512( (__m512i*)State + 1 );
|
||||
@@ -217,27 +218,24 @@ inline void reducedDuplexRowSetup_2way( uint64_t *State, uint64_t *rowIn,
|
||||
out[1] = _mm512_xor_si512( state1, in[1] );
|
||||
out[2] = _mm512_xor_si512( state2, in[2] );
|
||||
|
||||
{
|
||||
register __m512i t0, t1, t2;
|
||||
|
||||
//M[row*][col] = M[row*][col] XOR rotW(rand)
|
||||
t0 = _mm512_permutex_epi64( state0, 0x93 );
|
||||
t1 = _mm512_permutex_epi64( state1, 0x93 );
|
||||
t2 = _mm512_permutex_epi64( state2, 0x93 );
|
||||
//M[row*][col] = M[row*][col] XOR rotW(rand)
|
||||
t0 = _mm512_permutex_epi64( state0, 0x93 );
|
||||
t1 = _mm512_permutex_epi64( state1, 0x93 );
|
||||
t2 = _mm512_permutex_epi64( state2, 0x93 );
|
||||
|
||||
inout[0] = _mm512_xor_si512( inout[0],
|
||||
_mm512_mask_blend_epi64( 0x11, t0, t2 ) );
|
||||
inout[1] = _mm512_xor_si512( inout[1],
|
||||
_mm512_mask_blend_epi64( 0x11, t1, t0 ) );
|
||||
inout[2] = _mm512_xor_si512( inout[2],
|
||||
_mm512_mask_blend_epi64( 0x11, t2, t1 ) );
|
||||
}
|
||||
inout[0] = _mm512_xor_si512( inout[0],
|
||||
_mm512_mask_blend_epi32( 0x0303, t0, t2 ) );
|
||||
inout[1] = _mm512_xor_si512( inout[1],
|
||||
_mm512_mask_blend_epi32( 0x0303, t1, t0 ) );
|
||||
inout[2] = _mm512_xor_si512( inout[2],
|
||||
_mm512_mask_blend_epi32( 0x0303, t2, t1 ) );
|
||||
|
||||
//Inputs: next column (i.e., next block in sequence)
|
||||
in += BLOCK_LEN_M256I;
|
||||
inout += BLOCK_LEN_M256I;
|
||||
//Output: goes to previous column
|
||||
out -= BLOCK_LEN_M256I;
|
||||
|
||||
//Inputs: next column (i.e., next block in sequence)
|
||||
in += BLOCK_LEN_M256I;
|
||||
inout += BLOCK_LEN_M256I;
|
||||
//Output: goes to previous column
|
||||
out -= BLOCK_LEN_M256I;
|
||||
}
|
||||
|
||||
_mm512_store_si512( (__m512i*)State, state0 );
|
||||
@@ -246,6 +244,9 @@ inline void reducedDuplexRowSetup_2way( uint64_t *State, uint64_t *rowIn,
|
||||
_mm512_store_si512( (__m512i*)State + 3, state3 );
|
||||
}
|
||||
|
||||
// big ugly workaround for pointer aliasing, use a union of pointers.
|
||||
// Access matrix using m512i for in and out, m256i for inout
|
||||
|
||||
inline void reducedDuplexRow_2way( uint64_t *State, uint64_t *rowIn,
|
||||
uint64_t *rowInOut0, uint64_t *rowInOut1,
|
||||
uint64_t *rowOut, uint64_t nCols)
|
||||
@@ -256,81 +257,95 @@ inline void reducedDuplexRow_2way( uint64_t *State, uint64_t *rowIn,
|
||||
__m256i *inout0 = (__m256i*)rowInOut0;
|
||||
__m256i *inout1 = (__m256i*)rowInOut1;
|
||||
__m512i *out = (__m512i*)rowOut;
|
||||
register __m512i io0, io1, io2;
|
||||
__m512i io[3];
|
||||
povly inout;
|
||||
inout.v512 = &io[0];
|
||||
__m512i t0, t1, t2;
|
||||
|
||||
state0 = _mm512_load_si512( (__m512i*)State );
|
||||
state1 = _mm512_load_si512( (__m512i*)State + 1 );
|
||||
state2 = _mm512_load_si512( (__m512i*)State + 2 );
|
||||
state3 = _mm512_load_si512( (__m512i*)State + 3 );
|
||||
|
||||
for ( i = 0; i < nCols; i++ )
|
||||
{
|
||||
//Absorbing "M[prev] [+] M[row*]"
|
||||
io0 = _mm512_mask_blend_epi64( 0xf0,
|
||||
_mm512_load_si512( (__m512i*)inout0 ),
|
||||
_mm512_load_si512( (__m512i*)inout1 ) );
|
||||
io1 = _mm512_mask_blend_epi64( 0xf0,
|
||||
_mm512_load_si512( (__m512i*)inout0 +1 ),
|
||||
_mm512_load_si512( (__m512i*)inout1 +1 ) );
|
||||
io2 = _mm512_mask_blend_epi64( 0xf0,
|
||||
_mm512_load_si512( (__m512i*)inout0 +2 ),
|
||||
_mm512_load_si512( (__m512i*)inout1 +2 ) );
|
||||
_mm_prefetch( in, _MM_HINT_T0 );
|
||||
_mm_prefetch( inout0, _MM_HINT_T0 );
|
||||
_mm_prefetch( inout1, _MM_HINT_T0 );
|
||||
_mm_prefetch( in + 2, _MM_HINT_T0 );
|
||||
_mm_prefetch( inout0 + 2, _MM_HINT_T0 );
|
||||
_mm_prefetch( inout1 + 2, _MM_HINT_T0 );
|
||||
_mm_prefetch( in + 4, _MM_HINT_T0 );
|
||||
_mm_prefetch( inout0 + 4, _MM_HINT_T0 );
|
||||
_mm_prefetch( inout1 + 4, _MM_HINT_T0 );
|
||||
_mm_prefetch( in + 6, _MM_HINT_T0 );
|
||||
_mm_prefetch( inout0 + 6, _MM_HINT_T0 );
|
||||
_mm_prefetch( inout1 + 6, _MM_HINT_T0 );
|
||||
|
||||
state0 = _mm512_xor_si512( state0, _mm512_add_epi64( in[0], io0 ) );
|
||||
state1 = _mm512_xor_si512( state1, _mm512_add_epi64( in[1], io1 ) );
|
||||
state2 = _mm512_xor_si512( state2, _mm512_add_epi64( in[2], io2 ) );
|
||||
|
||||
for ( i = 0; i < nCols; i++ )
|
||||
{
|
||||
|
||||
//Applies the reduced-round transformation f to the sponge's state
|
||||
LYRA_ROUND_2WAY_AVX512( state0, state1, state2, state3 );
|
||||
//Absorbing "M[prev] [+] M[row*]"
|
||||
inout.v256[0] = inout0[0];
|
||||
inout.v256[1] = inout1[1];
|
||||
inout.v256[2] = inout0[2];
|
||||
inout.v256[3] = inout1[3];
|
||||
inout.v256[4] = inout0[4];
|
||||
inout.v256[5] = inout1[5];
|
||||
|
||||
{
|
||||
register __m512i t0, t1, t2;
|
||||
state0 = _mm512_xor_si512( state0,
|
||||
_mm512_add_epi64( in[0], inout.v512[0] ) );
|
||||
state1 = _mm512_xor_si512( state1,
|
||||
_mm512_add_epi64( in[1], inout.v512[1] ) );
|
||||
state2 = _mm512_xor_si512( state2,
|
||||
_mm512_add_epi64( in[2], inout.v512[2] ) );
|
||||
|
||||
//M[rowOut][col] = M[rowOut][col] XOR rand
|
||||
t0 = _mm512_xor_si512( out[0], state0 );
|
||||
t1 = _mm512_xor_si512( out[1], state1 );
|
||||
t2 = _mm512_xor_si512( out[2], state2 );
|
||||
|
||||
// if out is the same row as inout, update with new data.
|
||||
if ( rowOut == rowInOut0 )
|
||||
{
|
||||
io0 = _mm512_mask_blend_epi64( 0x0f, io0, t0 );
|
||||
io1 = _mm512_mask_blend_epi64( 0x0f, io1, t1 );
|
||||
io2 = _mm512_mask_blend_epi64( 0x0f, io2, t2 );
|
||||
}
|
||||
if ( rowOut == rowInOut1 )
|
||||
{
|
||||
io0 = _mm512_mask_blend_epi64( 0xf0, io0, t0 );
|
||||
io1 = _mm512_mask_blend_epi64( 0xf0, io1, t1 );
|
||||
io2 = _mm512_mask_blend_epi64( 0xf0, io2, t2 );
|
||||
}
|
||||
//Applies the reduced-round transformation f to the sponge's state
|
||||
LYRA_ROUND_2WAY_AVX512( state0, state1, state2, state3 );
|
||||
|
||||
out[0] = t0;
|
||||
out[1] = t1;
|
||||
out[2] = t2;
|
||||
//M[rowOut][col] = M[rowOut][col] XOR rand
|
||||
out[0] = _mm512_xor_si512( out[0], state0 );
|
||||
out[1] = _mm512_xor_si512( out[1], state1 );
|
||||
out[2] = _mm512_xor_si512( out[2], state2 );
|
||||
|
||||
//M[rowInOut][col] = M[rowInOut][col] XOR rotW(rand)
|
||||
t0 = _mm512_permutex_epi64( state0, 0x93 );
|
||||
t1 = _mm512_permutex_epi64( state1, 0x93 );
|
||||
t2 = _mm512_permutex_epi64( state2, 0x93 );
|
||||
// if inout is the same row as out it was just overwritten, reload.
|
||||
if ( rowOut == rowInOut0 )
|
||||
{
|
||||
inout.v256[0] = inout0[0];
|
||||
inout.v256[2] = inout0[2];
|
||||
inout.v256[4] = inout0[4];
|
||||
}
|
||||
if ( rowOut == rowInOut1 )
|
||||
{
|
||||
inout.v256[1] = inout1[1];
|
||||
inout.v256[3] = inout1[3];
|
||||
inout.v256[5] = inout1[5];
|
||||
}
|
||||
|
||||
io0 = _mm512_xor_si512( io0, _mm512_mask_blend_epi64( 0x11, t0, t2 ) );
|
||||
io1 = _mm512_xor_si512( io1, _mm512_mask_blend_epi64( 0x11, t1, t0 ) );
|
||||
io2 = _mm512_xor_si512( io2, _mm512_mask_blend_epi64( 0x11, t2, t1 ) );
|
||||
}
|
||||
//M[rowInOut][col] = M[rowInOut][col] XOR rotW(rand)
|
||||
t0 = _mm512_permutex_epi64( state0, 0x93 );
|
||||
t1 = _mm512_permutex_epi64( state1, 0x93 );
|
||||
t2 = _mm512_permutex_epi64( state2, 0x93 );
|
||||
|
||||
_mm512_mask_store_epi64( (__m512i*)inout0, 0x0f, io0 );
|
||||
_mm512_mask_store_epi64( (__m512i*)inout1, 0xf0, io0 );
|
||||
_mm512_mask_store_epi64( (__m512i*)inout0 +1, 0x0f, io1 );
|
||||
_mm512_mask_store_epi64( (__m512i*)inout1 +1, 0xf0, io1 );
|
||||
_mm512_mask_store_epi64( (__m512i*)inout0 +2, 0x0f, io2 );
|
||||
_mm512_mask_store_epi64( (__m512i*)inout1 +2, 0xf0, io2 );
|
||||
inout.v512[0] = _mm512_xor_si512( inout.v512[0],
|
||||
_mm512_mask_blend_epi32( 0x0303, t0, t2 ) );
|
||||
inout.v512[1] = _mm512_xor_si512( inout.v512[1],
|
||||
_mm512_mask_blend_epi32( 0x0303, t1, t0 ) );
|
||||
inout.v512[2] = _mm512_xor_si512( inout.v512[2],
|
||||
_mm512_mask_blend_epi32( 0x0303, t2, t1 ) );
|
||||
|
||||
inout0[0] = inout.v256[0];
|
||||
inout1[1] = inout.v256[1];
|
||||
inout0[2] = inout.v256[2];
|
||||
inout1[3] = inout.v256[3];
|
||||
inout0[4] = inout.v256[4];
|
||||
inout1[5] = inout.v256[5];
|
||||
|
||||
//Goes to next block
|
||||
in += BLOCK_LEN_M256I;
|
||||
inout0 += BLOCK_LEN_M256I * 2;
|
||||
inout1 += BLOCK_LEN_M256I * 2;
|
||||
out += BLOCK_LEN_M256I;
|
||||
//Goes to next block
|
||||
in += BLOCK_LEN_M256I;
|
||||
inout0 += BLOCK_LEN_M256I * 2;
|
||||
inout1 += BLOCK_LEN_M256I * 2;
|
||||
out += BLOCK_LEN_M256I;
|
||||
}
|
||||
|
||||
_mm512_store_si512( (__m512i*)State, state0 );
|
||||
|
@@ -25,6 +25,7 @@ void nist5hash(void *output, const void *input)
|
||||
sph_skein512_context ctx_skein;
|
||||
sph_jh512_context ctx_jh;
|
||||
sph_keccak512_context ctx_keccak;
|
||||
uint32_t mask = 8;
|
||||
|
||||
sph_blake512_init( &ctx_blake );
|
||||
sph_blake512( &ctx_blake, input, 80 );
|
||||
@@ -58,10 +59,10 @@ void nist5hash(void *output, const void *input)
|
||||
int scanhash_nist5( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr)
|
||||
{
|
||||
uint32_t endiandata[20] __attribute__((aligned(64)));
|
||||
uint32_t hash64[8] __attribute__((aligned(32)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t endiandata[20] __attribute__((aligned(64)));
|
||||
uint32_t hash64[8] __attribute__((aligned(32)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[19] - 1;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
@@ -87,6 +88,9 @@ int scanhash_nist5( struct work *work, uint32_t max_nonce,
|
||||
// we need bigendian data...
|
||||
swab32_array( endiandata, pdata, 20 );
|
||||
|
||||
#ifdef DEBUG_ALGO
|
||||
printf("[%d] Htarg=%X\n", thr_id, Htarg);
|
||||
#endif
|
||||
for (int m=0; m < 6; m++) {
|
||||
if (Htarg <= htmax[m]) {
|
||||
uint32_t mask = masks[m];
|
||||
@@ -94,9 +98,24 @@ int scanhash_nist5( struct work *work, uint32_t max_nonce,
|
||||
pdata[19] = ++n;
|
||||
be32enc(&endiandata[19], n);
|
||||
nist5hash(hash64, endiandata);
|
||||
if ((!(hash64[7] & mask)) && fulltest(hash64, ptarget))
|
||||
submit_solution( work, hash64, mythr );
|
||||
#ifndef DEBUG_ALGO
|
||||
if ((!(hash64[7] & mask)) && fulltest(hash64, ptarget)) {
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return true;
|
||||
}
|
||||
#else
|
||||
if (!(n % 0x1000) && !thr_id) printf(".");
|
||||
if (!(hash64[7] & mask)) {
|
||||
printf("[%d]",thr_id);
|
||||
if (fulltest(hash64, ptarget)) {
|
||||
work_set_target_ratio( work, hash64 );
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
} while (n < max_nonce && !work_restart[thr_id].restart);
|
||||
// see blake.c if else to understand the loop on htmax => mask
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@@ -142,7 +142,11 @@ int scanhash_zr5( struct work *work, uint32_t max_nonce,
|
||||
{
|
||||
pdata[0] = tmpdata[0];
|
||||
pdata[19] = nonce;
|
||||
submit_solution( work, hash, mythr );
|
||||
*hashes_done = pdata[19] - first_nonce + 1;
|
||||
work_set_target_ratio( work, hash );
|
||||
if (opt_debug)
|
||||
applog(LOG_INFO, "found nonce %x", nonce);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
nonce++;
|
||||
|
@@ -6,25 +6,26 @@
|
||||
|
||||
#define M17( macro ) \
|
||||
do { \
|
||||
macro( 0, 1, 2, 4 ); \
|
||||
macro( 1, 2, 3, 5 ); \
|
||||
macro( 2, 3, 4, 6 ); \
|
||||
macro( 3, 4, 5, 7 ); \
|
||||
macro( 4, 5, 6, 8 ); \
|
||||
macro( 5, 6, 7, 9 ); \
|
||||
macro( 6, 7, 8, 10 ); \
|
||||
macro( 7, 8, 9, 11 ); \
|
||||
macro( 8, 9, 10, 12 ); \
|
||||
macro( 9, 10, 11, 13 ); \
|
||||
macro( 10, 11, 12, 14 ); \
|
||||
macro( 11, 12, 13, 15 ); \
|
||||
macro( 12, 13, 14, 16 ); \
|
||||
macro( 13, 14, 15, 0 ); \
|
||||
macro( 14, 15, 16, 1 ); \
|
||||
macro( 15, 16, 0, 2 ); \
|
||||
macro( 16, 0, 1, 3 ); \
|
||||
macro( 0, 1, 2, 4); \
|
||||
macro( 1, 2, 3, 5); \
|
||||
macro( 2, 3, 4, 6); \
|
||||
macro( 3, 4, 5, 7); \
|
||||
macro( 4, 5, 6, 8); \
|
||||
macro( 5, 6, 7, 9); \
|
||||
macro( 6, 7, 8, 10); \
|
||||
macro( 7, 8, 9, 11); \
|
||||
macro( 8, 9, 10, 12); \
|
||||
macro( 9, 10, 11, 13); \
|
||||
macro(10, 11, 12, 14); \
|
||||
macro(11, 12, 13, 15); \
|
||||
macro(12, 13, 14, 16); \
|
||||
macro(13, 14, 15, 0); \
|
||||
macro(14, 15, 16, 1); \
|
||||
macro(15, 16, 0, 2); \
|
||||
macro(16, 0, 1, 3); \
|
||||
} while (0)
|
||||
|
||||
|
||||
#define RSTATE(n0, n1, n2, n4) (a ## n0 = sc->state[n0])
|
||||
|
||||
#define WSTATE(n0, n1, n2, n4) (sc->state[n0] = a ## n0)
|
||||
@@ -49,7 +50,9 @@ do { \
|
||||
|
||||
#define LVARS_4W \
|
||||
LVAR17_4W(a) \
|
||||
LVAR17_4W(g)
|
||||
LVAR17_4W(g) \
|
||||
LVAR17_4W(p) \
|
||||
LVAR17_4W(t)
|
||||
|
||||
#define BUPDATE1_4W( n0, n2 ) \
|
||||
do { \
|
||||
@@ -60,14 +63,14 @@ do { \
|
||||
|
||||
#define BUPDATE_4W \
|
||||
do { \
|
||||
BUPDATE1_4W( 0, 2 ); \
|
||||
BUPDATE1_4W( 1, 3 ); \
|
||||
BUPDATE1_4W( 2, 4 ); \
|
||||
BUPDATE1_4W( 3, 5 ); \
|
||||
BUPDATE1_4W( 4, 6 ); \
|
||||
BUPDATE1_4W( 5, 7 ); \
|
||||
BUPDATE1_4W( 6, 0 ); \
|
||||
BUPDATE1_4W( 7, 1 ); \
|
||||
BUPDATE1_4W(0, 2); \
|
||||
BUPDATE1_4W(1, 3); \
|
||||
BUPDATE1_4W(2, 4); \
|
||||
BUPDATE1_4W(3, 5); \
|
||||
BUPDATE1_4W(4, 6); \
|
||||
BUPDATE1_4W(5, 7); \
|
||||
BUPDATE1_4W(6, 0); \
|
||||
BUPDATE1_4W(7, 1); \
|
||||
} while (0)
|
||||
|
||||
#define GAMMA_4W(n0, n1, n2, n4) \
|
||||
@@ -75,46 +78,46 @@ do { \
|
||||
_mm_or_si128( a ## n1, mm128_not( a ## n2 ) ) ) )
|
||||
|
||||
#define PI_ALL_4W do { \
|
||||
a0 = g0; \
|
||||
a1 = mm128_rol_32( g7, 1 ); \
|
||||
a2 = mm128_rol_32( g14, 3 ); \
|
||||
a3 = mm128_rol_32( g4, 6 ); \
|
||||
a4 = mm128_rol_32( g11, 10 ); \
|
||||
a5 = mm128_rol_32( g1, 15 ); \
|
||||
a6 = mm128_rol_32( g8, 21 ); \
|
||||
a7 = mm128_rol_32( g15, 28 ); \
|
||||
a8 = mm128_rol_32( g5, 4 ); \
|
||||
a9 = mm128_rol_32( g12, 13 ); \
|
||||
a10 = mm128_rol_32( g2, 23 ); \
|
||||
a11 = mm128_rol_32( g9, 2 ); \
|
||||
a12 = mm128_rol_32( g16, 14 ); \
|
||||
a13 = mm128_rol_32( g6, 27 ); \
|
||||
a14 = mm128_rol_32( g13, 9 ); \
|
||||
a15 = mm128_rol_32( g3, 24 ); \
|
||||
a16 = mm128_rol_32( g10, 8 ); \
|
||||
p0 = g0; \
|
||||
p1 = mm128_rol_32( g7, 1 ); \
|
||||
p2 = mm128_rol_32( g14, 3 ); \
|
||||
p3 = mm128_rol_32( g4, 6 ); \
|
||||
p4 = mm128_rol_32( g11, 10 ); \
|
||||
p5 = mm128_rol_32( g1, 15 ); \
|
||||
p6 = mm128_rol_32( g8, 21 ); \
|
||||
p7 = mm128_rol_32( g15, 28 ); \
|
||||
p8 = mm128_rol_32( g5, 4 ); \
|
||||
p9 = mm128_rol_32( g12, 13 ); \
|
||||
p10 = mm128_rol_32( g2, 23 ); \
|
||||
p11 = mm128_rol_32( g9, 2 ); \
|
||||
p12 = mm128_rol_32( g16, 14 ); \
|
||||
p13 = mm128_rol_32( g6, 27 ); \
|
||||
p14 = mm128_rol_32( g13, 9 ); \
|
||||
p15 = mm128_rol_32( g3, 24 ); \
|
||||
p16 = mm128_rol_32( g10, 8 ); \
|
||||
} while (0)
|
||||
|
||||
#define THETA_4W(n0, n1, n2, n4) \
|
||||
( g ## n0 = _mm_xor_si128( a ## n0, _mm_xor_si128( a ## n1, a ## n4 ) ) )
|
||||
( t ## n0 = _mm_xor_si128( p ## n0, _mm_xor_si128( p ## n1, p ## n4 ) ) )
|
||||
|
||||
#define SIGMA_ALL_4W do { \
|
||||
a0 = _mm_xor_si128( g0, m128_one_32 ); \
|
||||
a1 = _mm_xor_si128( g1, INW2( 0 ) ); \
|
||||
a2 = _mm_xor_si128( g2, INW2( 1 ) ); \
|
||||
a3 = _mm_xor_si128( g3, INW2( 2 ) ); \
|
||||
a4 = _mm_xor_si128( g4, INW2( 3 ) ); \
|
||||
a5 = _mm_xor_si128( g5, INW2( 4 ) ); \
|
||||
a6 = _mm_xor_si128( g6, INW2( 5 ) ); \
|
||||
a7 = _mm_xor_si128( g7, INW2( 6 ) ); \
|
||||
a8 = _mm_xor_si128( g8, INW2( 7 ) ); \
|
||||
a9 = _mm_xor_si128( g9, sc->buffer[ ptr16 ][0] ); \
|
||||
a10 = _mm_xor_si128( g10, sc->buffer[ ptr16 ][1] ); \
|
||||
a11 = _mm_xor_si128( g11, sc->buffer[ ptr16 ][2] ); \
|
||||
a12 = _mm_xor_si128( g12, sc->buffer[ ptr16 ][3] ); \
|
||||
a13 = _mm_xor_si128( g13, sc->buffer[ ptr16 ][4] ); \
|
||||
a14 = _mm_xor_si128( g14, sc->buffer[ ptr16 ][5] ); \
|
||||
a15 = _mm_xor_si128( g15, sc->buffer[ ptr16 ][6] ); \
|
||||
a16 = _mm_xor_si128( g16, sc->buffer[ ptr16 ][7] ); \
|
||||
a0 = _mm_xor_si128( t0, _mm_set1_epi32( 1 ) ); \
|
||||
a1 = _mm_xor_si128( t1, INW2( 0 ) ); \
|
||||
a2 = _mm_xor_si128( t2, INW2( 1 ) ); \
|
||||
a3 = _mm_xor_si128( t3, INW2( 2 ) ); \
|
||||
a4 = _mm_xor_si128( t4, INW2( 3 ) ); \
|
||||
a5 = _mm_xor_si128( t5, INW2( 4 ) ); \
|
||||
a6 = _mm_xor_si128( t6, INW2( 5 ) ); \
|
||||
a7 = _mm_xor_si128( t7, INW2( 6 ) ); \
|
||||
a8 = _mm_xor_si128( t8, INW2( 7 ) ); \
|
||||
a9 = _mm_xor_si128( t9, sc->buffer[ ptr16 ] [0 ] ); \
|
||||
a10 = _mm_xor_si128( t10, sc->buffer[ ptr16 ] [1 ] ); \
|
||||
a11 = _mm_xor_si128( t11, sc->buffer[ ptr16 ] [2 ] ); \
|
||||
a12 = _mm_xor_si128( t12, sc->buffer[ ptr16 ] [3 ] ); \
|
||||
a13 = _mm_xor_si128( t13, sc->buffer[ ptr16 ] [4 ] ); \
|
||||
a14 = _mm_xor_si128( t14, sc->buffer[ ptr16 ] [5 ] ); \
|
||||
a15 = _mm_xor_si128( t15, sc->buffer[ ptr16 ] [6 ] ); \
|
||||
a16 = _mm_xor_si128( t16, sc->buffer[ ptr16 ] [7 ] ); \
|
||||
} while (0)
|
||||
|
||||
#define PANAMA_STEP_4W do { \
|
||||
@@ -142,9 +145,9 @@ panama_4way_push( panama_4way_context *sc, const unsigned char *pbuf,
|
||||
#define INW2(i) INW1(i)
|
||||
|
||||
M17( RSTATE );
|
||||
|
||||
ptr0 = sc->buffer_ptr;
|
||||
while ( num-- > 0 )
|
||||
{
|
||||
while (num -- > 0) {
|
||||
PANAMA_STEP_4W;
|
||||
pbuf = (const unsigned char *)pbuf + 32*4;
|
||||
}
|
||||
@@ -170,11 +173,14 @@ panama_4way_pull( panama_4way_context *sc, unsigned num )
|
||||
#define INW2(i) casti_m128i( sc->buffer[ptr4], i )
|
||||
|
||||
M17( RSTATE );
|
||||
|
||||
ptr0 = sc->buffer_ptr;
|
||||
while ( num-- > 0 )
|
||||
{
|
||||
|
||||
while (num -- > 0) {
|
||||
unsigned ptr4;
|
||||
|
||||
ptr4 = ( (ptr0 + 4) & 31 );
|
||||
|
||||
PANAMA_STEP_4W;
|
||||
}
|
||||
M17( WSTATE );
|
||||
@@ -186,11 +192,18 @@ panama_4way_pull( panama_4way_context *sc, unsigned num )
|
||||
}
|
||||
|
||||
void
|
||||
panama_4way_init( void *cc )
|
||||
panama_4way_init(void *cc)
|
||||
{
|
||||
panama_4way_context *sc;
|
||||
|
||||
sc = cc;
|
||||
/*
|
||||
* This is not completely conformant, but "it will work
|
||||
* everywhere". Initial state consists of zeroes everywhere.
|
||||
* Conceptually, the sph_u32 type may have padding bits which
|
||||
* must not be set to 0; but such an architecture remains to
|
||||
* be seen.
|
||||
*/
|
||||
sc->data_ptr = 0;
|
||||
memset( sc->buffer, 0, sizeof sc->buffer );
|
||||
sc->buffer_ptr = 0;
|
||||
@@ -204,8 +217,7 @@ panama_4way_short( void *cc, const void *data, size_t len )
|
||||
unsigned current;
|
||||
sc = cc;
|
||||
current = sc->data_ptr;
|
||||
while ( len > 0 )
|
||||
{
|
||||
while (len > 0) {
|
||||
unsigned clen;
|
||||
|
||||
clen = ( (sizeof sc->data ) >> 2 ) - current;
|
||||
@@ -271,8 +283,11 @@ panama_4way_close( void *cc, void *dst )
|
||||
*(__m128i*)( sc->data + current ) = m128_one_32;
|
||||
current++;
|
||||
memset_zero_128( (__m128i*)sc->data + current, 32 - current );
|
||||
|
||||
panama_4way_push( sc, sc->data, 1 );
|
||||
|
||||
panama_4way_pull( sc, 32 );
|
||||
|
||||
for ( i = 0; i < 8; i ++ )
|
||||
casti_m128i( dst, i ) = sc->state[i + 9];
|
||||
}
|
||||
@@ -291,7 +306,9 @@ panama_4way_close( void *cc, void *dst )
|
||||
|
||||
#define LVARS_8W \
|
||||
LVAR17_8W(a) \
|
||||
LVAR17_8W(g)
|
||||
LVAR17_8W(g) \
|
||||
LVAR17_8W(p) \
|
||||
LVAR17_8W(t)
|
||||
|
||||
#define BUPDATE1_8W( n0, n2 ) \
|
||||
do { \
|
||||
@@ -302,14 +319,14 @@ do { \
|
||||
|
||||
#define BUPDATE_8W \
|
||||
do { \
|
||||
BUPDATE1_8W( 0, 2 ); \
|
||||
BUPDATE1_8W( 1, 3 ); \
|
||||
BUPDATE1_8W( 2, 4 ); \
|
||||
BUPDATE1_8W( 3, 5 ); \
|
||||
BUPDATE1_8W( 4, 6 ); \
|
||||
BUPDATE1_8W( 5, 7 ); \
|
||||
BUPDATE1_8W( 6, 0 ); \
|
||||
BUPDATE1_8W( 7, 1 ); \
|
||||
BUPDATE1_8W(0, 2); \
|
||||
BUPDATE1_8W(1, 3); \
|
||||
BUPDATE1_8W(2, 4); \
|
||||
BUPDATE1_8W(3, 5); \
|
||||
BUPDATE1_8W(4, 6); \
|
||||
BUPDATE1_8W(5, 7); \
|
||||
BUPDATE1_8W(6, 0); \
|
||||
BUPDATE1_8W(7, 1); \
|
||||
} while (0)
|
||||
|
||||
#define GAMMA_8W(n0, n1, n2, n4) \
|
||||
@@ -317,47 +334,46 @@ do { \
|
||||
_mm256_or_si256( a ## n1, mm256_not( a ## n2 ) ) ) )
|
||||
|
||||
#define PI_ALL_8W do { \
|
||||
a0 = g0; \
|
||||
a1 = mm256_rol_32( g7, 1 ); \
|
||||
a2 = mm256_rol_32( g14, 3 ); \
|
||||
a3 = mm256_rol_32( g4, 6 ); \
|
||||
a4 = mm256_rol_32( g11, 10 ); \
|
||||
a5 = mm256_rol_32( g1, 15 ); \
|
||||
a6 = mm256_rol_32( g8, 21 ); \
|
||||
a7 = mm256_rol_32( g15, 28 ); \
|
||||
a8 = mm256_rol_32( g5, 4 ); \
|
||||
a9 = mm256_rol_32( g12, 13 ); \
|
||||
a10 = mm256_rol_32( g2, 23 ); \
|
||||
a11 = mm256_rol_32( g9, 2 ); \
|
||||
a12 = mm256_rol_32( g16, 14 ); \
|
||||
a13 = mm256_rol_32( g6, 27 ); \
|
||||
a14 = mm256_rol_32( g13, 9 ); \
|
||||
a15 = mm256_rol_32( g3, 24 ); \
|
||||
a16 = mm256_rol_32( g10, 8 ); \
|
||||
p0 = g0; \
|
||||
p1 = mm256_rol_32( g7, 1 ); \
|
||||
p2 = mm256_rol_32( g14, 3 ); \
|
||||
p3 = mm256_rol_32( g4, 6 ); \
|
||||
p4 = mm256_rol_32( g11, 10 ); \
|
||||
p5 = mm256_rol_32( g1, 15 ); \
|
||||
p6 = mm256_rol_32( g8, 21 ); \
|
||||
p7 = mm256_rol_32( g15, 28 ); \
|
||||
p8 = mm256_rol_32( g5, 4 ); \
|
||||
p9 = mm256_rol_32( g12, 13 ); \
|
||||
p10 = mm256_rol_32( g2, 23 ); \
|
||||
p11 = mm256_rol_32( g9, 2 ); \
|
||||
p12 = mm256_rol_32( g16, 14 ); \
|
||||
p13 = mm256_rol_32( g6, 27 ); \
|
||||
p14 = mm256_rol_32( g13, 9 ); \
|
||||
p15 = mm256_rol_32( g3, 24 ); \
|
||||
p16 = mm256_rol_32( g10, 8 ); \
|
||||
} while (0)
|
||||
|
||||
#define THETA_8W(n0, n1, n2, n4) \
|
||||
( g ## n0 = _mm256_xor_si256( a ## n0, _mm256_xor_si256( a ## n1, \
|
||||
a ## n4 ) ) )
|
||||
( t ## n0 = _mm256_xor_si256( p ## n0, _mm256_xor_si256( p ## n1, p ## n4 ) ) )
|
||||
|
||||
#define SIGMA_ALL_8W do { \
|
||||
a0 = _mm256_xor_si256( g0, m256_one_32 ); \
|
||||
a1 = _mm256_xor_si256( g1, INW2( 0 ) ); \
|
||||
a2 = _mm256_xor_si256( g2, INW2( 1 ) ); \
|
||||
a3 = _mm256_xor_si256( g3, INW2( 2 ) ); \
|
||||
a4 = _mm256_xor_si256( g4, INW2( 3 ) ); \
|
||||
a5 = _mm256_xor_si256( g5, INW2( 4 ) ); \
|
||||
a6 = _mm256_xor_si256( g6, INW2( 5 ) ); \
|
||||
a7 = _mm256_xor_si256( g7, INW2( 6 ) ); \
|
||||
a8 = _mm256_xor_si256( g8, INW2( 7 ) ); \
|
||||
a9 = _mm256_xor_si256( g9, sc->buffer[ ptr16 ][0] ); \
|
||||
a10 = _mm256_xor_si256( g10, sc->buffer[ ptr16 ][1] ); \
|
||||
a11 = _mm256_xor_si256( g11, sc->buffer[ ptr16 ][2] ); \
|
||||
a12 = _mm256_xor_si256( g12, sc->buffer[ ptr16 ][3] ); \
|
||||
a13 = _mm256_xor_si256( g13, sc->buffer[ ptr16 ][4] ); \
|
||||
a14 = _mm256_xor_si256( g14, sc->buffer[ ptr16 ][5] ); \
|
||||
a15 = _mm256_xor_si256( g15, sc->buffer[ ptr16 ][6] ); \
|
||||
a16 = _mm256_xor_si256( g16, sc->buffer[ ptr16 ][7] ); \
|
||||
a0 = _mm256_xor_si256( t0, m256_one_32 ); \
|
||||
a1 = _mm256_xor_si256( t1, INW2( 0 ) ); \
|
||||
a2 = _mm256_xor_si256( t2, INW2( 1 ) ); \
|
||||
a3 = _mm256_xor_si256( t3, INW2( 2 ) ); \
|
||||
a4 = _mm256_xor_si256( t4, INW2( 3 ) ); \
|
||||
a5 = _mm256_xor_si256( t5, INW2( 4 ) ); \
|
||||
a6 = _mm256_xor_si256( t6, INW2( 5 ) ); \
|
||||
a7 = _mm256_xor_si256( t7, INW2( 6 ) ); \
|
||||
a8 = _mm256_xor_si256( t8, INW2( 7 ) ); \
|
||||
a9 = _mm256_xor_si256( t9, sc->buffer[ ptr16 ] [0 ] ); \
|
||||
a10 = _mm256_xor_si256( t10, sc->buffer[ ptr16 ] [1 ] ); \
|
||||
a11 = _mm256_xor_si256( t11, sc->buffer[ ptr16 ] [2 ] ); \
|
||||
a12 = _mm256_xor_si256( t12, sc->buffer[ ptr16 ] [3 ] ); \
|
||||
a13 = _mm256_xor_si256( t13, sc->buffer[ ptr16 ] [4 ] ); \
|
||||
a14 = _mm256_xor_si256( t14, sc->buffer[ ptr16 ] [5 ] ); \
|
||||
a15 = _mm256_xor_si256( t15, sc->buffer[ ptr16 ] [6 ] ); \
|
||||
a16 = _mm256_xor_si256( t16, sc->buffer[ ptr16 ] [7 ] ); \
|
||||
} while (0)
|
||||
|
||||
#define PANAMA_STEP_8W do { \
|
||||
@@ -385,6 +401,7 @@ panama_8way_push( panama_8way_context *sc, const unsigned char *pbuf,
|
||||
#define INW2(i) INW1(i)
|
||||
|
||||
M17( RSTATE );
|
||||
|
||||
ptr0 = sc->buffer_ptr;
|
||||
while ( num-- > 0 )
|
||||
{
|
||||
@@ -412,9 +429,9 @@ panama_8way_pull( panama_8way_context *sc, unsigned num )
|
||||
|
||||
ptr0 = sc->buffer_ptr;
|
||||
|
||||
while ( num-- > 0 )
|
||||
{
|
||||
while (num -- > 0) {
|
||||
unsigned ptr4;
|
||||
|
||||
ptr4 = ( (ptr0 + 4) & 31 );
|
||||
PANAMA_STEP_8W;
|
||||
}
|
||||
@@ -432,6 +449,13 @@ panama_8way_init( void *cc )
|
||||
panama_8way_context *sc;
|
||||
|
||||
sc = cc;
|
||||
/*
|
||||
* This is not completely conformant, but "it will work
|
||||
* everywhere". Initial state consists of zeroes everywhere.
|
||||
* Conceptually, the sph_u32 type may have padding bits which
|
||||
* must not be set to 0; but such an architecture remains to
|
||||
* be seen.
|
||||
*/
|
||||
sc->data_ptr = 0;
|
||||
memset( sc->buffer, 0, sizeof sc->buffer );
|
||||
sc->buffer_ptr = 0;
|
||||
@@ -445,8 +469,7 @@ panama_8way_short( void *cc, const void *data, size_t len )
|
||||
unsigned current;
|
||||
sc = cc;
|
||||
current = sc->data_ptr;
|
||||
while ( len > 0 )
|
||||
{
|
||||
while (len > 0) {
|
||||
unsigned clen;
|
||||
|
||||
clen = ( (sizeof sc->data ) >> 3 ) - current;
|
||||
@@ -463,6 +486,7 @@ panama_8way_short( void *cc, const void *data, size_t len )
|
||||
panama_8way_push( sc, sc->data, 1 );
|
||||
}
|
||||
}
|
||||
|
||||
sc->data_ptr = current;
|
||||
}
|
||||
|
||||
@@ -511,7 +535,9 @@ panama_8way_close( void *cc, void *dst )
|
||||
*(__m256i*)( sc->data + current ) = m256_one_32;
|
||||
current++;
|
||||
memset_zero_256( (__m256i*)sc->data + current, 32 - current );
|
||||
|
||||
panama_8way_push( sc, sc->data, 1 );
|
||||
|
||||
panama_8way_pull( sc, 32 );
|
||||
|
||||
for ( i = 0; i < 8; i ++ )
|
||||
|
@@ -160,12 +160,16 @@ int scanhash_anime( struct work *work, uint32_t max_nonce,
|
||||
pdata[19] = n;
|
||||
|
||||
if ( ( hash[7] & mask ) == 0 && fulltest( hash, ptarget ) )
|
||||
submit_solution( work, hash, mythr );
|
||||
{
|
||||
work_set_target_ratio( work, hash );
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return true;
|
||||
}
|
||||
n++;
|
||||
} while ( ( n < max_nonce ) && !work_restart[thr_id].restart );
|
||||
break;
|
||||
}
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
|
||||
pdata[19] = n;
|
||||
return 0;
|
||||
}
|
||||
|
@@ -330,8 +330,11 @@ int scanhash_hmq1725( struct work *work, uint32_t max_nonce,
|
||||
be32enc(&endiandata[19], n);
|
||||
hmq1725hash(hash64, endiandata);
|
||||
if (((hash64[7]&0xFFFFFFFF)==0) &&
|
||||
fulltest(hash64, ptarget))
|
||||
submit_solution( work, hash64, mythr );
|
||||
fulltest(hash64, ptarget)) {
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
work_set_target_ratio( work, hash64 );
|
||||
return true;
|
||||
}
|
||||
} while (n < max_nonce && !work_restart[thr_id].restart);
|
||||
}
|
||||
else if (ptarget[7]<=0xF)
|
||||
@@ -341,8 +344,11 @@ int scanhash_hmq1725( struct work *work, uint32_t max_nonce,
|
||||
be32enc(&endiandata[19], n);
|
||||
hmq1725hash(hash64, endiandata);
|
||||
if (((hash64[7]&0xFFFFFFF0)==0) &&
|
||||
fulltest(hash64, ptarget))
|
||||
submit_solution( work, hash64, mythr );
|
||||
fulltest(hash64, ptarget)) {
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
work_set_target_ratio( work, hash64 );
|
||||
return true;
|
||||
}
|
||||
} while (n < max_nonce && !work_restart[thr_id].restart);
|
||||
}
|
||||
else if (ptarget[7]<=0xFF)
|
||||
@@ -352,8 +358,11 @@ int scanhash_hmq1725( struct work *work, uint32_t max_nonce,
|
||||
be32enc(&endiandata[19], n);
|
||||
hmq1725hash(hash64, endiandata);
|
||||
if (((hash64[7]&0xFFFFFF00)==0) &&
|
||||
fulltest(hash64, ptarget))
|
||||
submit_solution( work, hash64, mythr );
|
||||
fulltest(hash64, ptarget)) {
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
work_set_target_ratio( work, hash64 );
|
||||
return true;
|
||||
}
|
||||
} while (n < max_nonce && !work_restart[thr_id].restart);
|
||||
}
|
||||
else if (ptarget[7]<=0xFFF)
|
||||
@@ -363,9 +372,13 @@ int scanhash_hmq1725( struct work *work, uint32_t max_nonce,
|
||||
be32enc(&endiandata[19], n);
|
||||
hmq1725hash(hash64, endiandata);
|
||||
if (((hash64[7]&0xFFFFF000)==0) &&
|
||||
fulltest(hash64, ptarget))
|
||||
submit_solution( work, hash64, mythr );
|
||||
fulltest(hash64, ptarget)) {
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
work_set_target_ratio( work, hash64 );
|
||||
return true;
|
||||
}
|
||||
} while (n < max_nonce && !work_restart[thr_id].restart);
|
||||
|
||||
}
|
||||
else if (ptarget[7]<=0xFFFF)
|
||||
{
|
||||
@@ -374,9 +387,13 @@ int scanhash_hmq1725( struct work *work, uint32_t max_nonce,
|
||||
be32enc(&endiandata[19], n);
|
||||
hmq1725hash(hash64, endiandata);
|
||||
if (((hash64[7]&0xFFFF0000)==0) &&
|
||||
fulltest(hash64, ptarget))
|
||||
submit_solution( work, hash64, mythr );
|
||||
fulltest(hash64, ptarget)) {
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
work_set_target_ratio( work, hash64 );
|
||||
return true;
|
||||
}
|
||||
} while (n < max_nonce && !work_restart[thr_id].restart);
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -384,10 +401,15 @@ int scanhash_hmq1725( struct work *work, uint32_t max_nonce,
|
||||
pdata[19] = ++n;
|
||||
be32enc(&endiandata[19], n);
|
||||
hmq1725hash(hash64, endiandata);
|
||||
if (fulltest(hash64, ptarget))
|
||||
submit_solution( work, hash64, mythr );
|
||||
if (fulltest(hash64, ptarget)) {
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
work_set_target_ratio( work, hash64 );
|
||||
return true;
|
||||
}
|
||||
} while (n < max_nonce && !work_restart[thr_id].restart);
|
||||
}
|
||||
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
pdata[19] = n;
|
||||
return 0;
|
||||
|
@@ -126,11 +126,15 @@ int scanhash_quark( struct work *work, uint32_t max_nonce,
|
||||
pdata[19] = ++n;
|
||||
be32enc(&endiandata[19], n);
|
||||
quark_hash(hash64, &endiandata);
|
||||
if ((hash64[7]&0xFFFFFF00)==0)
|
||||
{
|
||||
if (fulltest(hash64, ptarget))
|
||||
submit_solution( work, hash64, mythr );
|
||||
}
|
||||
if ((hash64[7]&0xFFFFFF00)==0)
|
||||
{
|
||||
if (fulltest(hash64, ptarget))
|
||||
{
|
||||
work_set_target_ratio( work, hash64 );
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
} while (n < max_nonce && !work_restart[thr_id].restart);
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
|
@@ -92,21 +92,46 @@ int scanhash_deep( struct work *work, uint32_t max_nonce,
|
||||
|
||||
deep_luffa_midstate( endiandata );
|
||||
|
||||
#ifdef DEBUG_ALGO
|
||||
printf("[%d] Htarg=%X\n", thr_id, Htarg);
|
||||
#endif
|
||||
for ( int m=0; m < 6; m++ )
|
||||
{
|
||||
{
|
||||
if ( Htarg <= htmax[m] )
|
||||
{
|
||||
{
|
||||
uint32_t mask = masks[m];
|
||||
do
|
||||
{
|
||||
{
|
||||
pdata[19] = ++n;
|
||||
be32enc( &endiandata[19], n );
|
||||
deep_hash( hash64, endiandata );
|
||||
if (!(hash64[7] & mask))
|
||||
if ( fulltest(hash64, ptarget) )
|
||||
submit_solution( work, hash64, mythr );
|
||||
} while ( n < max_nonce && !work_restart[thr_id].restart );
|
||||
break;
|
||||
be32enc( &endiandata[19], n );
|
||||
deep_hash( hash64, endiandata );
|
||||
#ifndef DEBUG_ALGO
|
||||
if (!(hash64[7] & mask))
|
||||
{
|
||||
if ( fulltest(hash64, ptarget) )
|
||||
{
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return true;
|
||||
}
|
||||
// else
|
||||
// {
|
||||
// applog(LOG_INFO, "Result does not validate on CPU!");
|
||||
// }
|
||||
}
|
||||
#else
|
||||
if (!(n % 0x1000) && !thr_id) printf(".");
|
||||
if (!(hash64[7] & mask)) {
|
||||
printf("[%d]",thr_id);
|
||||
if (fulltest(hash64, ptarget)) {
|
||||
work_set_target_ratio( work, hash64 );
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
} while ( n < max_nonce && !work_restart[thr_id].restart );
|
||||
// see blake.c if else to understand the loop on htmax => mask
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -104,23 +104,48 @@ int scanhash_qubit( struct work *work, uint32_t max_nonce,
|
||||
|
||||
qubit_luffa_midstate( endiandata );
|
||||
|
||||
#ifdef DEBUG_ALGO
|
||||
printf("[%d] Htarg=%X\n", thr_id, Htarg);
|
||||
#endif
|
||||
for ( int m=0; m < 6; m++ )
|
||||
{
|
||||
{
|
||||
if ( Htarg <= htmax[m] )
|
||||
{
|
||||
{
|
||||
uint32_t mask = masks[m];
|
||||
do
|
||||
{
|
||||
{
|
||||
pdata[19] = ++n;
|
||||
be32enc(&endiandata[19], n);
|
||||
qubit_hash(hash64, endiandata);
|
||||
if (!(hash64[7] & mask))
|
||||
if ( fulltest(hash64, ptarget) )
|
||||
submit_solution( work, hash64, mythr );
|
||||
} while ( n < max_nonce && !work_restart[thr_id].restart );
|
||||
break;
|
||||
}
|
||||
}
|
||||
be32enc(&endiandata[19], n);
|
||||
qubit_hash(hash64, endiandata);
|
||||
#ifndef DEBUG_ALGO
|
||||
if (!(hash64[7] & mask))
|
||||
{
|
||||
if ( fulltest(hash64, ptarget) )
|
||||
{
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return true;
|
||||
}
|
||||
// else
|
||||
// {
|
||||
// applog(LOG_INFO, "Result does not validate on CPU!");
|
||||
// }
|
||||
}
|
||||
#else
|
||||
if (!(n % 0x1000) && !thr_id) printf(".");
|
||||
if (!(hash64[7] & mask)) {
|
||||
printf("[%d]",thr_id);
|
||||
if (fulltest(hash64, ptarget)) {
|
||||
work_set_target_ratio( work, hash64 );
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
} while ( n < max_nonce && !work_restart[thr_id].restart );
|
||||
// see blake.c if else to understand the loop on htmax => mask
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
pdata[19] = n;
|
||||
|
@@ -753,8 +753,10 @@ extern int scanhash_scrypt( struct work *work, uint32_t max_nonce,
|
||||
|
||||
for (i = 0; i < throughput; i++) {
|
||||
if (unlikely(hash[i * 8 + 7] <= Htarg && fulltest(hash + i * 8, ptarget))) {
|
||||
*hashes_done = n - pdata[19] + 1;
|
||||
pdata[19] = data[i * 20 + 19];
|
||||
submit_solution( work, hash, mythr );
|
||||
work_set_target_ratio( work, hash );
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
} while (likely(n < max_nonce && !work_restart[thr_id].restart));
|
||||
|
@@ -1,8 +1,6 @@
|
||||
#include "shavite-hash-4way.h"
|
||||
#include <stdint.h>
|
||||
|
||||
#if defined(__VAES__) && defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
|
||||
static const uint32_t IV512[] =
|
||||
{
|
||||
0x72FCCDD8, 0x79CA4727, 0x128A077B, 0x40D55AEC,
|
||||
@@ -11,6 +9,8 @@ static const uint32_t IV512[] =
|
||||
0xE275EADE, 0x502D9FCD, 0xB9357178, 0x022A4B9A
|
||||
};
|
||||
|
||||
#if defined(__VAES__) && defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
|
||||
#define mm512_ror2x512hi_1x32( a, b ) \
|
||||
_mm512_mask_blend_epi32( 0x8888, mm512_ror128_32( a ), \
|
||||
mm512_ror128_32( b ) )
|
||||
|
@@ -1035,7 +1035,7 @@ int simd_4way_update( simd_4way_context *state, const void *data,
|
||||
|
||||
while ( databitlen > 0 )
|
||||
{
|
||||
if ( ( current == 0 ) && ( databitlen >= bs ) )
|
||||
if ( current == 0 && databitlen >= bs )
|
||||
{
|
||||
// We can hash the data directly from the input buffer.
|
||||
SIMD_4way_Compress( state, data, 0 );
|
||||
@@ -1049,13 +1049,13 @@ int simd_4way_update( simd_4way_context *state, const void *data,
|
||||
int len = bs - current;
|
||||
if ( databitlen < len )
|
||||
{
|
||||
memcpy( state->buffer + 4 * (current/8), data, 4 * (databitlen/8) );
|
||||
memcpy( state->buffer + 4*(current/8), data, 4*((databitlen+7)/8) );
|
||||
state->count += databitlen;
|
||||
return 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
memcpy( state->buffer + 4 * (current / 8), data, 4 * (len / 8) );
|
||||
memcpy( state->buffer + 4*(current/8), data, 4*(len/8) );
|
||||
state->count += len;
|
||||
databitlen -= len;
|
||||
data += 4*(len/8);
|
||||
@@ -1128,7 +1128,7 @@ int simd_4way_update_close( simd_4way_context *state, void *hashval,
|
||||
int len = bs - current;
|
||||
if ( databitlen < len )
|
||||
{
|
||||
memcpy( state->buffer + 4*( current/8 ), data, 4*( (databitlen)/8 ) );
|
||||
memcpy( state->buffer + 4*( current/8 ), data, 4*( (databitlen+7)/8 ) );
|
||||
state->count += databitlen;
|
||||
break;
|
||||
}
|
||||
@@ -1149,7 +1149,7 @@ int simd_4way_update_close( simd_4way_context *state, void *hashval,
|
||||
// If there is still some data in the buffer, hash it
|
||||
if ( current )
|
||||
{
|
||||
current = current / 8;
|
||||
current = ( current+7 ) / 8;
|
||||
memset( state->buffer + 4*current, 0, 4*( state->blocksize/8 - current) );
|
||||
SIMD_4way_Compress( state, state->buffer, 0 );
|
||||
}
|
||||
|
@@ -78,12 +78,17 @@ int scanhash_whirlpool( struct work* work, uint32_t max_nonce,
|
||||
do {
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint32_t vhash[8];
|
||||
pdata[19] = ++n;
|
||||
pdata[19] = ++n;
|
||||
be32enc(&endiandata[19], n );
|
||||
whirlpool_hash(vhash, endiandata);
|
||||
|
||||
if (vhash[7] <= Htarg && fulltest(vhash, ptarget))
|
||||
submit_solution( work, vhash, mythr );
|
||||
{
|
||||
work_set_target_ratio(work, vhash);
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return true;
|
||||
}
|
||||
|
||||
} while ( n < max_nonce && !work_restart[thr_id].restart);
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
|
@@ -48,7 +48,11 @@ int scanhash_whirlpoolx( struct work* work, uint32_t max_nonce,
|
||||
whirlpoolx_hash(vhash, endiandata);
|
||||
|
||||
if (vhash[7] <= Htarg && fulltest(vhash, ptarget))
|
||||
submit_solution( work, vhash, mythr );
|
||||
{
|
||||
work_set_target_ratio(work, vhash);
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return true;
|
||||
}
|
||||
|
||||
} while ( n < max_nonce && !work_restart[thr_id].restart);
|
||||
|
||||
|
@@ -138,12 +138,16 @@ int scanhash_c11( struct work *work, uint32_t max_nonce,
|
||||
swab32_array( endiandata, pdata, 20 );
|
||||
|
||||
do
|
||||
{
|
||||
pdata[19] = nonce;
|
||||
{
|
||||
be32enc( &endiandata[19], nonce );
|
||||
c11_hash( hash, endiandata );
|
||||
if ( hash[7] <= Htarg && fulltest(hash, ptarget) )
|
||||
submit_solution( work, hash, mythr );
|
||||
{
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce;
|
||||
work_set_target_ratio( work, hash );
|
||||
return 1;
|
||||
}
|
||||
nonce++;
|
||||
} while ( nonce < max_nonce && !(*restart) );
|
||||
pdata[19] = nonce;
|
||||
|
@@ -293,10 +293,14 @@ int scanhash_timetravel( struct work *work, uint32_t max_nonce,
|
||||
|
||||
if ( hash[7] <= Htarg && fulltest( hash, ptarget) )
|
||||
{
|
||||
work_set_target_ratio( work, hash );
|
||||
pdata[19] = nonce;
|
||||
submit_solution( work, hash, mythr );
|
||||
}
|
||||
nonce++;
|
||||
*hashes_done = pdata[19] - first_nonce;
|
||||
work_set_target_ratio( work, hash );
|
||||
return 1;
|
||||
}
|
||||
nonce++;
|
||||
|
||||
} while (nonce < max_nonce && !(*restart));
|
||||
|
||||
pdata[19] = nonce;
|
||||
|
@@ -334,10 +334,14 @@ int scanhash_timetravel10( struct work *work, uint32_t max_nonce,
|
||||
|
||||
if ( hash[7] <= Htarg && fulltest( hash, ptarget) )
|
||||
{
|
||||
work_set_target_ratio( work, hash );
|
||||
pdata[19] = nonce;
|
||||
submit_solution( work, hash, mythr );
|
||||
}
|
||||
nonce++;
|
||||
work_set_target_ratio( work, hash );
|
||||
*hashes_done = pdata[19] - first_nonce;
|
||||
return 1;
|
||||
}
|
||||
nonce++;
|
||||
|
||||
} while (nonce < max_nonce && !(*restart));
|
||||
|
||||
pdata[19] = nonce;
|
||||
|
@@ -98,6 +98,9 @@ int scanhash_tribus( struct work *work, uint32_t max_nonce,
|
||||
sph_jh512_init( &tribus_ctx.jh );
|
||||
sph_jh512( &tribus_ctx.jh, endiandata, 64 );
|
||||
|
||||
#ifdef DEBUG_ALGO
|
||||
printf("[%d] Htarg=%X\n", thr_id, Htarg);
|
||||
#endif
|
||||
for (int m=0; m < 6; m++) {
|
||||
if (Htarg <= htmax[m]) {
|
||||
uint32_t mask = masks[m];
|
||||
@@ -105,9 +108,25 @@ int scanhash_tribus( struct work *work, uint32_t max_nonce,
|
||||
pdata[19] = ++n;
|
||||
be32enc(&endiandata[19], n);
|
||||
tribus_hash(hash32, endiandata);
|
||||
if ((!(hash32[7] & mask)) && fulltest(hash32, ptarget))
|
||||
submit_solution( work, hash32, mythr );
|
||||
#ifndef DEBUG_ALGO
|
||||
if ((!(hash32[7] & mask)) && fulltest(hash32, ptarget)) {
|
||||
work_set_target_ratio(work, hash32);
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return 1;
|
||||
}
|
||||
#else
|
||||
if (!(n % 0x1000) && !thr_id) printf(".");
|
||||
if (!(hash32[7] & mask)) {
|
||||
printf("[%d]",thr_id);
|
||||
if (fulltest(hash32, ptarget)) {
|
||||
work_set_target_ratio(work, hash32);
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
} while (n < max_nonce && !work_restart[thr_id].restart);
|
||||
// see blake.c if else to understand the loop on htmax => mask
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@@ -163,7 +163,11 @@ int scanhash_x11( struct work *work, uint32_t max_nonce,
|
||||
if ( ( hash64[7] & mask ) == 0 )
|
||||
{
|
||||
if ( fulltest( hash64, ptarget ) )
|
||||
submit_solution( work, hash64, mythr );
|
||||
{
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
work_set_target_ratio( work, hash64 );
|
||||
return true;
|
||||
}
|
||||
}
|
||||
} while ( n < max_nonce && !work_restart[thr_id].restart );
|
||||
}
|
||||
|
@@ -199,8 +199,12 @@ int scanhash_x11evo( struct work* work, uint32_t max_nonce,
|
||||
if ( ( hash64[7] & hmask ) == 0 )
|
||||
{
|
||||
if ( fulltest( hash64, ptarget ) )
|
||||
submit_solution( work, hash64, mythr );
|
||||
}
|
||||
{
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
work_set_target_ratio( work, hash64 );
|
||||
return true;
|
||||
}
|
||||
}
|
||||
} while ( n < max_nonce && !work_restart[thr_id].restart );
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
|
@@ -448,7 +448,6 @@ void x11gost_4way_hash( void *state, const void *input )
|
||||
simd_2way_update_close( &ctx.simd, vhash, vhash, 512 );
|
||||
dintrlv_2x128( hash0, hash1, vhash, 512 );
|
||||
intrlv_2x128( vhash, hash2, hash3, 512 );
|
||||
simd_2way_init( &ctx.simd, 512 );
|
||||
simd_2way_update_close( &ctx.simd, vhash, vhash, 512 );
|
||||
dintrlv_2x128( hash2, hash3, vhash, 512 );
|
||||
|
||||
|
@@ -15,7 +15,7 @@ bool register_x11gost_algo( algo_gate_t* gate )
|
||||
gate->scanhash = (void*)&scanhash_x11gost;
|
||||
gate->hash = (void*)&x11gost_hash;
|
||||
#endif
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT;
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT;
|
||||
return true;
|
||||
};
|
||||
|
||||
|
@@ -128,8 +128,9 @@ void x11gost_hash(void *output, const void *input)
|
||||
int scanhash_x11gost( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t _ALIGN(64) endiandata[20];
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
@@ -148,13 +149,16 @@ int scanhash_x11gost( struct work *work, uint32_t max_nonce,
|
||||
be32enc(&endiandata[19], nonce);
|
||||
x11gost_hash(hash, endiandata);
|
||||
|
||||
if (hash[7] <= Htarg && fulltest(hash, ptarget))
|
||||
{
|
||||
if (hash[7] <= Htarg && fulltest(hash, ptarget)) {
|
||||
pdata[19] = nonce;
|
||||
submit_solution( work, hash, mythr );
|
||||
*hashes_done = pdata[19] - first_nonce;
|
||||
work_set_target_ratio( work, hash );
|
||||
return 1;
|
||||
}
|
||||
nonce++;
|
||||
|
||||
} while (nonce < max_nonce && !(*restart));
|
||||
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce + 1;
|
||||
return 0;
|
||||
|
@@ -158,6 +158,9 @@ int scanhash_x12( struct work *work, uint32_t max_nonce,
|
||||
// we need bigendian data...
|
||||
swab32_array( endiandata, pdata, 20 );
|
||||
|
||||
#ifdef DEBUG_ALGO
|
||||
printf("[%d] Htarg=%X\n", thr_id, Htarg);
|
||||
#endif
|
||||
for (int m=0; m < 6; m++) {
|
||||
if (Htarg <= htmax[m]) {
|
||||
uint32_t mask = masks[m];
|
||||
@@ -165,10 +168,33 @@ int scanhash_x12( struct work *work, uint32_t max_nonce,
|
||||
pdata[19] = ++n;
|
||||
be32enc(&endiandata[19], n);
|
||||
x12hash(hash64, endiandata);
|
||||
#ifndef DEBUG_ALGO
|
||||
if (!(hash64[7] & mask))
|
||||
if ( fulltest(hash64, ptarget) )
|
||||
submit_solution( work, hash64, mythr );
|
||||
{
|
||||
if ( fulltest(hash64, ptarget) )
|
||||
{
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return true;
|
||||
}
|
||||
// else
|
||||
// {
|
||||
// applog(LOG_INFO, "Result does not validate on CPU!");
|
||||
// }
|
||||
}
|
||||
|
||||
#else
|
||||
if (!(n % 0x1000) && !thr_id) printf(".");
|
||||
if (!(hash64[7] & mask)) {
|
||||
printf("[%d]",thr_id);
|
||||
if (fulltest(hash64, ptarget)) {
|
||||
work_set_target_ratio( work, hash );
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
} while (n < max_nonce && !work_restart[thr_id].restart);
|
||||
// see blake.c if else to understand the loop on htmax => mask
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@@ -115,10 +115,11 @@ int scanhash_phi1612( struct work *work, uint32_t max_nonce,
|
||||
be32enc(&endiandata[19], nonce);
|
||||
phi1612_hash(hash, endiandata);
|
||||
|
||||
if (hash[7] <= Htarg && fulltest(hash, ptarget))
|
||||
{
|
||||
if (hash[7] <= Htarg && fulltest(hash, ptarget)) {
|
||||
pdata[19] = nonce;
|
||||
submit_solution( work, hash, mythr );
|
||||
work_set_target_ratio( work, hash );
|
||||
*hashes_done = pdata[19] - first_nonce;
|
||||
return 1;
|
||||
}
|
||||
nonce++;
|
||||
|
||||
|
@@ -68,9 +68,11 @@ int scanhash_skunk( struct work *work, uint32_t max_nonce,
|
||||
skunkhash( hash, endiandata );
|
||||
|
||||
if ( hash[7] <= Htarg && fulltest( hash, ptarget ) )
|
||||
{
|
||||
pdata[19] = nonce;
|
||||
submit_solution( work, hash, mythr );
|
||||
{
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce;
|
||||
work_set_target_ratio( work, hash );
|
||||
return 1;
|
||||
}
|
||||
nonce++;
|
||||
} while ( nonce < max_nonce && !(*restart) );
|
||||
|
@@ -161,8 +161,11 @@ int scanhash_x13( struct work *work, uint32_t max_nonce,
|
||||
};
|
||||
|
||||
// we need bigendian data...
|
||||
swab32_array( endiandata, pdata, 20 );
|
||||
swab32_array( endiandata, pdata, 20 );
|
||||
|
||||
#ifdef DEBUG_ALGO
|
||||
printf("[%d] Htarg=%X\n", thr_id, Htarg);
|
||||
#endif
|
||||
for (int m=0; m < 6; m++) {
|
||||
if (Htarg <= htmax[m]) {
|
||||
uint32_t mask = masks[m];
|
||||
@@ -170,11 +173,31 @@ int scanhash_x13( struct work *work, uint32_t max_nonce,
|
||||
pdata[19] = ++n;
|
||||
be32enc(&endiandata[19], n);
|
||||
x13hash(hash64, endiandata);
|
||||
#ifndef DEBUG_ALGO
|
||||
if (!(hash64[7] & mask))
|
||||
{
|
||||
if ( fulltest(hash64, ptarget) )
|
||||
submit_solution( work, hash64, mythr );
|
||||
}
|
||||
{
|
||||
if ( fulltest(hash64, ptarget) )
|
||||
{
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return true;
|
||||
}
|
||||
// else
|
||||
// {
|
||||
// applog(LOG_INFO, "Result does not validate on CPU!");
|
||||
// }
|
||||
}
|
||||
|
||||
#else
|
||||
if (!(n % 0x1000) && !thr_id) printf(".");
|
||||
if (!(hash64[7] & mask)) {
|
||||
printf("[%d]",thr_id);
|
||||
if (fulltest(hash64, ptarget)) {
|
||||
work_set_target_ratio( work, hash );
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
} while (n < max_nonce && !work_restart[thr_id].restart);
|
||||
// see blake.c if else to understand the loop on htmax => mask
|
||||
break;
|
||||
|
@@ -165,6 +165,10 @@ int scanhash_x13bcd( struct work *work, uint32_t max_nonce,
|
||||
// we need bigendian data...
|
||||
swab32_array( endiandata, pdata, 20 );
|
||||
|
||||
#ifdef DEBUG_ALGO
|
||||
if (Htarg != 0)
|
||||
printf("[%d] Htarg=%X\n", thr_id, Htarg);
|
||||
#endif
|
||||
for (int m=0; m < 6; m++) {
|
||||
if (Htarg <= htmax[m]) {
|
||||
uint32_t mask = masks[m];
|
||||
@@ -172,9 +176,24 @@ int scanhash_x13bcd( struct work *work, uint32_t max_nonce,
|
||||
pdata[19] = ++n;
|
||||
be32enc(&endiandata[19], n);
|
||||
x13bcd_hash(hash64, endiandata);
|
||||
if ((!(hash64[7] & mask)) && fulltest(hash64, ptarget))
|
||||
submit_solution( work, hash64, mythr );
|
||||
#ifndef DEBUG_ALGO
|
||||
if ((!(hash64[7] & mask)) && fulltest(hash64, ptarget)) {
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return true;
|
||||
}
|
||||
#else
|
||||
if (!(n % 0x1000) && !thr_id) printf(".");
|
||||
if (!(hash64[7] & mask)) {
|
||||
printf("[%d]",thr_id);
|
||||
if (fulltest(hash64, ptarget)) {
|
||||
work_set_target_ratio( work, hash64 );
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
} while (n < max_nonce && !work_restart[thr_id].restart);
|
||||
// see blake.c if else to understand the loop on htmax => mask
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@@ -177,6 +177,10 @@ int scanhash_x13sm3( struct work *work, uint32_t max_nonce,
|
||||
// we need bigendian data...
|
||||
swab32_array( endiandata, pdata, 20 );
|
||||
|
||||
#ifdef DEBUG_ALGO
|
||||
if (Htarg != 0)
|
||||
printf("[%d] Htarg=%X\n", thr_id, Htarg);
|
||||
#endif
|
||||
for (int m=0; m < 6; m++) {
|
||||
if (Htarg <= htmax[m]) {
|
||||
uint32_t mask = masks[m];
|
||||
@@ -184,8 +188,22 @@ int scanhash_x13sm3( struct work *work, uint32_t max_nonce,
|
||||
pdata[19] = ++n;
|
||||
be32enc(&endiandata[19], n);
|
||||
x13sm3_hash(hash64, endiandata);
|
||||
if ((!(hash64[7] & mask)) && fulltest(hash64, ptarget))
|
||||
submit_solution( work, hash64, mythr );
|
||||
#ifndef DEBUG_ALGO
|
||||
if ((!(hash64[7] & mask)) && fulltest(hash64, ptarget)) {
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return true;
|
||||
}
|
||||
#else
|
||||
if (!(n % 0x1000) && !thr_id) printf(".");
|
||||
if (!(hash64[7] & mask)) {
|
||||
printf("[%d]",thr_id);
|
||||
if (fulltest(hash64, ptarget)) {
|
||||
work_set_target_ratio( work, hash64 );
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
} while (n < max_nonce && !work_restart[thr_id].restart);
|
||||
// see blake.c if else to understand the loop on htmax => mask
|
||||
break;
|
||||
|
@@ -63,10 +63,11 @@ int scanhash_axiom( struct work *work,
|
||||
do {
|
||||
be32enc(&endiandata[19], n);
|
||||
axiomhash(hash64, endiandata);
|
||||
if (hash64[7] < Htarg && fulltest(hash64, ptarget))
|
||||
{
|
||||
if (hash64[7] < Htarg && fulltest(hash64, ptarget)) {
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
pdata[19] = n;
|
||||
submit_solution( work, hash64, mythr );
|
||||
work_set_target_ratio( work, hash64 );
|
||||
return true;
|
||||
}
|
||||
n++;
|
||||
|
||||
|
@@ -100,10 +100,11 @@ int scanhash_polytimos( struct work *work, uint32_t max_nonce,
|
||||
be32enc(&endiandata[19], nonce);
|
||||
polytimos_hash(hash, endiandata);
|
||||
|
||||
if (hash[7] <= Htarg && fulltest(hash, ptarget))
|
||||
{
|
||||
if (hash[7] <= Htarg && fulltest(hash, ptarget)) {
|
||||
work_set_target_ratio(work, hash);
|
||||
pdata[19] = nonce;
|
||||
submit_solution( work, hash, mythr );
|
||||
*hashes_done = pdata[19] - first_nonce;
|
||||
return 1;
|
||||
}
|
||||
nonce++;
|
||||
|
||||
|
@@ -89,10 +89,11 @@ int scanhash_veltor( struct work *work, uint32_t max_nonce,
|
||||
be32enc(&endiandata[19], nonce);
|
||||
veltor_hash(hash, endiandata);
|
||||
|
||||
if (hash[7] <= Htarg && fulltest(hash, ptarget))
|
||||
{
|
||||
if (hash[7] <= Htarg && fulltest(hash, ptarget)) {
|
||||
work_set_target_ratio(work, hash);
|
||||
pdata[19] = nonce;
|
||||
submit_solution( work, hash, mythr );
|
||||
*hashes_done = pdata[19] - first_nonce;
|
||||
return 1;
|
||||
}
|
||||
nonce++;
|
||||
|
||||
|
@@ -170,6 +170,10 @@ int scanhash_x14( struct work *work, uint32_t max_nonce,
|
||||
// we need bigendian data...
|
||||
swab32_array( endiandata, pdata, 20 );
|
||||
|
||||
#ifdef DEBUG_ALGO
|
||||
if (Htarg != 0)
|
||||
printf("[%d] Htarg=%X\n", thr_id, Htarg);
|
||||
#endif
|
||||
for (int m=0; m < 6; m++) {
|
||||
if (Htarg <= htmax[m]) {
|
||||
uint32_t mask = masks[m];
|
||||
@@ -177,9 +181,24 @@ int scanhash_x14( struct work *work, uint32_t max_nonce,
|
||||
pdata[19] = ++n;
|
||||
be32enc(&endiandata[19], n);
|
||||
x14hash(hash64, endiandata);
|
||||
if ((!(hash64[7] & mask)) && fulltest(hash64, ptarget))
|
||||
submit_solution( work, hash64, mythr );
|
||||
#ifndef DEBUG_ALGO
|
||||
if ((!(hash64[7] & mask)) && fulltest(hash64, ptarget)) {
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return true;
|
||||
}
|
||||
#else
|
||||
if (!(n % 0x1000) && !thr_id) printf(".");
|
||||
if (!(hash64[7] & mask)) {
|
||||
printf("[%d]",thr_id);
|
||||
if (fulltest(hash64, ptarget)) {
|
||||
work_set_target_ratio( work, hash64 );
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
} while (n < max_nonce && !work_restart[thr_id].restart);
|
||||
// see blake.c if else to understand the loop on htmax => mask
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@@ -217,22 +217,22 @@ void x16r_8way_hash( void* output, const void* input )
|
||||
case CUBEHASH:
|
||||
intrlv_4x128( vhash, in0, in1, in2, in3, size<<3 );
|
||||
cube_4way_init( &ctx.cube, 512, 16, 32 );
|
||||
cube_4way_update_close( &ctx.cube, vhash, vhash, size );
|
||||
cube_4way_update_close( &ctx.cube, vhash, vhash, 64 );
|
||||
dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhash );
|
||||
intrlv_4x128( vhash, in4, in5, in6, in7, size<<3 );
|
||||
cube_4way_init( &ctx.cube, 512, 16, 32 );
|
||||
cube_4way_update_close( &ctx.cube, vhash, vhash, size );
|
||||
cube_4way_update_close( &ctx.cube, vhash, vhash, 64 );
|
||||
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash );
|
||||
break;
|
||||
case SHAVITE:
|
||||
#if defined(__VAES__)
|
||||
intrlv_4x128( vhash, in0, in1, in2, in3, size<<3 );
|
||||
shavite512_4way_init( &ctx.shavite );
|
||||
shavite512_4way_update_close( &ctx.shavite, vhash, vhash, size );
|
||||
shavite512_4way_update_close( &ctx.shavite, vhash, vhash, 64 );
|
||||
dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhash );
|
||||
intrlv_4x128( vhash, in4, in5, in6, in7, size<<3 );
|
||||
shavite512_4way_init( &ctx.shavite );
|
||||
shavite512_4way_update_close( &ctx.shavite, vhash, vhash, size );
|
||||
shavite512_4way_update_close( &ctx.shavite, vhash, vhash, 64 );
|
||||
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash );
|
||||
#else
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
@@ -275,11 +275,11 @@ void x16r_8way_hash( void* output, const void* input )
|
||||
#if defined(__VAES__)
|
||||
intrlv_4x128( vhash, in0, in1, in2, in3, size<<3 );
|
||||
echo_4way_init( &ctx.echo, 512 );
|
||||
echo_4way_update_close( &ctx.echo, vhash, vhash, size<<3 );
|
||||
echo_4way_update_close( &ctx.echo, vhash, vhash, 512 );
|
||||
dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhash );
|
||||
intrlv_4x128( vhash, in4, in5, in6, in7, size<<3 );
|
||||
echo_4way_init( &ctx.echo, 512 );
|
||||
echo_4way_update_close( &ctx.echo, vhash, vhash, size<<3 );
|
||||
echo_4way_update_close( &ctx.echo, vhash, vhash, 512 );
|
||||
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash );
|
||||
#else
|
||||
init_echo( &ctx.echo, 512 );
|
||||
|
@@ -211,22 +211,22 @@ void x16rt_8way_hash( void* output, const void* input )
|
||||
case CUBEHASH:
|
||||
intrlv_4x128( vhash, in0, in1, in2, in3, size<<3 );
|
||||
cube_4way_init( &ctx.cube, 512, 16, 32 );
|
||||
cube_4way_update_close( &ctx.cube, vhash, vhash, size );
|
||||
cube_4way_update_close( &ctx.cube, vhash, vhash, 64 );
|
||||
dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhash );
|
||||
intrlv_4x128( vhash, in4, in5, in6, in7, size<<3 );
|
||||
cube_4way_init( &ctx.cube, 512, 16, 32 );
|
||||
cube_4way_update_close( &ctx.cube, vhash, vhash, size );
|
||||
cube_4way_update_close( &ctx.cube, vhash, vhash, 64 );
|
||||
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash );
|
||||
break;
|
||||
case SHAVITE:
|
||||
#if defined(__VAES__)
|
||||
intrlv_4x128( vhash, in0, in1, in2, in3, size<<3 );
|
||||
shavite512_4way_init( &ctx.shavite );
|
||||
shavite512_4way_update_close( &ctx.shavite, vhash, vhash, size );
|
||||
shavite512_4way_update_close( &ctx.shavite, vhash, vhash, 64 );
|
||||
dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhash );
|
||||
intrlv_4x128( vhash, in4, in5, in6, in7, size<<3 );
|
||||
shavite512_4way_init( &ctx.shavite );
|
||||
shavite512_4way_update_close( &ctx.shavite, vhash, vhash, size );
|
||||
shavite512_4way_update_close( &ctx.shavite, vhash, vhash, 64 );
|
||||
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash );
|
||||
#else
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
@@ -269,11 +269,11 @@ void x16rt_8way_hash( void* output, const void* input )
|
||||
#if defined(__VAES__)
|
||||
intrlv_4x128( vhash, in0, in1, in2, in3, size<<3 );
|
||||
echo_4way_init( &ctx.echo, 512 );
|
||||
echo_4way_update_close( &ctx.echo, vhash, vhash, size<<3 );
|
||||
echo_4way_update_close( &ctx.echo, vhash, vhash, 512 );
|
||||
dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhash );
|
||||
intrlv_4x128( vhash, in4, in5, in6, in7, size<<3 );
|
||||
echo_4way_init( &ctx.echo, 512 );
|
||||
echo_4way_update_close( &ctx.echo, vhash, vhash, size<<3 );
|
||||
echo_4way_update_close( &ctx.echo, vhash, vhash, 512 );
|
||||
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash );
|
||||
#else
|
||||
init_echo( &ctx.echo, 512 );
|
||||
|
@@ -42,12 +42,15 @@ union _x16rv2_8way_context_overlay
|
||||
{
|
||||
blake512_8way_context blake;
|
||||
bmw512_8way_context bmw;
|
||||
// hashState_groestl groestl;
|
||||
skein512_8way_context skein;
|
||||
jh512_8way_context jh;
|
||||
keccak512_8way_context keccak;
|
||||
luffa_4way_context luffa;
|
||||
cube_4way_context cube;
|
||||
// sph_shavite512_context shavite;
|
||||
simd_4way_context simd;
|
||||
// hashState_echo echo;
|
||||
hamsi512_8way_context hamsi;
|
||||
sph_fugue512_context fugue;
|
||||
shabal512_8way_context shabal;
|
||||
@@ -272,22 +275,22 @@ void x16rv2_8way_hash( void* output, const void* input )
|
||||
case CUBEHASH:
|
||||
intrlv_4x128( vhash, in0, in1, in2, in3, size<<3 );
|
||||
cube_4way_init( &ctx.cube, 512, 16, 32 );
|
||||
cube_4way_update_close( &ctx.cube, vhash, vhash, size );
|
||||
cube_4way_update_close( &ctx.cube, vhash, vhash, 64 );
|
||||
dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhash );
|
||||
intrlv_4x128( vhash, in4, in5, in6, in7, size<<3 );
|
||||
cube_4way_init( &ctx.cube, 512, 16, 32 );
|
||||
cube_4way_update_close( &ctx.cube, vhash, vhash, size );
|
||||
cube_4way_update_close( &ctx.cube, vhash, vhash, 64 );
|
||||
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash );
|
||||
break;
|
||||
case SHAVITE:
|
||||
#if defined(__VAES__)
|
||||
intrlv_4x128( vhash, in0, in1, in2, in3, size<<3 );
|
||||
shavite512_4way_init( &ctx.shavite );
|
||||
shavite512_4way_update_close( &ctx.shavite, vhash, vhash, size );
|
||||
shavite512_4way_update_close( &ctx.shavite, vhash, vhash, 64 );
|
||||
dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhash );
|
||||
intrlv_4x128( vhash, in4, in5, in6, in7, size<<3 );
|
||||
shavite512_4way_init( &ctx.shavite );
|
||||
shavite512_4way_update_close( &ctx.shavite, vhash, vhash, size );
|
||||
shavite512_4way_update_close( &ctx.shavite, vhash, vhash, 64 );
|
||||
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash );
|
||||
#else
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
@@ -330,11 +333,11 @@ void x16rv2_8way_hash( void* output, const void* input )
|
||||
#if defined(__VAES__)
|
||||
intrlv_4x128( vhash, in0, in1, in2, in3, size<<3 );
|
||||
echo_4way_init( &ctx.echo, 512 );
|
||||
echo_4way_update_close( &ctx.echo, vhash, vhash, size<<3 );
|
||||
echo_4way_update_close( &ctx.echo, vhash, vhash, 512 );
|
||||
dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhash );
|
||||
intrlv_4x128( vhash, in4, in5, in6, in7, size<<3 );
|
||||
echo_4way_init( &ctx.echo, 512 );
|
||||
echo_4way_update_close( &ctx.echo, vhash, vhash, size<<3 );
|
||||
echo_4way_update_close( &ctx.echo, vhash, vhash, 512 );
|
||||
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash );
|
||||
#else
|
||||
init_echo( &ctx.echo, 512 );
|
||||
|
@@ -230,22 +230,22 @@ void x21s_8way_hash( void* output, const void* input )
|
||||
case CUBEHASH:
|
||||
intrlv_4x128( vhash, in0, in1, in2, in3, size<<3 );
|
||||
cube_4way_init( &ctx.cube, 512, 16, 32 );
|
||||
cube_4way_update_close( &ctx.cube, vhash, vhash, size );
|
||||
cube_4way_update_close( &ctx.cube, vhash, vhash, 64 );
|
||||
dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhash );
|
||||
intrlv_4x128( vhash, in4, in5, in6, in7, size<<3 );
|
||||
cube_4way_init( &ctx.cube, 512, 16, 32 );
|
||||
cube_4way_update_close( &ctx.cube, vhash, vhash, size );
|
||||
cube_4way_update_close( &ctx.cube, vhash, vhash, 64 );
|
||||
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash );
|
||||
break;
|
||||
case SHAVITE:
|
||||
#if defined(__VAES__)
|
||||
intrlv_4x128( vhash, in0, in1, in2, in3, size<<3 );
|
||||
shavite512_4way_init( &ctx.shavite );
|
||||
shavite512_4way_update_close( &ctx.shavite, vhash, vhash, size );
|
||||
shavite512_4way_update_close( &ctx.shavite, vhash, vhash, 64 );
|
||||
dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhash );
|
||||
intrlv_4x128( vhash, in4, in5, in6, in7, size<<3 );
|
||||
shavite512_4way_init( &ctx.shavite );
|
||||
shavite512_4way_update_close( &ctx.shavite, vhash, vhash, size );
|
||||
shavite512_4way_update_close( &ctx.shavite, vhash, vhash, 64 );
|
||||
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash );
|
||||
#else
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
@@ -285,14 +285,15 @@ void x21s_8way_hash( void* output, const void* input )
|
||||
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash );
|
||||
break;
|
||||
case ECHO:
|
||||
|
||||
#if defined(__VAES__)
|
||||
intrlv_4x128( vhash, in0, in1, in2, in3, size<<3 );
|
||||
echo_4way_init( &ctx.echo, 512 );
|
||||
echo_4way_update_close( &ctx.echo, vhash, vhash, size<<3 );
|
||||
echo_4way_update_close( &ctx.echo, vhash, vhash, 512 );
|
||||
dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhash );
|
||||
intrlv_4x128( vhash, in4, in5, in6, in7, size<<3 );
|
||||
echo_4way_init( &ctx.echo, 512 );
|
||||
echo_4way_update_close( &ctx.echo, vhash, vhash, size<<3 );
|
||||
echo_4way_update_close( &ctx.echo, vhash, vhash, 512 );
|
||||
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash );
|
||||
#else
|
||||
init_echo( &ctx.echo, 512 );
|
||||
@@ -516,7 +517,7 @@ int scanhash_x21s_8way( struct work *work, uint32_t max_nonce,
|
||||
x16_r_s_getAlgoString( (const uint8_t*)bedata1, hashOrder );
|
||||
s_ntime = ntime;
|
||||
if ( opt_debug && !thr_id )
|
||||
applog( LOG_INFO, "hash order %s (%08x)", hashOrder, ntime );
|
||||
applog( LOG_DEBUG, "hash order %s (%08x)", hashOrder, ntime );
|
||||
}
|
||||
|
||||
do
|
||||
|
@@ -44,15 +44,15 @@ bool register_xi20r_algo( algo_gate_t* gate );
|
||||
|
||||
void x20r_4way_hash( void *state, const void *input );
|
||||
|
||||
int scanhash_x20r_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
int scanhash_x20r_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
|
||||
#endif
|
||||
|
||||
void x20rhash( void *state, const void *input );
|
||||
|
||||
int scanhash_x20r( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
int scanhash_x20r( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
|
||||
#endif
|
||||
|
||||
|
@@ -228,8 +228,8 @@ void x20r_hash(void* output, const void* input)
|
||||
memcpy(output, hash, 32);
|
||||
}
|
||||
|
||||
int scanhash_x20r( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
int scanhash_x20r( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
{
|
||||
uint32_t _ALIGN(128) hash32[8];
|
||||
uint32_t _ALIGN(128) endiandata[20];
|
||||
@@ -238,7 +238,6 @@ int scanhash_x20r( struct work *work, uint32_t max_nonce,
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t nonce = first_nonce;
|
||||
int thr_id = mythr->id;
|
||||
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
||||
|
||||
for (int k=0; k < 19; k++)
|
||||
@@ -260,9 +259,11 @@ int scanhash_x20r( struct work *work, uint32_t max_nonce,
|
||||
x20r_hash( hash32, endiandata );
|
||||
|
||||
if ( hash32[7] <= Htarg && fulltest( hash32, ptarget ) )
|
||||
{
|
||||
pdata[19] = nonce;
|
||||
submit_solution( work, hash32, mythr );
|
||||
{
|
||||
work_set_target_ratio( work, hash32 );
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce;
|
||||
return 1;
|
||||
}
|
||||
nonce++;
|
||||
|
||||
|
20
configure
vendored
20
configure
vendored
@@ -1,6 +1,6 @@
|
||||
#! /bin/sh
|
||||
# Guess values for system-dependent variables and create Makefiles.
|
||||
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.11.2.
|
||||
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.11.1.
|
||||
#
|
||||
#
|
||||
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
|
||||
@@ -577,8 +577,8 @@ MAKEFLAGS=
|
||||
# Identity of this package.
|
||||
PACKAGE_NAME='cpuminer-opt'
|
||||
PACKAGE_TARNAME='cpuminer-opt'
|
||||
PACKAGE_VERSION='3.11.2'
|
||||
PACKAGE_STRING='cpuminer-opt 3.11.2'
|
||||
PACKAGE_VERSION='3.11.1'
|
||||
PACKAGE_STRING='cpuminer-opt 3.11.1'
|
||||
PACKAGE_BUGREPORT=''
|
||||
PACKAGE_URL=''
|
||||
|
||||
@@ -1332,7 +1332,7 @@ if test "$ac_init_help" = "long"; then
|
||||
# Omit some internal or obsolete options to make the list less imposing.
|
||||
# This message is too long to be a string in the A/UX 3.1 sh.
|
||||
cat <<_ACEOF
|
||||
\`configure' configures cpuminer-opt 3.11.2 to adapt to many kinds of systems.
|
||||
\`configure' configures cpuminer-opt 3.11.1 to adapt to many kinds of systems.
|
||||
|
||||
Usage: $0 [OPTION]... [VAR=VALUE]...
|
||||
|
||||
@@ -1404,7 +1404,7 @@ fi
|
||||
|
||||
if test -n "$ac_init_help"; then
|
||||
case $ac_init_help in
|
||||
short | recursive ) echo "Configuration of cpuminer-opt 3.11.2:";;
|
||||
short | recursive ) echo "Configuration of cpuminer-opt 3.11.1:";;
|
||||
esac
|
||||
cat <<\_ACEOF
|
||||
|
||||
@@ -1509,7 +1509,7 @@ fi
|
||||
test -n "$ac_init_help" && exit $ac_status
|
||||
if $ac_init_version; then
|
||||
cat <<\_ACEOF
|
||||
cpuminer-opt configure 3.11.2
|
||||
cpuminer-opt configure 3.11.1
|
||||
generated by GNU Autoconf 2.69
|
||||
|
||||
Copyright (C) 2012 Free Software Foundation, Inc.
|
||||
@@ -2012,7 +2012,7 @@ cat >config.log <<_ACEOF
|
||||
This file contains any messages produced by compilers while
|
||||
running configure, to aid debugging if configure makes a mistake.
|
||||
|
||||
It was created by cpuminer-opt $as_me 3.11.2, which was
|
||||
It was created by cpuminer-opt $as_me 3.11.1, which was
|
||||
generated by GNU Autoconf 2.69. Invocation command line was
|
||||
|
||||
$ $0 $@
|
||||
@@ -2993,7 +2993,7 @@ fi
|
||||
|
||||
# Define the identity of the package.
|
||||
PACKAGE='cpuminer-opt'
|
||||
VERSION='3.11.2'
|
||||
VERSION='3.11.1'
|
||||
|
||||
|
||||
cat >>confdefs.h <<_ACEOF
|
||||
@@ -6690,7 +6690,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
|
||||
# report actual input values of CONFIG_FILES etc. instead of their
|
||||
# values after options handling.
|
||||
ac_log="
|
||||
This file was extended by cpuminer-opt $as_me 3.11.2, which was
|
||||
This file was extended by cpuminer-opt $as_me 3.11.1, which was
|
||||
generated by GNU Autoconf 2.69. Invocation command line was
|
||||
|
||||
CONFIG_FILES = $CONFIG_FILES
|
||||
@@ -6756,7 +6756,7 @@ _ACEOF
|
||||
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
|
||||
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
|
||||
ac_cs_version="\\
|
||||
cpuminer-opt config.status 3.11.2
|
||||
cpuminer-opt config.status 3.11.1
|
||||
configured by $0, generated by GNU Autoconf 2.69,
|
||||
with options \\"\$ac_cs_config\\"
|
||||
|
||||
|
@@ -1,4 +1,4 @@
|
||||
AC_INIT([cpuminer-opt], [3.11.2])
|
||||
AC_INIT([cpuminer-opt], [3.11.1])
|
||||
|
||||
AC_PREREQ([2.59c])
|
||||
AC_CANONICAL_SYSTEM
|
||||
|
@@ -3343,7 +3343,7 @@ static void show_credits()
|
||||
{
|
||||
printf("\n ********** "PACKAGE_NAME" "PACKAGE_VERSION" *********** \n");
|
||||
printf(" A CPU miner with multi algo support and optimized for CPUs\n");
|
||||
printf(" with AVX512, SHA and VAES extensions.\n");
|
||||
printf(" with AES_NI, AVX2, AVX512, SHA and VAES extensions.\n");
|
||||
printf(" BTC donation address: 12tdvfF7KmAsihBXQXynT6E6th2c2pByTT\n\n");
|
||||
}
|
||||
|
||||
|
@@ -132,19 +132,10 @@ do { \
|
||||
|
||||
// Parallel AES, for when x is expected to be in a 256 bit register.
|
||||
// Use same 128 bit key.
|
||||
#if defined(__VAES__) && defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
|
||||
#define mm256_aesenc_2x128( x, k ) \
|
||||
_mm256_aesenc_epi128( x, m256_const1_128(k ) )
|
||||
|
||||
#else
|
||||
|
||||
#define mm256_aesenc_2x128( x, k ) \
|
||||
mm256_concat_128( _mm_aesenc_si128( mm128_extr_hi128_256( x ), k ), \
|
||||
_mm_aesenc_si128( mm128_extr_lo128_256( x ), k ) )
|
||||
|
||||
#endif
|
||||
|
||||
#define mm256_paesenc_2x128( y, x, k ) do \
|
||||
{ \
|
||||
__m128i *X = (__m128i*)x; \
|
||||
@@ -555,14 +546,14 @@ static inline void memcpy_256( __m256i *dst, const __m256i *src, const int n )
|
||||
#define mm256_ror512_128( v1, v2 ) \
|
||||
do { \
|
||||
__m256i t = _mm256_permute2x128( v1, v2, 0x03 ); \
|
||||
v1 = _mm256_permute2x128( v2, v1, 0x21 ); \
|
||||
v1 = _mm256__mm256_permute2x128( v2, v1, 0x21 ); \
|
||||
v2 = t; \
|
||||
} while(0)
|
||||
|
||||
#define mm256_rol512_128( v1, v2 ) \
|
||||
do { \
|
||||
__m256i t = _mm256_permute2x128( v1, v2, 0x03 ); \
|
||||
v2 = _mm256_permute2x128( v2, v1, 0x21 ); \
|
||||
v2 = _mm256__mm256_permute2x128( v2, v1, 0x21 ); \
|
||||
v1 = t; \
|
||||
} while(0)
|
||||
|
||||
|
@@ -44,14 +44,14 @@
|
||||
//
|
||||
// Constants need to be composed at run time by assembling individual
|
||||
// elements, very expensive. The cost is proportional to the number of
|
||||
// different elements therefore use the largest element size possible,
|
||||
// merge smaller integer elements to 64 bits, and group repeated elements.
|
||||
// elements therefor use the largest element size possible, even by
|
||||
// merging smaller values.
|
||||
//
|
||||
// Constants with repeating patterns can be optimized with the smaller
|
||||
// patterns repeated more frequently being more efficient.
|
||||
//
|
||||
// Some specific constants can be very efficient. Zero is very efficient,
|
||||
// 1 and -1 slightly less so.
|
||||
// 1 and -1 slightly less so.
|
||||
//
|
||||
// If an expensive constant is to be reused in the same function it should
|
||||
// be declared as a local variable defined once and reused.
|
||||
|
Reference in New Issue
Block a user