Compare commits

..

1 Commits

Author SHA1 Message Date
Jay D Dee
82c2605d77 v3.11.1 2020-01-06 13:55:17 -05:00
55 changed files with 753 additions and 452 deletions

View File

@@ -8,10 +8,9 @@ Security warning
Miner programs are often flagged as malware by antivirus programs. This is
usually a false positive; they are flagged simply because they are
cryptocurrency miners. However, some malware masquerading as a miner has
been spread using the cover that miners are known to be subject to false
positives and users will dismiss the AV alert. Always be on alert.
The source code of cpuminer-opt is open for anyone to inspect.
cryptocurrency miners. However, some malware has been spread using the
cover that miners are known to be subject to false positives. Always be on
alert. The source code of cpuminer-opt is open for anyone to inspect.
If you don't trust the software, don't download it.
The cryptographic hashing code has been taken from trusted sources but has been
@@ -30,31 +29,12 @@ Requirements
Intel Core2 or newer, or AMD Steamroller or newer CPU. ARM CPUs are not
supported.
64 bit Linux or Windows operating system. Apple, Android and Raspberry Pi
are not supported. FreeBSD YMMV.
64 bit Linux or Windows operating system. Apple, Android and Rpi are
not supported. FreeBSD YMMV.
Change Log
----------
v3.11.2
Fixed x11gost (sib) AVX2 invalid shares.
Fixed x16r, x16rv2, x16s, x16rt, x16rt-veil (veil), x21s.
No shares were submitted when cube, shavite or echo were the first function
in the hash order.
Fixed all algos reporting stats problems when mining with SSE2.
Faster Lyra2 AVX512: lyra2z +47%, lyra2rev3 +11%, allium +13%, x21s +6%
Other minor performance improvements.
Known issue:
Lyra2 AVX512 improvements paradoxically reduced performance on x22i and x25x.
https://github.com/JayDDee/cpuminer-opt/issues/225
v3.11.1
Faster panama for x25x AVX2 & AVX512.

View File

@@ -62,7 +62,9 @@ int scanhash_argon2( struct work* work, uint32_t max_nonce,
argon2hash(hash, endiandata);
if (hash[7] <= Htarg && fulltest(hash, ptarget)) {
pdata[19] = nonce;
submit_solution( work, hash, mythr );
*hashes_done = pdata[19] - first_nonce;
work_set_target_ratio(work, hash);
return 1;
}
nonce++;
} while (nonce < max_nonce && !work_restart[thr_id].restart);

View File

@@ -43,14 +43,17 @@ int scanhash_blake2b( struct work *work, uint32_t max_nonce,
do {
be32enc(&endiandata[19], n);
//blake2b_hash_end(vhashcpu, endiandata);
blake2b_hash(vhashcpu, endiandata);
if (vhashcpu[7] < Htarg && fulltest(vhashcpu, ptarget))
{
if (vhashcpu[7] < Htarg && fulltest(vhashcpu, ptarget)) {
work_set_target_ratio(work, vhashcpu);
*hashes_done = n - first_nonce + 1;
pdata[19] = n;
submit_solution( work, vhashcpu, mythr );
}
n++;
return 1;
}
n++;
} while (n < max_nonce && !work_restart[thr_id].restart);
*hashes_done = n - first_nonce + 1;
pdata[19] = n;

View File

@@ -77,15 +77,25 @@ int scanhash_decred( struct work *work, uint32_t max_nonce,
be32enc(&endiandata[k], pdata[k]);
#endif
#ifdef DEBUG_ALGO
if (!thr_id) applog(LOG_DEBUG,"[%d] Target=%08x %08x", thr_id, ptarget[6], ptarget[7]);
#endif
do {
//be32enc(&endiandata[DCR_NONCE_OFT32], n);
endiandata[DECRED_NONCE_INDEX] = n;
decred_hash(hash32, endiandata);
if (hash32[7] <= HTarget && fulltest(hash32, ptarget))
{
pdata[DECRED_NONCE_INDEX] = n;
submit_solution( work, hash32, mythr );
if (hash32[7] <= HTarget && fulltest(hash32, ptarget)) {
work_set_target_ratio(work, hash32);
*hashes_done = n - first_nonce + 1;
#ifdef DEBUG_ALGO
applog(LOG_BLUE, "Nonce : %08x %08x", n, swab32(n));
applog_hash(ptarget);
applog_compare_hash(hash32, ptarget);
#endif
pdata[DECRED_NONCE_INDEX] = n;
return 1;
}
n++;

View File

@@ -4,7 +4,7 @@
#include <stdlib.h>
#include <memory.h>
#include <math.h>
#include "simd-utils.h"
#include "sph_gost.h"
#ifdef __cplusplus
@@ -696,26 +696,9 @@ static void AddModulo512(const void *a,const void *b,void *c)
static void AddXor512(const void *a,const void *b,void *c)
{
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
casti_m512i( c, 0 ) = _mm512_xor_si512( casti_m512i( a, 0 ),
casti_m512i( b, 0 ) );
#elif defined(__AVX2__)
casti_m256i( c, 0 ) = _mm256_xor_si256( casti_m256i( a, 0 ),
casti_m256i( b, 0 ) );
casti_m256i( c, 1 ) = _mm256_xor_si256( casti_m256i( a, 1 ),
casti_m256i( b, 1 ) );
#elif defined(__SSE2__)
casti_m128i( c, 0 ) = _mm_xor_si128( casti_m128i( a, 0 ),
casti_m128i( b, 0 ) );
casti_m128i( c, 1 ) = _mm_xor_si128( casti_m128i( a, 1 ),
casti_m128i( b, 1 ) );
casti_m128i( c, 2 ) = _mm_xor_si128( casti_m128i( a, 2 ),
casti_m128i( b, 2 ) );
casti_m128i( c, 3 ) = _mm_xor_si128( casti_m128i( a, 3 ),
casti_m128i( b, 3 ) );
#else
const unsigned long long *A=a, *B=b;
const unsigned long long *A=a, *B=b;
unsigned long long *C=c;
#ifdef FULL_UNROLL
C[0] = A[0] ^ B[0];
C[1] = A[1] ^ B[1];
C[2] = A[2] ^ B[2];
@@ -724,6 +707,12 @@ static void AddXor512(const void *a,const void *b,void *c)
C[5] = A[5] ^ B[5];
C[6] = A[6] ^ B[6];
C[7] = A[7] ^ B[7];
#else
int i = 0;
for(i=0; i<8; i++) {
C[i] = A[i] ^ B[i];
}
#endif
}
@@ -904,32 +893,31 @@ static void g_N(const unsigned char *N,unsigned char *h,const unsigned char *m)
static void hash_X(unsigned char *IV,const unsigned char *message,unsigned long long length,unsigned char *out)
{
unsigned char v512[64] __attribute__((aligned(64))) = {
unsigned char v512[64] = {
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x02,0x00
};
unsigned char v0[64] __attribute__((aligned(64))) = {
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
};
unsigned char Sigma[64] __attribute__((aligned(64))) = {
};
unsigned char v0[64] = {
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
};
unsigned char N[64] __attribute__((aligned(64))) = {
unsigned char Sigma[64] = {
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
};
unsigned char m[64] __attribute__((aligned(64)));
unsigned char *hash = IV;
unsigned char N[64] = {
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
};
unsigned char m[64], *hash = IV;
unsigned long long len = length;
// Stage 2
@@ -964,7 +952,7 @@ static void hash_X(unsigned char *IV,const unsigned char *message,unsigned long
static void hash_512(const unsigned char *message, unsigned long long length, unsigned char *out)
{
unsigned char IV[64] __attribute__((aligned(64))) = {
unsigned char IV[64] = {
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

View File

@@ -81,9 +81,9 @@ typedef struct {
*/
typedef struct {
#ifndef DOXYGEN_IGNORE
unsigned char buf[64] __attribute__((aligned(64)));
sph_u32 V[5][8] __attribute__((aligned(64)));
unsigned char buf[64]; /* first field, for alignment */
size_t ptr;
sph_u32 V[5][8];
#endif
} sph_gost512_context;

View File

@@ -67,12 +67,8 @@ HashReturn_gr init_groestl( hashState_groestl* ctx, int hashlen )
ctx->chaining[i] = _mm_setzero_si128();
ctx->buffer[i] = _mm_setzero_si128();
}
// The only non-zero in the IV is len. It can be hard coded.
ctx->chaining[ 6 ] = m128_const_64( 0x0200000000000000, 0 );
// ((u64*)ctx->chaining)[COLS-1] = U64BIG((u64)LENGTH);
// INIT(ctx->chaining);
((u64*)ctx->chaining)[COLS-1] = U64BIG((u64)LENGTH);
INIT(ctx->chaining);
ctx->buf_ptr = 0;
ctx->rem_ptr = 0;

View File

@@ -42,12 +42,9 @@ int groestl512_4way_init( groestl512_4way_context* ctx, uint64_t hashlen )
ctx->buffer[i] = m512_zero;
}
// The only non-zero in the IV is len. It can be hard coded.
ctx->chaining[ 6 ] = m512_const2_64( 0x0200000000000000, 0 );
// uint64_t len = U64BIG((uint64_t)LENGTH);
// ctx->chaining[ COLS/2 -1 ] = _mm512_set4_epi64( len, 0, len, 0 );
// INIT_4way(ctx->chaining);
uint64_t len = U64BIG((uint64_t)LENGTH);
ctx->chaining[ COLS/2 -1 ] = _mm512_set4_epi64( len, 0, len, 0 );
INIT_4way(ctx->chaining);
ctx->buf_ptr = 0;
ctx->rem_ptr = 0;

View File

@@ -115,7 +115,7 @@ __m512i ALL_FF;
\
/* compute z_i : double x_i using temp xmm8 and 1B xmm9 */\
/* compute w_i : add y_{i+4} */\
b1 = m512_const1_64( 0x1b1b1b1b1b1b1b1b );\
b1 = ALL_1B;\
MUL2(a0, b0, b1);\
a0 = _mm512_xor_si512(a0, TEMP0);\
MUL2(a1, b0, b1);\
@@ -276,7 +276,7 @@ __m512i ALL_FF;
for ( round_counter = 0; round_counter < 14; round_counter += 2) \
{ \
/* AddRoundConstant Q1024 */\
xmm1 = m512_neg1;\
xmm1 = ALL_FF;\
xmm8 = _mm512_xor_si512( xmm8, xmm1 );\
xmm9 = _mm512_xor_si512( xmm9, xmm1 );\
xmm10 = _mm512_xor_si512( xmm10, xmm1 );\
@@ -298,7 +298,7 @@ __m512i ALL_FF;
SUBMIX(xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);\
\
/* AddRoundConstant Q1024 */\
xmm9 = m512_neg1;\
xmm9 = ALL_FF;\
xmm0 = _mm512_xor_si512( xmm0, xmm9 );\
xmm1 = _mm512_xor_si512( xmm1, xmm9 );\
xmm2 = _mm512_xor_si512( xmm2, xmm9 );\

View File

@@ -150,8 +150,10 @@ int scanhash_bastion( struct work *work, uint32_t max_nonce,
be32enc(&endiandata[19], n);
bastionhash(hash32, endiandata);
if (hash32[7] < Htarg && fulltest(hash32, ptarget)) {
work_set_target_ratio(work, hash32);
*hashes_done = n - first_nonce + 1;
pdata[19] = n;
submit_solution( work, hash32, mythr );
return true;
}
n++;

View File

@@ -117,6 +117,9 @@ int scanhash_jha( struct work *work, uint32_t max_nonce,
jha_kec_midstate( endiandata );
#ifdef DEBUG_ALGO
printf("[%d] Htarg=%X\n", thr_id, Htarg);
#endif
for (int m=0; m < 6; m++) {
if (Htarg <= htmax[m]) {
uint32_t mask = masks[m];
@@ -124,9 +127,25 @@ int scanhash_jha( struct work *work, uint32_t max_nonce,
pdata[19] = ++n;
be32enc(&endiandata[19], n);
jha_hash(hash32, endiandata);
if ((!(hash32[7] & mask)) && fulltest(hash32, ptarget))
submit_solution( work, hash32, mythr );
#ifndef DEBUG_ALGO
if ((!(hash32[7] & mask)) && fulltest(hash32, ptarget)) {
work_set_target_ratio(work, hash32);
*hashes_done = n - first_nonce + 1;
return 1;
}
#else
if (!(n % 0x1000) && !thr_id) printf(".");
if (!(hash32[7] & mask)) {
printf("[%d]",thr_id);
if (fulltest(hash32, ptarget)) {
work_set_target_ratio(work, hash32);
*hashes_done = n - first_nonce + 1;
return 1;
}
}
#endif
} while (n < max_nonce && !work_restart[thr_id].restart);
// see blake.c if else to understand the loop on htmax => mask
break;
}
}

View File

@@ -196,6 +196,7 @@ inline void reducedDuplexRowSetup_2way( uint64_t *State, uint64_t *rowIn,
__m512i* in = (__m512i*)rowIn;
__m512i* inout = (__m512i*)rowInOut;
__m512i* out = (__m512i*)rowOut + ( (nCols-1) * BLOCK_LEN_M256I );
__m512i t0, t1, t2;
state0 = _mm512_load_si512( (__m512i*)State );
state1 = _mm512_load_si512( (__m512i*)State + 1 );
@@ -217,27 +218,24 @@ inline void reducedDuplexRowSetup_2way( uint64_t *State, uint64_t *rowIn,
out[1] = _mm512_xor_si512( state1, in[1] );
out[2] = _mm512_xor_si512( state2, in[2] );
{
register __m512i t0, t1, t2;
//M[row*][col] = M[row*][col] XOR rotW(rand)
t0 = _mm512_permutex_epi64( state0, 0x93 );
t1 = _mm512_permutex_epi64( state1, 0x93 );
t2 = _mm512_permutex_epi64( state2, 0x93 );
//M[row*][col] = M[row*][col] XOR rotW(rand)
t0 = _mm512_permutex_epi64( state0, 0x93 );
t1 = _mm512_permutex_epi64( state1, 0x93 );
t2 = _mm512_permutex_epi64( state2, 0x93 );
inout[0] = _mm512_xor_si512( inout[0],
_mm512_mask_blend_epi64( 0x11, t0, t2 ) );
inout[1] = _mm512_xor_si512( inout[1],
_mm512_mask_blend_epi64( 0x11, t1, t0 ) );
inout[2] = _mm512_xor_si512( inout[2],
_mm512_mask_blend_epi64( 0x11, t2, t1 ) );
}
inout[0] = _mm512_xor_si512( inout[0],
_mm512_mask_blend_epi32( 0x0303, t0, t2 ) );
inout[1] = _mm512_xor_si512( inout[1],
_mm512_mask_blend_epi32( 0x0303, t1, t0 ) );
inout[2] = _mm512_xor_si512( inout[2],
_mm512_mask_blend_epi32( 0x0303, t2, t1 ) );
//Inputs: next column (i.e., next block in sequence)
in += BLOCK_LEN_M256I;
inout += BLOCK_LEN_M256I;
//Output: goes to previous column
out -= BLOCK_LEN_M256I;
//Inputs: next column (i.e., next block in sequence)
in += BLOCK_LEN_M256I;
inout += BLOCK_LEN_M256I;
//Output: goes to previous column
out -= BLOCK_LEN_M256I;
}
_mm512_store_si512( (__m512i*)State, state0 );
@@ -246,6 +244,9 @@ inline void reducedDuplexRowSetup_2way( uint64_t *State, uint64_t *rowIn,
_mm512_store_si512( (__m512i*)State + 3, state3 );
}
// Big ugly workaround for pointer aliasing: use a union of pointers.
// Access matrix using m512i for in and out, m256i for inout
inline void reducedDuplexRow_2way( uint64_t *State, uint64_t *rowIn,
uint64_t *rowInOut0, uint64_t *rowInOut1,
uint64_t *rowOut, uint64_t nCols)
@@ -256,81 +257,95 @@ inline void reducedDuplexRow_2way( uint64_t *State, uint64_t *rowIn,
__m256i *inout0 = (__m256i*)rowInOut0;
__m256i *inout1 = (__m256i*)rowInOut1;
__m512i *out = (__m512i*)rowOut;
register __m512i io0, io1, io2;
__m512i io[3];
povly inout;
inout.v512 = &io[0];
__m512i t0, t1, t2;
state0 = _mm512_load_si512( (__m512i*)State );
state1 = _mm512_load_si512( (__m512i*)State + 1 );
state2 = _mm512_load_si512( (__m512i*)State + 2 );
state3 = _mm512_load_si512( (__m512i*)State + 3 );
for ( i = 0; i < nCols; i++ )
{
//Absorbing "M[prev] [+] M[row*]"
io0 = _mm512_mask_blend_epi64( 0xf0,
_mm512_load_si512( (__m512i*)inout0 ),
_mm512_load_si512( (__m512i*)inout1 ) );
io1 = _mm512_mask_blend_epi64( 0xf0,
_mm512_load_si512( (__m512i*)inout0 +1 ),
_mm512_load_si512( (__m512i*)inout1 +1 ) );
io2 = _mm512_mask_blend_epi64( 0xf0,
_mm512_load_si512( (__m512i*)inout0 +2 ),
_mm512_load_si512( (__m512i*)inout1 +2 ) );
_mm_prefetch( in, _MM_HINT_T0 );
_mm_prefetch( inout0, _MM_HINT_T0 );
_mm_prefetch( inout1, _MM_HINT_T0 );
_mm_prefetch( in + 2, _MM_HINT_T0 );
_mm_prefetch( inout0 + 2, _MM_HINT_T0 );
_mm_prefetch( inout1 + 2, _MM_HINT_T0 );
_mm_prefetch( in + 4, _MM_HINT_T0 );
_mm_prefetch( inout0 + 4, _MM_HINT_T0 );
_mm_prefetch( inout1 + 4, _MM_HINT_T0 );
_mm_prefetch( in + 6, _MM_HINT_T0 );
_mm_prefetch( inout0 + 6, _MM_HINT_T0 );
_mm_prefetch( inout1 + 6, _MM_HINT_T0 );
state0 = _mm512_xor_si512( state0, _mm512_add_epi64( in[0], io0 ) );
state1 = _mm512_xor_si512( state1, _mm512_add_epi64( in[1], io1 ) );
state2 = _mm512_xor_si512( state2, _mm512_add_epi64( in[2], io2 ) );
for ( i = 0; i < nCols; i++ )
{
//Applies the reduced-round transformation f to the sponge's state
LYRA_ROUND_2WAY_AVX512( state0, state1, state2, state3 );
//Absorbing "M[prev] [+] M[row*]"
inout.v256[0] = inout0[0];
inout.v256[1] = inout1[1];
inout.v256[2] = inout0[2];
inout.v256[3] = inout1[3];
inout.v256[4] = inout0[4];
inout.v256[5] = inout1[5];
{
register __m512i t0, t1, t2;
state0 = _mm512_xor_si512( state0,
_mm512_add_epi64( in[0], inout.v512[0] ) );
state1 = _mm512_xor_si512( state1,
_mm512_add_epi64( in[1], inout.v512[1] ) );
state2 = _mm512_xor_si512( state2,
_mm512_add_epi64( in[2], inout.v512[2] ) );
//M[rowOut][col] = M[rowOut][col] XOR rand
t0 = _mm512_xor_si512( out[0], state0 );
t1 = _mm512_xor_si512( out[1], state1 );
t2 = _mm512_xor_si512( out[2], state2 );
// if out is the same row as inout, update with new data.
if ( rowOut == rowInOut0 )
{
io0 = _mm512_mask_blend_epi64( 0x0f, io0, t0 );
io1 = _mm512_mask_blend_epi64( 0x0f, io1, t1 );
io2 = _mm512_mask_blend_epi64( 0x0f, io2, t2 );
}
if ( rowOut == rowInOut1 )
{
io0 = _mm512_mask_blend_epi64( 0xf0, io0, t0 );
io1 = _mm512_mask_blend_epi64( 0xf0, io1, t1 );
io2 = _mm512_mask_blend_epi64( 0xf0, io2, t2 );
}
//Applies the reduced-round transformation f to the sponge's state
LYRA_ROUND_2WAY_AVX512( state0, state1, state2, state3 );
out[0] = t0;
out[1] = t1;
out[2] = t2;
//M[rowOut][col] = M[rowOut][col] XOR rand
out[0] = _mm512_xor_si512( out[0], state0 );
out[1] = _mm512_xor_si512( out[1], state1 );
out[2] = _mm512_xor_si512( out[2], state2 );
//M[rowInOut][col] = M[rowInOut][col] XOR rotW(rand)
t0 = _mm512_permutex_epi64( state0, 0x93 );
t1 = _mm512_permutex_epi64( state1, 0x93 );
t2 = _mm512_permutex_epi64( state2, 0x93 );
// if inout is the same row as out it was just overwritten, reload.
if ( rowOut == rowInOut0 )
{
inout.v256[0] = inout0[0];
inout.v256[2] = inout0[2];
inout.v256[4] = inout0[4];
}
if ( rowOut == rowInOut1 )
{
inout.v256[1] = inout1[1];
inout.v256[3] = inout1[3];
inout.v256[5] = inout1[5];
}
io0 = _mm512_xor_si512( io0, _mm512_mask_blend_epi64( 0x11, t0, t2 ) );
io1 = _mm512_xor_si512( io1, _mm512_mask_blend_epi64( 0x11, t1, t0 ) );
io2 = _mm512_xor_si512( io2, _mm512_mask_blend_epi64( 0x11, t2, t1 ) );
}
//M[rowInOut][col] = M[rowInOut][col] XOR rotW(rand)
t0 = _mm512_permutex_epi64( state0, 0x93 );
t1 = _mm512_permutex_epi64( state1, 0x93 );
t2 = _mm512_permutex_epi64( state2, 0x93 );
_mm512_mask_store_epi64( (__m512i*)inout0, 0x0f, io0 );
_mm512_mask_store_epi64( (__m512i*)inout1, 0xf0, io0 );
_mm512_mask_store_epi64( (__m512i*)inout0 +1, 0x0f, io1 );
_mm512_mask_store_epi64( (__m512i*)inout1 +1, 0xf0, io1 );
_mm512_mask_store_epi64( (__m512i*)inout0 +2, 0x0f, io2 );
_mm512_mask_store_epi64( (__m512i*)inout1 +2, 0xf0, io2 );
inout.v512[0] = _mm512_xor_si512( inout.v512[0],
_mm512_mask_blend_epi32( 0x0303, t0, t2 ) );
inout.v512[1] = _mm512_xor_si512( inout.v512[1],
_mm512_mask_blend_epi32( 0x0303, t1, t0 ) );
inout.v512[2] = _mm512_xor_si512( inout.v512[2],
_mm512_mask_blend_epi32( 0x0303, t2, t1 ) );
inout0[0] = inout.v256[0];
inout1[1] = inout.v256[1];
inout0[2] = inout.v256[2];
inout1[3] = inout.v256[3];
inout0[4] = inout.v256[4];
inout1[5] = inout.v256[5];
//Goes to next block
in += BLOCK_LEN_M256I;
inout0 += BLOCK_LEN_M256I * 2;
inout1 += BLOCK_LEN_M256I * 2;
out += BLOCK_LEN_M256I;
//Goes to next block
in += BLOCK_LEN_M256I;
inout0 += BLOCK_LEN_M256I * 2;
inout1 += BLOCK_LEN_M256I * 2;
out += BLOCK_LEN_M256I;
}
_mm512_store_si512( (__m512i*)State, state0 );

View File

@@ -25,6 +25,7 @@ void nist5hash(void *output, const void *input)
sph_skein512_context ctx_skein;
sph_jh512_context ctx_jh;
sph_keccak512_context ctx_keccak;
uint32_t mask = 8;
sph_blake512_init( &ctx_blake );
sph_blake512( &ctx_blake, input, 80 );
@@ -58,10 +59,10 @@ void nist5hash(void *output, const void *input)
int scanhash_nist5( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr)
{
uint32_t endiandata[20] __attribute__((aligned(64)));
uint32_t hash64[8] __attribute__((aligned(32)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
uint32_t endiandata[20] __attribute__((aligned(64)));
uint32_t hash64[8] __attribute__((aligned(32)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
uint32_t n = pdata[19] - 1;
const uint32_t first_nonce = pdata[19];
int thr_id = mythr->id; // thr_id arg is deprecated
@@ -87,6 +88,9 @@ int scanhash_nist5( struct work *work, uint32_t max_nonce,
// we need bigendian data...
swab32_array( endiandata, pdata, 20 );
#ifdef DEBUG_ALGO
printf("[%d] Htarg=%X\n", thr_id, Htarg);
#endif
for (int m=0; m < 6; m++) {
if (Htarg <= htmax[m]) {
uint32_t mask = masks[m];
@@ -94,9 +98,24 @@ int scanhash_nist5( struct work *work, uint32_t max_nonce,
pdata[19] = ++n;
be32enc(&endiandata[19], n);
nist5hash(hash64, endiandata);
if ((!(hash64[7] & mask)) && fulltest(hash64, ptarget))
submit_solution( work, hash64, mythr );
#ifndef DEBUG_ALGO
if ((!(hash64[7] & mask)) && fulltest(hash64, ptarget)) {
*hashes_done = n - first_nonce + 1;
return true;
}
#else
if (!(n % 0x1000) && !thr_id) printf(".");
if (!(hash64[7] & mask)) {
printf("[%d]",thr_id);
if (fulltest(hash64, ptarget)) {
work_set_target_ratio( work, hash64 );
*hashes_done = n - first_nonce + 1;
return true;
}
}
#endif
} while (n < max_nonce && !work_restart[thr_id].restart);
// see blake.c if else to understand the loop on htmax => mask
break;
}
}

View File

@@ -142,7 +142,11 @@ int scanhash_zr5( struct work *work, uint32_t max_nonce,
{
pdata[0] = tmpdata[0];
pdata[19] = nonce;
submit_solution( work, hash, mythr );
*hashes_done = pdata[19] - first_nonce + 1;
work_set_target_ratio( work, hash );
if (opt_debug)
applog(LOG_INFO, "found nonce %x", nonce);
return 1;
}
}
nonce++;

View File

@@ -6,25 +6,26 @@
#define M17( macro ) \
do { \
macro( 0, 1, 2, 4 ); \
macro( 1, 2, 3, 5 ); \
macro( 2, 3, 4, 6 ); \
macro( 3, 4, 5, 7 ); \
macro( 4, 5, 6, 8 ); \
macro( 5, 6, 7, 9 ); \
macro( 6, 7, 8, 10 ); \
macro( 7, 8, 9, 11 ); \
macro( 8, 9, 10, 12 ); \
macro( 9, 10, 11, 13 ); \
macro( 10, 11, 12, 14 ); \
macro( 11, 12, 13, 15 ); \
macro( 12, 13, 14, 16 ); \
macro( 13, 14, 15, 0 ); \
macro( 14, 15, 16, 1 ); \
macro( 15, 16, 0, 2 ); \
macro( 16, 0, 1, 3 ); \
macro( 0, 1, 2, 4); \
macro( 1, 2, 3, 5); \
macro( 2, 3, 4, 6); \
macro( 3, 4, 5, 7); \
macro( 4, 5, 6, 8); \
macro( 5, 6, 7, 9); \
macro( 6, 7, 8, 10); \
macro( 7, 8, 9, 11); \
macro( 8, 9, 10, 12); \
macro( 9, 10, 11, 13); \
macro(10, 11, 12, 14); \
macro(11, 12, 13, 15); \
macro(12, 13, 14, 16); \
macro(13, 14, 15, 0); \
macro(14, 15, 16, 1); \
macro(15, 16, 0, 2); \
macro(16, 0, 1, 3); \
} while (0)
#define RSTATE(n0, n1, n2, n4) (a ## n0 = sc->state[n0])
#define WSTATE(n0, n1, n2, n4) (sc->state[n0] = a ## n0)
@@ -49,7 +50,9 @@ do { \
#define LVARS_4W \
LVAR17_4W(a) \
LVAR17_4W(g)
LVAR17_4W(g) \
LVAR17_4W(p) \
LVAR17_4W(t)
#define BUPDATE1_4W( n0, n2 ) \
do { \
@@ -60,14 +63,14 @@ do { \
#define BUPDATE_4W \
do { \
BUPDATE1_4W( 0, 2 ); \
BUPDATE1_4W( 1, 3 ); \
BUPDATE1_4W( 2, 4 ); \
BUPDATE1_4W( 3, 5 ); \
BUPDATE1_4W( 4, 6 ); \
BUPDATE1_4W( 5, 7 ); \
BUPDATE1_4W( 6, 0 ); \
BUPDATE1_4W( 7, 1 ); \
BUPDATE1_4W(0, 2); \
BUPDATE1_4W(1, 3); \
BUPDATE1_4W(2, 4); \
BUPDATE1_4W(3, 5); \
BUPDATE1_4W(4, 6); \
BUPDATE1_4W(5, 7); \
BUPDATE1_4W(6, 0); \
BUPDATE1_4W(7, 1); \
} while (0)
#define GAMMA_4W(n0, n1, n2, n4) \
@@ -75,46 +78,46 @@ do { \
_mm_or_si128( a ## n1, mm128_not( a ## n2 ) ) ) )
#define PI_ALL_4W do { \
a0 = g0; \
a1 = mm128_rol_32( g7, 1 ); \
a2 = mm128_rol_32( g14, 3 ); \
a3 = mm128_rol_32( g4, 6 ); \
a4 = mm128_rol_32( g11, 10 ); \
a5 = mm128_rol_32( g1, 15 ); \
a6 = mm128_rol_32( g8, 21 ); \
a7 = mm128_rol_32( g15, 28 ); \
a8 = mm128_rol_32( g5, 4 ); \
a9 = mm128_rol_32( g12, 13 ); \
a10 = mm128_rol_32( g2, 23 ); \
a11 = mm128_rol_32( g9, 2 ); \
a12 = mm128_rol_32( g16, 14 ); \
a13 = mm128_rol_32( g6, 27 ); \
a14 = mm128_rol_32( g13, 9 ); \
a15 = mm128_rol_32( g3, 24 ); \
a16 = mm128_rol_32( g10, 8 ); \
p0 = g0; \
p1 = mm128_rol_32( g7, 1 ); \
p2 = mm128_rol_32( g14, 3 ); \
p3 = mm128_rol_32( g4, 6 ); \
p4 = mm128_rol_32( g11, 10 ); \
p5 = mm128_rol_32( g1, 15 ); \
p6 = mm128_rol_32( g8, 21 ); \
p7 = mm128_rol_32( g15, 28 ); \
p8 = mm128_rol_32( g5, 4 ); \
p9 = mm128_rol_32( g12, 13 ); \
p10 = mm128_rol_32( g2, 23 ); \
p11 = mm128_rol_32( g9, 2 ); \
p12 = mm128_rol_32( g16, 14 ); \
p13 = mm128_rol_32( g6, 27 ); \
p14 = mm128_rol_32( g13, 9 ); \
p15 = mm128_rol_32( g3, 24 ); \
p16 = mm128_rol_32( g10, 8 ); \
} while (0)
#define THETA_4W(n0, n1, n2, n4) \
( g ## n0 = _mm_xor_si128( a ## n0, _mm_xor_si128( a ## n1, a ## n4 ) ) )
( t ## n0 = _mm_xor_si128( p ## n0, _mm_xor_si128( p ## n1, p ## n4 ) ) )
#define SIGMA_ALL_4W do { \
a0 = _mm_xor_si128( g0, m128_one_32 ); \
a1 = _mm_xor_si128( g1, INW2( 0 ) ); \
a2 = _mm_xor_si128( g2, INW2( 1 ) ); \
a3 = _mm_xor_si128( g3, INW2( 2 ) ); \
a4 = _mm_xor_si128( g4, INW2( 3 ) ); \
a5 = _mm_xor_si128( g5, INW2( 4 ) ); \
a6 = _mm_xor_si128( g6, INW2( 5 ) ); \
a7 = _mm_xor_si128( g7, INW2( 6 ) ); \
a8 = _mm_xor_si128( g8, INW2( 7 ) ); \
a9 = _mm_xor_si128( g9, sc->buffer[ ptr16 ][0] ); \
a10 = _mm_xor_si128( g10, sc->buffer[ ptr16 ][1] ); \
a11 = _mm_xor_si128( g11, sc->buffer[ ptr16 ][2] ); \
a12 = _mm_xor_si128( g12, sc->buffer[ ptr16 ][3] ); \
a13 = _mm_xor_si128( g13, sc->buffer[ ptr16 ][4] ); \
a14 = _mm_xor_si128( g14, sc->buffer[ ptr16 ][5] ); \
a15 = _mm_xor_si128( g15, sc->buffer[ ptr16 ][6] ); \
a16 = _mm_xor_si128( g16, sc->buffer[ ptr16 ][7] ); \
a0 = _mm_xor_si128( t0, _mm_set1_epi32( 1 ) ); \
a1 = _mm_xor_si128( t1, INW2( 0 ) ); \
a2 = _mm_xor_si128( t2, INW2( 1 ) ); \
a3 = _mm_xor_si128( t3, INW2( 2 ) ); \
a4 = _mm_xor_si128( t4, INW2( 3 ) ); \
a5 = _mm_xor_si128( t5, INW2( 4 ) ); \
a6 = _mm_xor_si128( t6, INW2( 5 ) ); \
a7 = _mm_xor_si128( t7, INW2( 6 ) ); \
a8 = _mm_xor_si128( t8, INW2( 7 ) ); \
a9 = _mm_xor_si128( t9, sc->buffer[ ptr16 ] [0 ] ); \
a10 = _mm_xor_si128( t10, sc->buffer[ ptr16 ] [1 ] ); \
a11 = _mm_xor_si128( t11, sc->buffer[ ptr16 ] [2 ] ); \
a12 = _mm_xor_si128( t12, sc->buffer[ ptr16 ] [3 ] ); \
a13 = _mm_xor_si128( t13, sc->buffer[ ptr16 ] [4 ] ); \
a14 = _mm_xor_si128( t14, sc->buffer[ ptr16 ] [5 ] ); \
a15 = _mm_xor_si128( t15, sc->buffer[ ptr16 ] [6 ] ); \
a16 = _mm_xor_si128( t16, sc->buffer[ ptr16 ] [7 ] ); \
} while (0)
#define PANAMA_STEP_4W do { \
@@ -142,9 +145,9 @@ panama_4way_push( panama_4way_context *sc, const unsigned char *pbuf,
#define INW2(i) INW1(i)
M17( RSTATE );
ptr0 = sc->buffer_ptr;
while ( num-- > 0 )
{
while (num -- > 0) {
PANAMA_STEP_4W;
pbuf = (const unsigned char *)pbuf + 32*4;
}
@@ -170,11 +173,14 @@ panama_4way_pull( panama_4way_context *sc, unsigned num )
#define INW2(i) casti_m128i( sc->buffer[ptr4], i )
M17( RSTATE );
ptr0 = sc->buffer_ptr;
while ( num-- > 0 )
{
while (num -- > 0) {
unsigned ptr4;
ptr4 = ( (ptr0 + 4) & 31 );
PANAMA_STEP_4W;
}
M17( WSTATE );
@@ -186,11 +192,18 @@ panama_4way_pull( panama_4way_context *sc, unsigned num )
}
void
panama_4way_init( void *cc )
panama_4way_init(void *cc)
{
panama_4way_context *sc;
sc = cc;
/*
* This is not completely conformant, but "it will work
* everywhere". Initial state consists of zeroes everywhere.
* Conceptually, the sph_u32 type may have padding bits which
* must not be set to 0; but such an architecture remains to
* be seen.
*/
sc->data_ptr = 0;
memset( sc->buffer, 0, sizeof sc->buffer );
sc->buffer_ptr = 0;
@@ -204,8 +217,7 @@ panama_4way_short( void *cc, const void *data, size_t len )
unsigned current;
sc = cc;
current = sc->data_ptr;
while ( len > 0 )
{
while (len > 0) {
unsigned clen;
clen = ( (sizeof sc->data ) >> 2 ) - current;
@@ -271,8 +283,11 @@ panama_4way_close( void *cc, void *dst )
*(__m128i*)( sc->data + current ) = m128_one_32;
current++;
memset_zero_128( (__m128i*)sc->data + current, 32 - current );
panama_4way_push( sc, sc->data, 1 );
panama_4way_pull( sc, 32 );
for ( i = 0; i < 8; i ++ )
casti_m128i( dst, i ) = sc->state[i + 9];
}
@@ -291,7 +306,9 @@ panama_4way_close( void *cc, void *dst )
#define LVARS_8W \
LVAR17_8W(a) \
LVAR17_8W(g)
LVAR17_8W(g) \
LVAR17_8W(p) \
LVAR17_8W(t)
#define BUPDATE1_8W( n0, n2 ) \
do { \
@@ -302,14 +319,14 @@ do { \
#define BUPDATE_8W \
do { \
BUPDATE1_8W( 0, 2 ); \
BUPDATE1_8W( 1, 3 ); \
BUPDATE1_8W( 2, 4 ); \
BUPDATE1_8W( 3, 5 ); \
BUPDATE1_8W( 4, 6 ); \
BUPDATE1_8W( 5, 7 ); \
BUPDATE1_8W( 6, 0 ); \
BUPDATE1_8W( 7, 1 ); \
BUPDATE1_8W(0, 2); \
BUPDATE1_8W(1, 3); \
BUPDATE1_8W(2, 4); \
BUPDATE1_8W(3, 5); \
BUPDATE1_8W(4, 6); \
BUPDATE1_8W(5, 7); \
BUPDATE1_8W(6, 0); \
BUPDATE1_8W(7, 1); \
} while (0)
#define GAMMA_8W(n0, n1, n2, n4) \
@@ -317,47 +334,46 @@ do { \
_mm256_or_si256( a ## n1, mm256_not( a ## n2 ) ) ) )
#define PI_ALL_8W do { \
a0 = g0; \
a1 = mm256_rol_32( g7, 1 ); \
a2 = mm256_rol_32( g14, 3 ); \
a3 = mm256_rol_32( g4, 6 ); \
a4 = mm256_rol_32( g11, 10 ); \
a5 = mm256_rol_32( g1, 15 ); \
a6 = mm256_rol_32( g8, 21 ); \
a7 = mm256_rol_32( g15, 28 ); \
a8 = mm256_rol_32( g5, 4 ); \
a9 = mm256_rol_32( g12, 13 ); \
a10 = mm256_rol_32( g2, 23 ); \
a11 = mm256_rol_32( g9, 2 ); \
a12 = mm256_rol_32( g16, 14 ); \
a13 = mm256_rol_32( g6, 27 ); \
a14 = mm256_rol_32( g13, 9 ); \
a15 = mm256_rol_32( g3, 24 ); \
a16 = mm256_rol_32( g10, 8 ); \
p0 = g0; \
p1 = mm256_rol_32( g7, 1 ); \
p2 = mm256_rol_32( g14, 3 ); \
p3 = mm256_rol_32( g4, 6 ); \
p4 = mm256_rol_32( g11, 10 ); \
p5 = mm256_rol_32( g1, 15 ); \
p6 = mm256_rol_32( g8, 21 ); \
p7 = mm256_rol_32( g15, 28 ); \
p8 = mm256_rol_32( g5, 4 ); \
p9 = mm256_rol_32( g12, 13 ); \
p10 = mm256_rol_32( g2, 23 ); \
p11 = mm256_rol_32( g9, 2 ); \
p12 = mm256_rol_32( g16, 14 ); \
p13 = mm256_rol_32( g6, 27 ); \
p14 = mm256_rol_32( g13, 9 ); \
p15 = mm256_rol_32( g3, 24 ); \
p16 = mm256_rol_32( g10, 8 ); \
} while (0)
#define THETA_8W(n0, n1, n2, n4) \
( g ## n0 = _mm256_xor_si256( a ## n0, _mm256_xor_si256( a ## n1, \
a ## n4 ) ) )
( t ## n0 = _mm256_xor_si256( p ## n0, _mm256_xor_si256( p ## n1, p ## n4 ) ) )
#define SIGMA_ALL_8W do { \
a0 = _mm256_xor_si256( g0, m256_one_32 ); \
a1 = _mm256_xor_si256( g1, INW2( 0 ) ); \
a2 = _mm256_xor_si256( g2, INW2( 1 ) ); \
a3 = _mm256_xor_si256( g3, INW2( 2 ) ); \
a4 = _mm256_xor_si256( g4, INW2( 3 ) ); \
a5 = _mm256_xor_si256( g5, INW2( 4 ) ); \
a6 = _mm256_xor_si256( g6, INW2( 5 ) ); \
a7 = _mm256_xor_si256( g7, INW2( 6 ) ); \
a8 = _mm256_xor_si256( g8, INW2( 7 ) ); \
a9 = _mm256_xor_si256( g9, sc->buffer[ ptr16 ][0] ); \
a10 = _mm256_xor_si256( g10, sc->buffer[ ptr16 ][1] ); \
a11 = _mm256_xor_si256( g11, sc->buffer[ ptr16 ][2] ); \
a12 = _mm256_xor_si256( g12, sc->buffer[ ptr16 ][3] ); \
a13 = _mm256_xor_si256( g13, sc->buffer[ ptr16 ][4] ); \
a14 = _mm256_xor_si256( g14, sc->buffer[ ptr16 ][5] ); \
a15 = _mm256_xor_si256( g15, sc->buffer[ ptr16 ][6] ); \
a16 = _mm256_xor_si256( g16, sc->buffer[ ptr16 ][7] ); \
a0 = _mm256_xor_si256( t0, m256_one_32 ); \
a1 = _mm256_xor_si256( t1, INW2( 0 ) ); \
a2 = _mm256_xor_si256( t2, INW2( 1 ) ); \
a3 = _mm256_xor_si256( t3, INW2( 2 ) ); \
a4 = _mm256_xor_si256( t4, INW2( 3 ) ); \
a5 = _mm256_xor_si256( t5, INW2( 4 ) ); \
a6 = _mm256_xor_si256( t6, INW2( 5 ) ); \
a7 = _mm256_xor_si256( t7, INW2( 6 ) ); \
a8 = _mm256_xor_si256( t8, INW2( 7 ) ); \
a9 = _mm256_xor_si256( t9, sc->buffer[ ptr16 ] [0 ] ); \
a10 = _mm256_xor_si256( t10, sc->buffer[ ptr16 ] [1 ] ); \
a11 = _mm256_xor_si256( t11, sc->buffer[ ptr16 ] [2 ] ); \
a12 = _mm256_xor_si256( t12, sc->buffer[ ptr16 ] [3 ] ); \
a13 = _mm256_xor_si256( t13, sc->buffer[ ptr16 ] [4 ] ); \
a14 = _mm256_xor_si256( t14, sc->buffer[ ptr16 ] [5 ] ); \
a15 = _mm256_xor_si256( t15, sc->buffer[ ptr16 ] [6 ] ); \
a16 = _mm256_xor_si256( t16, sc->buffer[ ptr16 ] [7 ] ); \
} while (0)
#define PANAMA_STEP_8W do { \
@@ -385,6 +401,7 @@ panama_8way_push( panama_8way_context *sc, const unsigned char *pbuf,
#define INW2(i) INW1(i)
M17( RSTATE );
ptr0 = sc->buffer_ptr;
while ( num-- > 0 )
{
@@ -412,9 +429,9 @@ panama_8way_pull( panama_8way_context *sc, unsigned num )
ptr0 = sc->buffer_ptr;
while ( num-- > 0 )
{
while (num -- > 0) {
unsigned ptr4;
ptr4 = ( (ptr0 + 4) & 31 );
PANAMA_STEP_8W;
}
@@ -432,6 +449,13 @@ panama_8way_init( void *cc )
panama_8way_context *sc;
sc = cc;
/*
* This is not completely conformant, but "it will work
* everywhere". Initial state consists of zeroes everywhere.
* Conceptually, the sph_u32 type may have padding bits which
* must not be set to 0; but such an architecture remains to
* be seen.
*/
sc->data_ptr = 0;
memset( sc->buffer, 0, sizeof sc->buffer );
sc->buffer_ptr = 0;
@@ -445,8 +469,7 @@ panama_8way_short( void *cc, const void *data, size_t len )
unsigned current;
sc = cc;
current = sc->data_ptr;
while ( len > 0 )
{
while (len > 0) {
unsigned clen;
clen = ( (sizeof sc->data ) >> 3 ) - current;
@@ -463,6 +486,7 @@ panama_8way_short( void *cc, const void *data, size_t len )
panama_8way_push( sc, sc->data, 1 );
}
}
sc->data_ptr = current;
}
@@ -511,7 +535,9 @@ panama_8way_close( void *cc, void *dst )
*(__m256i*)( sc->data + current ) = m256_one_32;
current++;
memset_zero_256( (__m256i*)sc->data + current, 32 - current );
panama_8way_push( sc, sc->data, 1 );
panama_8way_pull( sc, 32 );
for ( i = 0; i < 8; i ++ )

View File

@@ -160,12 +160,16 @@ int scanhash_anime( struct work *work, uint32_t max_nonce,
pdata[19] = n;
if ( ( hash[7] & mask ) == 0 && fulltest( hash, ptarget ) )
submit_solution( work, hash, mythr );
{
work_set_target_ratio( work, hash );
*hashes_done = n - first_nonce + 1;
return true;
}
n++;
} while ( ( n < max_nonce ) && !work_restart[thr_id].restart );
break;
}
*hashes_done = n - first_nonce + 1;
pdata[19] = n;
return 0;
}

View File

@@ -330,8 +330,11 @@ int scanhash_hmq1725( struct work *work, uint32_t max_nonce,
be32enc(&endiandata[19], n);
hmq1725hash(hash64, endiandata);
if (((hash64[7]&0xFFFFFFFF)==0) &&
fulltest(hash64, ptarget))
submit_solution( work, hash64, mythr );
fulltest(hash64, ptarget)) {
*hashes_done = n - first_nonce + 1;
work_set_target_ratio( work, hash64 );
return true;
}
} while (n < max_nonce && !work_restart[thr_id].restart);
}
else if (ptarget[7]<=0xF)
@@ -341,8 +344,11 @@ int scanhash_hmq1725( struct work *work, uint32_t max_nonce,
be32enc(&endiandata[19], n);
hmq1725hash(hash64, endiandata);
if (((hash64[7]&0xFFFFFFF0)==0) &&
fulltest(hash64, ptarget))
submit_solution( work, hash64, mythr );
fulltest(hash64, ptarget)) {
*hashes_done = n - first_nonce + 1;
work_set_target_ratio( work, hash64 );
return true;
}
} while (n < max_nonce && !work_restart[thr_id].restart);
}
else if (ptarget[7]<=0xFF)
@@ -352,8 +358,11 @@ int scanhash_hmq1725( struct work *work, uint32_t max_nonce,
be32enc(&endiandata[19], n);
hmq1725hash(hash64, endiandata);
if (((hash64[7]&0xFFFFFF00)==0) &&
fulltest(hash64, ptarget))
submit_solution( work, hash64, mythr );
fulltest(hash64, ptarget)) {
*hashes_done = n - first_nonce + 1;
work_set_target_ratio( work, hash64 );
return true;
}
} while (n < max_nonce && !work_restart[thr_id].restart);
}
else if (ptarget[7]<=0xFFF)
@@ -363,9 +372,13 @@ int scanhash_hmq1725( struct work *work, uint32_t max_nonce,
be32enc(&endiandata[19], n);
hmq1725hash(hash64, endiandata);
if (((hash64[7]&0xFFFFF000)==0) &&
fulltest(hash64, ptarget))
submit_solution( work, hash64, mythr );
fulltest(hash64, ptarget)) {
*hashes_done = n - first_nonce + 1;
work_set_target_ratio( work, hash64 );
return true;
}
} while (n < max_nonce && !work_restart[thr_id].restart);
}
else if (ptarget[7]<=0xFFFF)
{
@@ -374,9 +387,13 @@ int scanhash_hmq1725( struct work *work, uint32_t max_nonce,
be32enc(&endiandata[19], n);
hmq1725hash(hash64, endiandata);
if (((hash64[7]&0xFFFF0000)==0) &&
fulltest(hash64, ptarget))
submit_solution( work, hash64, mythr );
fulltest(hash64, ptarget)) {
*hashes_done = n - first_nonce + 1;
work_set_target_ratio( work, hash64 );
return true;
}
} while (n < max_nonce && !work_restart[thr_id].restart);
}
else
{
@@ -384,10 +401,15 @@ int scanhash_hmq1725( struct work *work, uint32_t max_nonce,
pdata[19] = ++n;
be32enc(&endiandata[19], n);
hmq1725hash(hash64, endiandata);
if (fulltest(hash64, ptarget))
submit_solution( work, hash64, mythr );
if (fulltest(hash64, ptarget)) {
*hashes_done = n - first_nonce + 1;
work_set_target_ratio( work, hash64 );
return true;
}
} while (n < max_nonce && !work_restart[thr_id].restart);
}
*hashes_done = n - first_nonce + 1;
pdata[19] = n;
return 0;

View File

@@ -126,11 +126,15 @@ int scanhash_quark( struct work *work, uint32_t max_nonce,
pdata[19] = ++n;
be32enc(&endiandata[19], n);
quark_hash(hash64, &endiandata);
if ((hash64[7]&0xFFFFFF00)==0)
{
if (fulltest(hash64, ptarget))
submit_solution( work, hash64, mythr );
}
if ((hash64[7]&0xFFFFFF00)==0)
{
if (fulltest(hash64, ptarget))
{
work_set_target_ratio( work, hash64 );
*hashes_done = n - first_nonce + 1;
return true;
}
}
} while (n < max_nonce && !work_restart[thr_id].restart);
*hashes_done = n - first_nonce + 1;

View File

@@ -92,21 +92,46 @@ int scanhash_deep( struct work *work, uint32_t max_nonce,
deep_luffa_midstate( endiandata );
#ifdef DEBUG_ALGO
printf("[%d] Htarg=%X\n", thr_id, Htarg);
#endif
for ( int m=0; m < 6; m++ )
{
{
if ( Htarg <= htmax[m] )
{
{
uint32_t mask = masks[m];
do
{
{
pdata[19] = ++n;
be32enc( &endiandata[19], n );
deep_hash( hash64, endiandata );
if (!(hash64[7] & mask))
if ( fulltest(hash64, ptarget) )
submit_solution( work, hash64, mythr );
} while ( n < max_nonce && !work_restart[thr_id].restart );
break;
be32enc( &endiandata[19], n );
deep_hash( hash64, endiandata );
#ifndef DEBUG_ALGO
if (!(hash64[7] & mask))
{
if ( fulltest(hash64, ptarget) )
{
*hashes_done = n - first_nonce + 1;
return true;
}
// else
// {
// applog(LOG_INFO, "Result does not validate on CPU!");
// }
}
#else
if (!(n % 0x1000) && !thr_id) printf(".");
if (!(hash64[7] & mask)) {
printf("[%d]",thr_id);
if (fulltest(hash64, ptarget)) {
work_set_target_ratio( work, hash64 );
*hashes_done = n - first_nonce + 1;
return true;
}
}
#endif
} while ( n < max_nonce && !work_restart[thr_id].restart );
// see blake.c if else to understand the loop on htmax => mask
break;
}
}

View File

@@ -104,23 +104,48 @@ int scanhash_qubit( struct work *work, uint32_t max_nonce,
qubit_luffa_midstate( endiandata );
#ifdef DEBUG_ALGO
printf("[%d] Htarg=%X\n", thr_id, Htarg);
#endif
for ( int m=0; m < 6; m++ )
{
{
if ( Htarg <= htmax[m] )
{
{
uint32_t mask = masks[m];
do
{
{
pdata[19] = ++n;
be32enc(&endiandata[19], n);
qubit_hash(hash64, endiandata);
if (!(hash64[7] & mask))
if ( fulltest(hash64, ptarget) )
submit_solution( work, hash64, mythr );
} while ( n < max_nonce && !work_restart[thr_id].restart );
break;
}
}
be32enc(&endiandata[19], n);
qubit_hash(hash64, endiandata);
#ifndef DEBUG_ALGO
if (!(hash64[7] & mask))
{
if ( fulltest(hash64, ptarget) )
{
*hashes_done = n - first_nonce + 1;
return true;
}
// else
// {
// applog(LOG_INFO, "Result does not validate on CPU!");
// }
}
#else
if (!(n % 0x1000) && !thr_id) printf(".");
if (!(hash64[7] & mask)) {
printf("[%d]",thr_id);
if (fulltest(hash64, ptarget)) {
work_set_target_ratio( work, hash64 );
*hashes_done = n - first_nonce + 1;
return true;
}
}
#endif
} while ( n < max_nonce && !work_restart[thr_id].restart );
// see blake.c if else to understand the loop on htmax => mask
break;
}
}
*hashes_done = n - first_nonce + 1;
pdata[19] = n;

View File

@@ -753,8 +753,10 @@ extern int scanhash_scrypt( struct work *work, uint32_t max_nonce,
for (i = 0; i < throughput; i++) {
if (unlikely(hash[i * 8 + 7] <= Htarg && fulltest(hash + i * 8, ptarget))) {
*hashes_done = n - pdata[19] + 1;
pdata[19] = data[i * 20 + 19];
submit_solution( work, hash, mythr );
work_set_target_ratio( work, hash );
return 1;
}
}
} while (likely(n < max_nonce && !work_restart[thr_id].restart));

View File

@@ -1,8 +1,6 @@
#include "shavite-hash-4way.h"
#include <stdint.h>
#if defined(__VAES__) && defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
static const uint32_t IV512[] =
{
0x72FCCDD8, 0x79CA4727, 0x128A077B, 0x40D55AEC,
@@ -11,6 +9,8 @@ static const uint32_t IV512[] =
0xE275EADE, 0x502D9FCD, 0xB9357178, 0x022A4B9A
};
#if defined(__VAES__) && defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
#define mm512_ror2x512hi_1x32( a, b ) \
_mm512_mask_blend_epi32( 0x8888, mm512_ror128_32( a ), \
mm512_ror128_32( b ) )

View File

@@ -1035,7 +1035,7 @@ int simd_4way_update( simd_4way_context *state, const void *data,
while ( databitlen > 0 )
{
if ( ( current == 0 ) && ( databitlen >= bs ) )
if ( current == 0 && databitlen >= bs )
{
// We can hash the data directly from the input buffer.
SIMD_4way_Compress( state, data, 0 );
@@ -1049,13 +1049,13 @@ int simd_4way_update( simd_4way_context *state, const void *data,
int len = bs - current;
if ( databitlen < len )
{
memcpy( state->buffer + 4 * (current/8), data, 4 * (databitlen/8) );
memcpy( state->buffer + 4*(current/8), data, 4*((databitlen+7)/8) );
state->count += databitlen;
return 0;
}
else
{
memcpy( state->buffer + 4 * (current / 8), data, 4 * (len / 8) );
memcpy( state->buffer + 4*(current/8), data, 4*(len/8) );
state->count += len;
databitlen -= len;
data += 4*(len/8);
@@ -1128,7 +1128,7 @@ int simd_4way_update_close( simd_4way_context *state, void *hashval,
int len = bs - current;
if ( databitlen < len )
{
memcpy( state->buffer + 4*( current/8 ), data, 4*( (databitlen)/8 ) );
memcpy( state->buffer + 4*( current/8 ), data, 4*( (databitlen+7)/8 ) );
state->count += databitlen;
break;
}
@@ -1149,7 +1149,7 @@ int simd_4way_update_close( simd_4way_context *state, void *hashval,
// If there is still some data in the buffer, hash it
if ( current )
{
current = current / 8;
current = ( current+7 ) / 8;
memset( state->buffer + 4*current, 0, 4*( state->blocksize/8 - current) );
SIMD_4way_Compress( state, state->buffer, 0 );
}

View File

@@ -78,12 +78,17 @@ int scanhash_whirlpool( struct work* work, uint32_t max_nonce,
do {
const uint32_t Htarg = ptarget[7];
uint32_t vhash[8];
pdata[19] = ++n;
pdata[19] = ++n;
be32enc(&endiandata[19], n );
whirlpool_hash(vhash, endiandata);
if (vhash[7] <= Htarg && fulltest(vhash, ptarget))
submit_solution( work, vhash, mythr );
{
work_set_target_ratio(work, vhash);
*hashes_done = n - first_nonce + 1;
return true;
}
} while ( n < max_nonce && !work_restart[thr_id].restart);
*hashes_done = n - first_nonce + 1;

View File

@@ -48,7 +48,11 @@ int scanhash_whirlpoolx( struct work* work, uint32_t max_nonce,
whirlpoolx_hash(vhash, endiandata);
if (vhash[7] <= Htarg && fulltest(vhash, ptarget))
submit_solution( work, vhash, mythr );
{
work_set_target_ratio(work, vhash);
*hashes_done = n - first_nonce + 1;
return true;
}
} while ( n < max_nonce && !work_restart[thr_id].restart);

View File

@@ -138,12 +138,16 @@ int scanhash_c11( struct work *work, uint32_t max_nonce,
swab32_array( endiandata, pdata, 20 );
do
{
pdata[19] = nonce;
{
be32enc( &endiandata[19], nonce );
c11_hash( hash, endiandata );
if ( hash[7] <= Htarg && fulltest(hash, ptarget) )
submit_solution( work, hash, mythr );
{
pdata[19] = nonce;
*hashes_done = pdata[19] - first_nonce;
work_set_target_ratio( work, hash );
return 1;
}
nonce++;
} while ( nonce < max_nonce && !(*restart) );
pdata[19] = nonce;

View File

@@ -293,10 +293,14 @@ int scanhash_timetravel( struct work *work, uint32_t max_nonce,
if ( hash[7] <= Htarg && fulltest( hash, ptarget) )
{
work_set_target_ratio( work, hash );
pdata[19] = nonce;
submit_solution( work, hash, mythr );
}
nonce++;
*hashes_done = pdata[19] - first_nonce;
work_set_target_ratio( work, hash );
return 1;
}
nonce++;
} while (nonce < max_nonce && !(*restart));
pdata[19] = nonce;

View File

@@ -334,10 +334,14 @@ int scanhash_timetravel10( struct work *work, uint32_t max_nonce,
if ( hash[7] <= Htarg && fulltest( hash, ptarget) )
{
work_set_target_ratio( work, hash );
pdata[19] = nonce;
submit_solution( work, hash, mythr );
}
nonce++;
work_set_target_ratio( work, hash );
*hashes_done = pdata[19] - first_nonce;
return 1;
}
nonce++;
} while (nonce < max_nonce && !(*restart));
pdata[19] = nonce;

View File

@@ -98,6 +98,9 @@ int scanhash_tribus( struct work *work, uint32_t max_nonce,
sph_jh512_init( &tribus_ctx.jh );
sph_jh512( &tribus_ctx.jh, endiandata, 64 );
#ifdef DEBUG_ALGO
printf("[%d] Htarg=%X\n", thr_id, Htarg);
#endif
for (int m=0; m < 6; m++) {
if (Htarg <= htmax[m]) {
uint32_t mask = masks[m];
@@ -105,9 +108,25 @@ int scanhash_tribus( struct work *work, uint32_t max_nonce,
pdata[19] = ++n;
be32enc(&endiandata[19], n);
tribus_hash(hash32, endiandata);
if ((!(hash32[7] & mask)) && fulltest(hash32, ptarget))
submit_solution( work, hash32, mythr );
#ifndef DEBUG_ALGO
if ((!(hash32[7] & mask)) && fulltest(hash32, ptarget)) {
work_set_target_ratio(work, hash32);
*hashes_done = n - first_nonce + 1;
return 1;
}
#else
if (!(n % 0x1000) && !thr_id) printf(".");
if (!(hash32[7] & mask)) {
printf("[%d]",thr_id);
if (fulltest(hash32, ptarget)) {
work_set_target_ratio(work, hash32);
*hashes_done = n - first_nonce + 1;
return 1;
}
}
#endif
} while (n < max_nonce && !work_restart[thr_id].restart);
// see blake.c if else to understand the loop on htmax => mask
break;
}
}

View File

@@ -163,7 +163,11 @@ int scanhash_x11( struct work *work, uint32_t max_nonce,
if ( ( hash64[7] & mask ) == 0 )
{
if ( fulltest( hash64, ptarget ) )
submit_solution( work, hash64, mythr );
{
*hashes_done = n - first_nonce + 1;
work_set_target_ratio( work, hash64 );
return true;
}
}
} while ( n < max_nonce && !work_restart[thr_id].restart );
}

View File

@@ -199,8 +199,12 @@ int scanhash_x11evo( struct work* work, uint32_t max_nonce,
if ( ( hash64[7] & hmask ) == 0 )
{
if ( fulltest( hash64, ptarget ) )
submit_solution( work, hash64, mythr );
}
{
*hashes_done = n - first_nonce + 1;
work_set_target_ratio( work, hash64 );
return true;
}
}
} while ( n < max_nonce && !work_restart[thr_id].restart );
*hashes_done = n - first_nonce + 1;

View File

@@ -448,7 +448,6 @@ void x11gost_4way_hash( void *state, const void *input )
simd_2way_update_close( &ctx.simd, vhash, vhash, 512 );
dintrlv_2x128( hash0, hash1, vhash, 512 );
intrlv_2x128( vhash, hash2, hash3, 512 );
simd_2way_init( &ctx.simd, 512 );
simd_2way_update_close( &ctx.simd, vhash, vhash, 512 );
dintrlv_2x128( hash2, hash3, vhash, 512 );

View File

@@ -15,7 +15,7 @@ bool register_x11gost_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_x11gost;
gate->hash = (void*)&x11gost_hash;
#endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT;
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT;
return true;
};

View File

@@ -128,8 +128,9 @@ void x11gost_hash(void *output, const void *input)
int scanhash_x11gost( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
uint32_t _ALIGN(64) endiandata[20];
int thr_id = mythr->id; // thr_id arg is deprecated
@@ -148,13 +149,16 @@ int scanhash_x11gost( struct work *work, uint32_t max_nonce,
be32enc(&endiandata[19], nonce);
x11gost_hash(hash, endiandata);
if (hash[7] <= Htarg && fulltest(hash, ptarget))
{
if (hash[7] <= Htarg && fulltest(hash, ptarget)) {
pdata[19] = nonce;
submit_solution( work, hash, mythr );
*hashes_done = pdata[19] - first_nonce;
work_set_target_ratio( work, hash );
return 1;
}
nonce++;
} while (nonce < max_nonce && !(*restart));
pdata[19] = nonce;
*hashes_done = pdata[19] - first_nonce + 1;
return 0;

View File

@@ -158,6 +158,9 @@ int scanhash_x12( struct work *work, uint32_t max_nonce,
// we need bigendian data...
swab32_array( endiandata, pdata, 20 );
#ifdef DEBUG_ALGO
printf("[%d] Htarg=%X\n", thr_id, Htarg);
#endif
for (int m=0; m < 6; m++) {
if (Htarg <= htmax[m]) {
uint32_t mask = masks[m];
@@ -165,10 +168,33 @@ int scanhash_x12( struct work *work, uint32_t max_nonce,
pdata[19] = ++n;
be32enc(&endiandata[19], n);
x12hash(hash64, endiandata);
#ifndef DEBUG_ALGO
if (!(hash64[7] & mask))
if ( fulltest(hash64, ptarget) )
submit_solution( work, hash64, mythr );
{
if ( fulltest(hash64, ptarget) )
{
*hashes_done = n - first_nonce + 1;
return true;
}
// else
// {
// applog(LOG_INFO, "Result does not validate on CPU!");
// }
}
#else
if (!(n % 0x1000) && !thr_id) printf(".");
if (!(hash64[7] & mask)) {
printf("[%d]",thr_id);
if (fulltest(hash64, ptarget)) {
work_set_target_ratio( work, hash );
*hashes_done = n - first_nonce + 1;
return true;
}
}
#endif
} while (n < max_nonce && !work_restart[thr_id].restart);
// see blake.c if else to understand the loop on htmax => mask
break;
}
}

View File

@@ -115,10 +115,11 @@ int scanhash_phi1612( struct work *work, uint32_t max_nonce,
be32enc(&endiandata[19], nonce);
phi1612_hash(hash, endiandata);
if (hash[7] <= Htarg && fulltest(hash, ptarget))
{
if (hash[7] <= Htarg && fulltest(hash, ptarget)) {
pdata[19] = nonce;
submit_solution( work, hash, mythr );
work_set_target_ratio( work, hash );
*hashes_done = pdata[19] - first_nonce;
return 1;
}
nonce++;

View File

@@ -68,9 +68,11 @@ int scanhash_skunk( struct work *work, uint32_t max_nonce,
skunkhash( hash, endiandata );
if ( hash[7] <= Htarg && fulltest( hash, ptarget ) )
{
pdata[19] = nonce;
submit_solution( work, hash, mythr );
{
pdata[19] = nonce;
*hashes_done = pdata[19] - first_nonce;
work_set_target_ratio( work, hash );
return 1;
}
nonce++;
} while ( nonce < max_nonce && !(*restart) );

View File

@@ -161,8 +161,11 @@ int scanhash_x13( struct work *work, uint32_t max_nonce,
};
// we need bigendian data...
swab32_array( endiandata, pdata, 20 );
swab32_array( endiandata, pdata, 20 );
#ifdef DEBUG_ALGO
printf("[%d] Htarg=%X\n", thr_id, Htarg);
#endif
for (int m=0; m < 6; m++) {
if (Htarg <= htmax[m]) {
uint32_t mask = masks[m];
@@ -170,11 +173,31 @@ int scanhash_x13( struct work *work, uint32_t max_nonce,
pdata[19] = ++n;
be32enc(&endiandata[19], n);
x13hash(hash64, endiandata);
#ifndef DEBUG_ALGO
if (!(hash64[7] & mask))
{
if ( fulltest(hash64, ptarget) )
submit_solution( work, hash64, mythr );
}
{
if ( fulltest(hash64, ptarget) )
{
*hashes_done = n - first_nonce + 1;
return true;
}
// else
// {
// applog(LOG_INFO, "Result does not validate on CPU!");
// }
}
#else
if (!(n % 0x1000) && !thr_id) printf(".");
if (!(hash64[7] & mask)) {
printf("[%d]",thr_id);
if (fulltest(hash64, ptarget)) {
work_set_target_ratio( work, hash );
*hashes_done = n - first_nonce + 1;
return true;
}
}
#endif
} while (n < max_nonce && !work_restart[thr_id].restart);
// see blake.c if else to understand the loop on htmax => mask
break;

View File

@@ -165,6 +165,10 @@ int scanhash_x13bcd( struct work *work, uint32_t max_nonce,
// we need bigendian data...
swab32_array( endiandata, pdata, 20 );
#ifdef DEBUG_ALGO
if (Htarg != 0)
printf("[%d] Htarg=%X\n", thr_id, Htarg);
#endif
for (int m=0; m < 6; m++) {
if (Htarg <= htmax[m]) {
uint32_t mask = masks[m];
@@ -172,9 +176,24 @@ int scanhash_x13bcd( struct work *work, uint32_t max_nonce,
pdata[19] = ++n;
be32enc(&endiandata[19], n);
x13bcd_hash(hash64, endiandata);
if ((!(hash64[7] & mask)) && fulltest(hash64, ptarget))
submit_solution( work, hash64, mythr );
#ifndef DEBUG_ALGO
if ((!(hash64[7] & mask)) && fulltest(hash64, ptarget)) {
*hashes_done = n - first_nonce + 1;
return true;
}
#else
if (!(n % 0x1000) && !thr_id) printf(".");
if (!(hash64[7] & mask)) {
printf("[%d]",thr_id);
if (fulltest(hash64, ptarget)) {
work_set_target_ratio( work, hash64 );
*hashes_done = n - first_nonce + 1;
return true;
}
}
#endif
} while (n < max_nonce && !work_restart[thr_id].restart);
// see blake.c if else to understand the loop on htmax => mask
break;
}
}

View File

@@ -177,6 +177,10 @@ int scanhash_x13sm3( struct work *work, uint32_t max_nonce,
// we need bigendian data...
swab32_array( endiandata, pdata, 20 );
#ifdef DEBUG_ALGO
if (Htarg != 0)
printf("[%d] Htarg=%X\n", thr_id, Htarg);
#endif
for (int m=0; m < 6; m++) {
if (Htarg <= htmax[m]) {
uint32_t mask = masks[m];
@@ -184,8 +188,22 @@ int scanhash_x13sm3( struct work *work, uint32_t max_nonce,
pdata[19] = ++n;
be32enc(&endiandata[19], n);
x13sm3_hash(hash64, endiandata);
if ((!(hash64[7] & mask)) && fulltest(hash64, ptarget))
submit_solution( work, hash64, mythr );
#ifndef DEBUG_ALGO
if ((!(hash64[7] & mask)) && fulltest(hash64, ptarget)) {
*hashes_done = n - first_nonce + 1;
return true;
}
#else
if (!(n % 0x1000) && !thr_id) printf(".");
if (!(hash64[7] & mask)) {
printf("[%d]",thr_id);
if (fulltest(hash64, ptarget)) {
work_set_target_ratio( work, hash64 );
*hashes_done = n - first_nonce + 1;
return true;
}
}
#endif
} while (n < max_nonce && !work_restart[thr_id].restart);
// see blake.c if else to understand the loop on htmax => mask
break;

View File

@@ -63,10 +63,11 @@ int scanhash_axiom( struct work *work,
do {
be32enc(&endiandata[19], n);
axiomhash(hash64, endiandata);
if (hash64[7] < Htarg && fulltest(hash64, ptarget))
{
if (hash64[7] < Htarg && fulltest(hash64, ptarget)) {
*hashes_done = n - first_nonce + 1;
pdata[19] = n;
submit_solution( work, hash64, mythr );
work_set_target_ratio( work, hash64 );
return true;
}
n++;

View File

@@ -100,10 +100,11 @@ int scanhash_polytimos( struct work *work, uint32_t max_nonce,
be32enc(&endiandata[19], nonce);
polytimos_hash(hash, endiandata);
if (hash[7] <= Htarg && fulltest(hash, ptarget))
{
if (hash[7] <= Htarg && fulltest(hash, ptarget)) {
work_set_target_ratio(work, hash);
pdata[19] = nonce;
submit_solution( work, hash, mythr );
*hashes_done = pdata[19] - first_nonce;
return 1;
}
nonce++;

View File

@@ -89,10 +89,11 @@ int scanhash_veltor( struct work *work, uint32_t max_nonce,
be32enc(&endiandata[19], nonce);
veltor_hash(hash, endiandata);
if (hash[7] <= Htarg && fulltest(hash, ptarget))
{
if (hash[7] <= Htarg && fulltest(hash, ptarget)) {
work_set_target_ratio(work, hash);
pdata[19] = nonce;
submit_solution( work, hash, mythr );
*hashes_done = pdata[19] - first_nonce;
return 1;
}
nonce++;

View File

@@ -170,6 +170,10 @@ int scanhash_x14( struct work *work, uint32_t max_nonce,
// we need bigendian data...
swab32_array( endiandata, pdata, 20 );
#ifdef DEBUG_ALGO
if (Htarg != 0)
printf("[%d] Htarg=%X\n", thr_id, Htarg);
#endif
for (int m=0; m < 6; m++) {
if (Htarg <= htmax[m]) {
uint32_t mask = masks[m];
@@ -177,9 +181,24 @@ int scanhash_x14( struct work *work, uint32_t max_nonce,
pdata[19] = ++n;
be32enc(&endiandata[19], n);
x14hash(hash64, endiandata);
if ((!(hash64[7] & mask)) && fulltest(hash64, ptarget))
submit_solution( work, hash64, mythr );
#ifndef DEBUG_ALGO
if ((!(hash64[7] & mask)) && fulltest(hash64, ptarget)) {
*hashes_done = n - first_nonce + 1;
return true;
}
#else
if (!(n % 0x1000) && !thr_id) printf(".");
if (!(hash64[7] & mask)) {
printf("[%d]",thr_id);
if (fulltest(hash64, ptarget)) {
work_set_target_ratio( work, hash64 );
*hashes_done = n - first_nonce + 1;
return true;
}
}
#endif
} while (n < max_nonce && !work_restart[thr_id].restart);
// see blake.c if else to understand the loop on htmax => mask
break;
}
}

View File

@@ -217,22 +217,22 @@ void x16r_8way_hash( void* output, const void* input )
case CUBEHASH:
intrlv_4x128( vhash, in0, in1, in2, in3, size<<3 );
cube_4way_init( &ctx.cube, 512, 16, 32 );
cube_4way_update_close( &ctx.cube, vhash, vhash, size );
cube_4way_update_close( &ctx.cube, vhash, vhash, 64 );
dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhash );
intrlv_4x128( vhash, in4, in5, in6, in7, size<<3 );
cube_4way_init( &ctx.cube, 512, 16, 32 );
cube_4way_update_close( &ctx.cube, vhash, vhash, size );
cube_4way_update_close( &ctx.cube, vhash, vhash, 64 );
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash );
break;
case SHAVITE:
#if defined(__VAES__)
intrlv_4x128( vhash, in0, in1, in2, in3, size<<3 );
shavite512_4way_init( &ctx.shavite );
shavite512_4way_update_close( &ctx.shavite, vhash, vhash, size );
shavite512_4way_update_close( &ctx.shavite, vhash, vhash, 64 );
dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhash );
intrlv_4x128( vhash, in4, in5, in6, in7, size<<3 );
shavite512_4way_init( &ctx.shavite );
shavite512_4way_update_close( &ctx.shavite, vhash, vhash, size );
shavite512_4way_update_close( &ctx.shavite, vhash, vhash, 64 );
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash );
#else
sph_shavite512_init( &ctx.shavite );
@@ -275,11 +275,11 @@ void x16r_8way_hash( void* output, const void* input )
#if defined(__VAES__)
intrlv_4x128( vhash, in0, in1, in2, in3, size<<3 );
echo_4way_init( &ctx.echo, 512 );
echo_4way_update_close( &ctx.echo, vhash, vhash, size<<3 );
echo_4way_update_close( &ctx.echo, vhash, vhash, 512 );
dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhash );
intrlv_4x128( vhash, in4, in5, in6, in7, size<<3 );
echo_4way_init( &ctx.echo, 512 );
echo_4way_update_close( &ctx.echo, vhash, vhash, size<<3 );
echo_4way_update_close( &ctx.echo, vhash, vhash, 512 );
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash );
#else
init_echo( &ctx.echo, 512 );

View File

@@ -211,22 +211,22 @@ void x16rt_8way_hash( void* output, const void* input )
case CUBEHASH:
intrlv_4x128( vhash, in0, in1, in2, in3, size<<3 );
cube_4way_init( &ctx.cube, 512, 16, 32 );
cube_4way_update_close( &ctx.cube, vhash, vhash, size );
cube_4way_update_close( &ctx.cube, vhash, vhash, 64 );
dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhash );
intrlv_4x128( vhash, in4, in5, in6, in7, size<<3 );
cube_4way_init( &ctx.cube, 512, 16, 32 );
cube_4way_update_close( &ctx.cube, vhash, vhash, size );
cube_4way_update_close( &ctx.cube, vhash, vhash, 64 );
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash );
break;
case SHAVITE:
#if defined(__VAES__)
intrlv_4x128( vhash, in0, in1, in2, in3, size<<3 );
shavite512_4way_init( &ctx.shavite );
shavite512_4way_update_close( &ctx.shavite, vhash, vhash, size );
shavite512_4way_update_close( &ctx.shavite, vhash, vhash, 64 );
dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhash );
intrlv_4x128( vhash, in4, in5, in6, in7, size<<3 );
shavite512_4way_init( &ctx.shavite );
shavite512_4way_update_close( &ctx.shavite, vhash, vhash, size );
shavite512_4way_update_close( &ctx.shavite, vhash, vhash, 64 );
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash );
#else
sph_shavite512_init( &ctx.shavite );
@@ -269,11 +269,11 @@ void x16rt_8way_hash( void* output, const void* input )
#if defined(__VAES__)
intrlv_4x128( vhash, in0, in1, in2, in3, size<<3 );
echo_4way_init( &ctx.echo, 512 );
echo_4way_update_close( &ctx.echo, vhash, vhash, size<<3 );
echo_4way_update_close( &ctx.echo, vhash, vhash, 512 );
dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhash );
intrlv_4x128( vhash, in4, in5, in6, in7, size<<3 );
echo_4way_init( &ctx.echo, 512 );
echo_4way_update_close( &ctx.echo, vhash, vhash, size<<3 );
echo_4way_update_close( &ctx.echo, vhash, vhash, 512 );
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash );
#else
init_echo( &ctx.echo, 512 );

View File

@@ -42,12 +42,15 @@ union _x16rv2_8way_context_overlay
{
blake512_8way_context blake;
bmw512_8way_context bmw;
// hashState_groestl groestl;
skein512_8way_context skein;
jh512_8way_context jh;
keccak512_8way_context keccak;
luffa_4way_context luffa;
cube_4way_context cube;
// sph_shavite512_context shavite;
simd_4way_context simd;
// hashState_echo echo;
hamsi512_8way_context hamsi;
sph_fugue512_context fugue;
shabal512_8way_context shabal;
@@ -272,22 +275,22 @@ void x16rv2_8way_hash( void* output, const void* input )
case CUBEHASH:
intrlv_4x128( vhash, in0, in1, in2, in3, size<<3 );
cube_4way_init( &ctx.cube, 512, 16, 32 );
cube_4way_update_close( &ctx.cube, vhash, vhash, size );
cube_4way_update_close( &ctx.cube, vhash, vhash, 64 );
dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhash );
intrlv_4x128( vhash, in4, in5, in6, in7, size<<3 );
cube_4way_init( &ctx.cube, 512, 16, 32 );
cube_4way_update_close( &ctx.cube, vhash, vhash, size );
cube_4way_update_close( &ctx.cube, vhash, vhash, 64 );
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash );
break;
case SHAVITE:
#if defined(__VAES__)
intrlv_4x128( vhash, in0, in1, in2, in3, size<<3 );
shavite512_4way_init( &ctx.shavite );
shavite512_4way_update_close( &ctx.shavite, vhash, vhash, size );
shavite512_4way_update_close( &ctx.shavite, vhash, vhash, 64 );
dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhash );
intrlv_4x128( vhash, in4, in5, in6, in7, size<<3 );
shavite512_4way_init( &ctx.shavite );
shavite512_4way_update_close( &ctx.shavite, vhash, vhash, size );
shavite512_4way_update_close( &ctx.shavite, vhash, vhash, 64 );
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash );
#else
sph_shavite512_init( &ctx.shavite );
@@ -330,11 +333,11 @@ void x16rv2_8way_hash( void* output, const void* input )
#if defined(__VAES__)
intrlv_4x128( vhash, in0, in1, in2, in3, size<<3 );
echo_4way_init( &ctx.echo, 512 );
echo_4way_update_close( &ctx.echo, vhash, vhash, size<<3 );
echo_4way_update_close( &ctx.echo, vhash, vhash, 512 );
dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhash );
intrlv_4x128( vhash, in4, in5, in6, in7, size<<3 );
echo_4way_init( &ctx.echo, 512 );
echo_4way_update_close( &ctx.echo, vhash, vhash, size<<3 );
echo_4way_update_close( &ctx.echo, vhash, vhash, 512 );
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash );
#else
init_echo( &ctx.echo, 512 );

View File

@@ -230,22 +230,22 @@ void x21s_8way_hash( void* output, const void* input )
case CUBEHASH:
intrlv_4x128( vhash, in0, in1, in2, in3, size<<3 );
cube_4way_init( &ctx.cube, 512, 16, 32 );
cube_4way_update_close( &ctx.cube, vhash, vhash, size );
cube_4way_update_close( &ctx.cube, vhash, vhash, 64 );
dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhash );
intrlv_4x128( vhash, in4, in5, in6, in7, size<<3 );
cube_4way_init( &ctx.cube, 512, 16, 32 );
cube_4way_update_close( &ctx.cube, vhash, vhash, size );
cube_4way_update_close( &ctx.cube, vhash, vhash, 64 );
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash );
break;
case SHAVITE:
#if defined(__VAES__)
intrlv_4x128( vhash, in0, in1, in2, in3, size<<3 );
shavite512_4way_init( &ctx.shavite );
shavite512_4way_update_close( &ctx.shavite, vhash, vhash, size );
shavite512_4way_update_close( &ctx.shavite, vhash, vhash, 64 );
dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhash );
intrlv_4x128( vhash, in4, in5, in6, in7, size<<3 );
shavite512_4way_init( &ctx.shavite );
shavite512_4way_update_close( &ctx.shavite, vhash, vhash, size );
shavite512_4way_update_close( &ctx.shavite, vhash, vhash, 64 );
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash );
#else
sph_shavite512_init( &ctx.shavite );
@@ -285,14 +285,15 @@ void x21s_8way_hash( void* output, const void* input )
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash );
break;
case ECHO:
#if defined(__VAES__)
intrlv_4x128( vhash, in0, in1, in2, in3, size<<3 );
echo_4way_init( &ctx.echo, 512 );
echo_4way_update_close( &ctx.echo, vhash, vhash, size<<3 );
echo_4way_update_close( &ctx.echo, vhash, vhash, 512 );
dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhash );
intrlv_4x128( vhash, in4, in5, in6, in7, size<<3 );
echo_4way_init( &ctx.echo, 512 );
echo_4way_update_close( &ctx.echo, vhash, vhash, size<<3 );
echo_4way_update_close( &ctx.echo, vhash, vhash, 512 );
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash );
#else
init_echo( &ctx.echo, 512 );
@@ -516,7 +517,7 @@ int scanhash_x21s_8way( struct work *work, uint32_t max_nonce,
x16_r_s_getAlgoString( (const uint8_t*)bedata1, hashOrder );
s_ntime = ntime;
if ( opt_debug && !thr_id )
applog( LOG_INFO, "hash order %s (%08x)", hashOrder, ntime );
applog( LOG_DEBUG, "hash order %s (%08x)", hashOrder, ntime );
}
do

View File

@@ -44,15 +44,15 @@ bool register_xi20r_algo( algo_gate_t* gate );
void x20r_4way_hash( void *state, const void *input );
int scanhash_x20r_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
int scanhash_x20r_4way( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done );
#endif
void x20rhash( void *state, const void *input );
int scanhash_x20r( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
int scanhash_x20r( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done );
#endif

View File

@@ -228,8 +228,8 @@ void x20r_hash(void* output, const void* input)
memcpy(output, hash, 32);
}
int scanhash_x20r( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
int scanhash_x20r( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done )
{
uint32_t _ALIGN(128) hash32[8];
uint32_t _ALIGN(128) endiandata[20];
@@ -238,7 +238,6 @@ int scanhash_x20r( struct work *work, uint32_t max_nonce,
const uint32_t Htarg = ptarget[7];
const uint32_t first_nonce = pdata[19];
uint32_t nonce = first_nonce;
int thr_id = mythr->id;
volatile uint8_t *restart = &(work_restart[thr_id].restart);
for (int k=0; k < 19; k++)
@@ -260,9 +259,11 @@ int scanhash_x20r( struct work *work, uint32_t max_nonce,
x20r_hash( hash32, endiandata );
if ( hash32[7] <= Htarg && fulltest( hash32, ptarget ) )
{
pdata[19] = nonce;
submit_solution( work, hash32, mythr );
{
work_set_target_ratio( work, hash32 );
pdata[19] = nonce;
*hashes_done = pdata[19] - first_nonce;
return 1;
}
nonce++;

20
configure vendored
View File

@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.11.2.
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.11.1.
#
#
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
@@ -577,8 +577,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='cpuminer-opt'
PACKAGE_TARNAME='cpuminer-opt'
PACKAGE_VERSION='3.11.2'
PACKAGE_STRING='cpuminer-opt 3.11.2'
PACKAGE_VERSION='3.11.1'
PACKAGE_STRING='cpuminer-opt 3.11.1'
PACKAGE_BUGREPORT=''
PACKAGE_URL=''
@@ -1332,7 +1332,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
\`configure' configures cpuminer-opt 3.11.2 to adapt to many kinds of systems.
\`configure' configures cpuminer-opt 3.11.1 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1404,7 +1404,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
short | recursive ) echo "Configuration of cpuminer-opt 3.11.2:";;
short | recursive ) echo "Configuration of cpuminer-opt 3.11.1:";;
esac
cat <<\_ACEOF
@@ -1509,7 +1509,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
cpuminer-opt configure 3.11.2
cpuminer-opt configure 3.11.1
generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2012,7 +2012,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
It was created by cpuminer-opt $as_me 3.11.2, which was
It was created by cpuminer-opt $as_me 3.11.1, which was
generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@
@@ -2993,7 +2993,7 @@ fi
# Define the identity of the package.
PACKAGE='cpuminer-opt'
VERSION='3.11.2'
VERSION='3.11.1'
cat >>confdefs.h <<_ACEOF
@@ -6690,7 +6690,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
This file was extended by cpuminer-opt $as_me 3.11.2, which was
This file was extended by cpuminer-opt $as_me 3.11.1, which was
generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -6756,7 +6756,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
cpuminer-opt config.status 3.11.2
cpuminer-opt config.status 3.11.1
configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\"

View File

@@ -1,4 +1,4 @@
AC_INIT([cpuminer-opt], [3.11.2])
AC_INIT([cpuminer-opt], [3.11.1])
AC_PREREQ([2.59c])
AC_CANONICAL_SYSTEM

View File

@@ -3343,7 +3343,7 @@ static void show_credits()
{
printf("\n ********** "PACKAGE_NAME" "PACKAGE_VERSION" *********** \n");
printf(" A CPU miner with multi algo support and optimized for CPUs\n");
printf(" with AVX512, SHA and VAES extensions.\n");
printf(" with AES_NI, AVX2, AVX512, SHA and VAES extensions.\n");
printf(" BTC donation address: 12tdvfF7KmAsihBXQXynT6E6th2c2pByTT\n\n");
}

View File

@@ -132,19 +132,10 @@ do { \
// Parallel AES, for when x is expected to be in a 256 bit register.
// Use same 128 bit key.
#if defined(__VAES__) && defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
#define mm256_aesenc_2x128( x, k ) \
_mm256_aesenc_epi128( x, m256_const1_128(k ) )
#else
#define mm256_aesenc_2x128( x, k ) \
mm256_concat_128( _mm_aesenc_si128( mm128_extr_hi128_256( x ), k ), \
_mm_aesenc_si128( mm128_extr_lo128_256( x ), k ) )
#endif
#define mm256_paesenc_2x128( y, x, k ) do \
{ \
__m128i *X = (__m128i*)x; \
@@ -555,14 +546,14 @@ static inline void memcpy_256( __m256i *dst, const __m256i *src, const int n )
#define mm256_ror512_128( v1, v2 ) \
do { \
__m256i t = _mm256_permute2x128( v1, v2, 0x03 ); \
v1 = _mm256_permute2x128( v2, v1, 0x21 ); \
v1 = _mm256__mm256_permute2x128( v2, v1, 0x21 ); \
v2 = t; \
} while(0)
#define mm256_rol512_128( v1, v2 ) \
do { \
__m256i t = _mm256_permute2x128( v1, v2, 0x03 ); \
v2 = _mm256_permute2x128( v2, v1, 0x21 ); \
v2 = _mm256__mm256_permute2x128( v2, v1, 0x21 ); \
v1 = t; \
} while(0)

View File

@@ -44,14 +44,14 @@
//
// Constants need to be composed at run time by assembling individual
// elements, very expensive. The cost is proportional to the number of
// different elements therefore use the largest element size possible,
// merge smaller integer elements to 64 bits, and group repeated elements.
// elements therefore use the largest element size possible, even by
// merging smaller values.
//
// Constants with repeating patterns can be optimized with the smaller
// patterns repeated more frequently being more efficient.
//
// Some specific constants can be very efficient. Zero is very efficient,
// 1 and -1 slightly less so.
// 1 and -1 slightly less so.
//
// If an expensive constant is to be reused in the same function it should
// be declared as a local variable defined once and reused.