mirror of
https://github.com/JayDDee/cpuminer-opt.git
synced 2025-09-17 23:44:27 +00:00
v3.6.5
This commit is contained in:
@@ -90,6 +90,7 @@ cpuminer_SOURCES = \
|
|||||||
algo/hodl/hodl-wolf.c \
|
algo/hodl/hodl-wolf.c \
|
||||||
algo/hodl/sha512_avx.c \
|
algo/hodl/sha512_avx.c \
|
||||||
algo/hodl/sha512_avx2.c \
|
algo/hodl/sha512_avx2.c \
|
||||||
|
algo/jh/jha.c \
|
||||||
algo/lbry.c \
|
algo/lbry.c \
|
||||||
algo/luffa/luffa.c \
|
algo/luffa/luffa.c \
|
||||||
algo/luffa/sse2/luffa_for_sse2.c \
|
algo/luffa/sse2/luffa_for_sse2.c \
|
||||||
|
|||||||
@@ -35,6 +35,7 @@ Supported Algorithms
|
|||||||
heavy Heavy
|
heavy Heavy
|
||||||
hmq1725 Espers
|
hmq1725 Espers
|
||||||
hodl Hodlcoin
|
hodl Hodlcoin
|
||||||
|
jha jackpotcoin
|
||||||
keccak Keccak
|
keccak Keccak
|
||||||
lbry LBC, LBRY Credits
|
lbry LBC, LBRY Credits
|
||||||
luffa Luffa
|
luffa Luffa
|
||||||
@@ -59,6 +60,7 @@ Supported Algorithms
|
|||||||
skein Skein+Sha (Skeincoin)
|
skein Skein+Sha (Skeincoin)
|
||||||
skein2 Double Skein (Woodcoin)
|
skein2 Double Skein (Woodcoin)
|
||||||
timetravel Machinecoin (MAC)
|
timetravel Machinecoin (MAC)
|
||||||
|
timetravel10 Bitcore
|
||||||
vanilla blake256r8vnl (VCash)
|
vanilla blake256r8vnl (VCash)
|
||||||
veltor
|
veltor
|
||||||
whirlpool
|
whirlpool
|
||||||
|
|||||||
@@ -6,6 +6,9 @@ compile flag.
|
|||||||
HW SHA support is only available when compiled from source, Windows binaries
|
HW SHA support is only available when compiled from source, Windows binaries
|
||||||
are not yet available.
|
are not yet available.
|
||||||
|
|
||||||
|
cpuminer-opt is a console program; if you're using a mouse, you're doing it
|
||||||
|
wrong.
|
||||||
|
|
||||||
Compile Instructions
|
Compile Instructions
|
||||||
--------------------
|
--------------------
|
||||||
|
|
||||||
@@ -118,6 +121,11 @@ Support for even older x86_64 without AES_NI or SSE2 is not availble.
|
|||||||
Change Log
|
Change Log
|
||||||
----------
|
----------
|
||||||
|
|
||||||
|
v3.6.5
|
||||||
|
|
||||||
|
Cryptonight a little faster.
|
||||||
|
Added jha algo (Jackpotcoin) with AES optimizations.
|
||||||
|
|
||||||
v3.6.4
|
v3.6.4
|
||||||
|
|
||||||
Added support for Bitcore (BTX) using the timetravel10 algo, optimized for
|
Added support for Bitcore (BTX) using the timetravel10 algo, optimized for
|
||||||
|
|||||||
@@ -169,6 +169,7 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
|
|||||||
case ALGO_HEAVY: register_heavy_algo ( gate ); break;
|
case ALGO_HEAVY: register_heavy_algo ( gate ); break;
|
||||||
case ALGO_HMQ1725: register_hmq1725_algo ( gate ); break;
|
case ALGO_HMQ1725: register_hmq1725_algo ( gate ); break;
|
||||||
case ALGO_HODL: register_hodl_algo ( gate ); break;
|
case ALGO_HODL: register_hodl_algo ( gate ); break;
|
||||||
|
case ALGO_JHA: register_jha_algo ( gate ); break;
|
||||||
case ALGO_KECCAK: register_keccak_algo ( gate ); break;
|
case ALGO_KECCAK: register_keccak_algo ( gate ); break;
|
||||||
case ALGO_LBRY: register_lbry_algo ( gate ); break;
|
case ALGO_LBRY: register_lbry_algo ( gate ); break;
|
||||||
case ALGO_LUFFA: register_luffa_algo ( gate ); break;
|
case ALGO_LUFFA: register_luffa_algo ( gate ); break;
|
||||||
@@ -276,6 +277,7 @@ const char* const algo_alias_map[][2] =
|
|||||||
{ "droplp", "drop" },
|
{ "droplp", "drop" },
|
||||||
{ "espers", "hmq1725" },
|
{ "espers", "hmq1725" },
|
||||||
{ "flax", "c11" },
|
{ "flax", "c11" },
|
||||||
|
{ "jackpot", "jha" },
|
||||||
{ "jane", "scryptjane" },
|
{ "jane", "scryptjane" },
|
||||||
{ "lyra2", "lyra2re" },
|
{ "lyra2", "lyra2re" },
|
||||||
{ "lyra2v2", "lyra2rev2" },
|
{ "lyra2v2", "lyra2rev2" },
|
||||||
|
|||||||
@@ -109,43 +109,66 @@ static __thread cryptonight_ctx ctx;
|
|||||||
void cryptonight_hash_aes( void *restrict output, const void *input, int len )
|
void cryptonight_hash_aes( void *restrict output, const void *input, int len )
|
||||||
{
|
{
|
||||||
#ifndef NO_AES_NI
|
#ifndef NO_AES_NI
|
||||||
keccak( (const uint8_t*)input, 76, (char*)&ctx.state.hs.b, 200 );
|
|
||||||
uint8_t ExpandedKey[256] __attribute__((aligned(64)));
|
uint8_t ExpandedKey[256] __attribute__((aligned(64)));
|
||||||
|
__m128i *longoutput, *expkey, *xmminput;
|
||||||
size_t i, j;
|
size_t i, j;
|
||||||
|
|
||||||
memcpy(ctx.text, ctx.state.init, INIT_SIZE_BYTE);
|
keccak( (const uint8_t*)input, 76, (char*)&ctx.state.hs.b, 200 );
|
||||||
memcpy(ExpandedKey, ctx.state.hs.b, AES_KEY_SIZE);
|
memcpy( ExpandedKey, ctx.state.hs.b, AES_KEY_SIZE );
|
||||||
ExpandAESKey256(ExpandedKey);
|
ExpandAESKey256( ExpandedKey );
|
||||||
|
memcpy( ctx.text, ctx.state.init, INIT_SIZE_BYTE );
|
||||||
|
|
||||||
__m128i *longoutput, *expkey, *xmminput;
|
longoutput = (__m128i*)ctx.long_state;
|
||||||
longoutput = (__m128i *)ctx.long_state;
|
xmminput = (__m128i*)ctx.text;
|
||||||
expkey = (__m128i *)ExpandedKey;
|
expkey = (__m128i*)ExpandedKey;
|
||||||
xmminput = (__m128i *)ctx.text;
|
|
||||||
|
|
||||||
//for (i = 0; likely(i < MEMORY); i += INIT_SIZE_BYTE)
|
// prefetch expkey, xmminput and enough longoutput for 4 iterations
|
||||||
// aesni_parallel_noxor(&ctx->long_state[i], ctx->text, ExpandedKey);
|
|
||||||
|
|
||||||
// prefetch expkey, all of xmminput and enough longoutput for 4 loops
|
|
||||||
_mm_prefetch( xmminput, _MM_HINT_T0 );
|
_mm_prefetch( xmminput, _MM_HINT_T0 );
|
||||||
_mm_prefetch( xmminput + 4, _MM_HINT_T0 );
|
_mm_prefetch( xmminput + 4, _MM_HINT_T0 );
|
||||||
for ( i = 0; i < 64; i += 16 )
|
|
||||||
{
|
|
||||||
_mm_prefetch( longoutput + i, _MM_HINT_T0 );
|
|
||||||
_mm_prefetch( longoutput + i + 4, _MM_HINT_T0 );
|
|
||||||
_mm_prefetch( longoutput + i + 8, _MM_HINT_T0 );
|
|
||||||
_mm_prefetch( longoutput + i + 12, _MM_HINT_T0 );
|
|
||||||
}
|
|
||||||
_mm_prefetch( expkey, _MM_HINT_T0 );
|
_mm_prefetch( expkey, _MM_HINT_T0 );
|
||||||
_mm_prefetch( expkey + 4, _MM_HINT_T0 );
|
_mm_prefetch( expkey + 4, _MM_HINT_T0 );
|
||||||
_mm_prefetch( expkey + 8, _MM_HINT_T0 );
|
_mm_prefetch( expkey + 8, _MM_HINT_T0 );
|
||||||
|
for ( i = 0; i < 64; i += 16 )
|
||||||
for ( i = 0; likely( i < MEMORY_M128I ); i += INIT_SIZE_M128I )
|
|
||||||
{
|
{
|
||||||
// prefetch 4 loops ahead,
|
__builtin_prefetch( longoutput + i, 1, 0 );
|
||||||
|
__builtin_prefetch( longoutput + i + 4, 1, 0 );
|
||||||
|
__builtin_prefetch( longoutput + i + 8, 1, 0 );
|
||||||
|
__builtin_prefetch( longoutput + i + 12, 1, 0 );
|
||||||
|
}
|
||||||
|
|
||||||
|
// n-4 iterations
|
||||||
|
for ( i = 0; likely( i < MEMORY_M128I - 4*INIT_SIZE_M128I );
|
||||||
|
i += INIT_SIZE_M128I )
|
||||||
|
{
|
||||||
|
// prefetch 4 iterations ahead.
|
||||||
__builtin_prefetch( longoutput + i + 64, 1, 0 );
|
__builtin_prefetch( longoutput + i + 64, 1, 0 );
|
||||||
__builtin_prefetch( longoutput + i + 68, 1, 0 );
|
__builtin_prefetch( longoutput + i + 68, 1, 0 );
|
||||||
|
|
||||||
for (j = 0; j < 10; j++ )
|
for ( j = 0; j < 10; j++ )
|
||||||
|
{
|
||||||
|
xmminput[0] = _mm_aesenc_si128( xmminput[0], expkey[j] );
|
||||||
|
xmminput[1] = _mm_aesenc_si128( xmminput[1], expkey[j] );
|
||||||
|
xmminput[2] = _mm_aesenc_si128( xmminput[2], expkey[j] );
|
||||||
|
xmminput[3] = _mm_aesenc_si128( xmminput[3], expkey[j] );
|
||||||
|
xmminput[4] = _mm_aesenc_si128( xmminput[4], expkey[j] );
|
||||||
|
xmminput[5] = _mm_aesenc_si128( xmminput[5], expkey[j] );
|
||||||
|
xmminput[6] = _mm_aesenc_si128( xmminput[6], expkey[j] );
|
||||||
|
xmminput[7] = _mm_aesenc_si128( xmminput[7], expkey[j] );
|
||||||
|
}
|
||||||
|
_mm_store_si128( &( longoutput[i ] ), xmminput[0] );
|
||||||
|
_mm_store_si128( &( longoutput[i+1] ), xmminput[1] );
|
||||||
|
_mm_store_si128( &( longoutput[i+2] ), xmminput[2] );
|
||||||
|
_mm_store_si128( &( longoutput[i+3] ), xmminput[3] );
|
||||||
|
_mm_store_si128( &( longoutput[i+4] ), xmminput[4] );
|
||||||
|
_mm_store_si128( &( longoutput[i+5] ), xmminput[5] );
|
||||||
|
_mm_store_si128( &( longoutput[i+6] ), xmminput[6] );
|
||||||
|
_mm_store_si128( &( longoutput[i+7] ), xmminput[7] );
|
||||||
|
}
|
||||||
|
// last 4 iterations
|
||||||
|
for ( ; likely( i < MEMORY_M128I ); i += INIT_SIZE_M128I )
|
||||||
|
{
|
||||||
|
for ( j = 0; j < 10; j++ )
|
||||||
{
|
{
|
||||||
xmminput[0] = _mm_aesenc_si128( xmminput[0], expkey[j] );
|
xmminput[0] = _mm_aesenc_si128( xmminput[0], expkey[j] );
|
||||||
xmminput[1] = _mm_aesenc_si128( xmminput[1], expkey[j] );
|
xmminput[1] = _mm_aesenc_si128( xmminput[1], expkey[j] );
|
||||||
@@ -166,83 +189,75 @@ void cryptonight_hash_aes( void *restrict output, const void *input, int len )
|
|||||||
_mm_store_si128( &( longoutput[i+7] ), xmminput[7] );
|
_mm_store_si128( &( longoutput[i+7] ), xmminput[7] );
|
||||||
}
|
}
|
||||||
|
|
||||||
// cast_m128i( ctx.a ) = _mm_xor_si128( casti_m128i( ctx.state.k, 0 ) ,
|
|
||||||
// casti_m128i( ctx.state.k, 2 ) );
|
|
||||||
// cast_m128i( ctx.b ) = _mm_xor_si128( casti_m128i( ctx.state.k, 1 ),
|
|
||||||
// casti_m128i( ctx.state.k, 3 ) );
|
|
||||||
|
|
||||||
ctx.a[0] = ((uint64_t *)ctx.state.k)[0] ^ ((uint64_t *)ctx.state.k)[4];
|
ctx.a[0] = ((uint64_t *)ctx.state.k)[0] ^ ((uint64_t *)ctx.state.k)[4];
|
||||||
ctx.b[0] = ((uint64_t *)ctx.state.k)[2] ^ ((uint64_t *)ctx.state.k)[6];
|
ctx.b[0] = ((uint64_t *)ctx.state.k)[2] ^ ((uint64_t *)ctx.state.k)[6];
|
||||||
ctx.a[1] = ((uint64_t *)ctx.state.k)[1] ^ ((uint64_t *)ctx.state.k)[5];
|
ctx.a[1] = ((uint64_t *)ctx.state.k)[1] ^ ((uint64_t *)ctx.state.k)[5];
|
||||||
ctx.b[1] = ((uint64_t *)ctx.state.k)[3] ^ ((uint64_t *)ctx.state.k)[7];
|
ctx.b[1] = ((uint64_t *)ctx.state.k)[3] ^ ((uint64_t *)ctx.state.k)[7];
|
||||||
|
|
||||||
// for (i = 0; i < 2; i++)
|
uint64_t a[2] __attribute((aligned(16))),
|
||||||
// {
|
b[2] __attribute((aligned(16))),
|
||||||
// ctx.a[i] = ((uint64_t *)ctx.state.k)[i] ^ ((uint64_t *)ctx.state.k)[i+4];
|
c[2] __attribute((aligned(16)));
|
||||||
// ctx.b[i] = ((uint64_t *)ctx.state.k)[i+2] ^ ((uint64_t *)ctx.state.k)[i+6];
|
|
||||||
// }
|
|
||||||
|
|
||||||
__m128i b_x = _mm_load_si128((__m128i *)ctx.b);
|
|
||||||
uint64_t a[2] __attribute((aligned(16))), b[2] __attribute((aligned(16)));
|
|
||||||
a[0] = ctx.a[0];
|
a[0] = ctx.a[0];
|
||||||
a[1] = ctx.a[1];
|
a[1] = ctx.a[1];
|
||||||
|
__m128i b_x = _mm_load_si128( (__m128i*)ctx.b );
|
||||||
|
__m128i a_x = _mm_load_si128( (__m128i*)a );
|
||||||
|
__m128i* lsa = (__m128i*)&ctx.long_state[ a[0] & 0x1FFFF0 ];
|
||||||
|
__m128i c_x = _mm_load_si128( lsa );
|
||||||
|
uint64_t *nextblock;
|
||||||
|
uint64_t hi, lo;
|
||||||
|
|
||||||
for(i = 0; __builtin_expect(i < 0x80000, 1); i++)
|
// n-1 iterations
|
||||||
|
for( i = 0; __builtin_expect( i < 0x7ffff, 1 ); i++ )
|
||||||
{
|
{
|
||||||
uint64_t c[2];
|
c_x = _mm_aesenc_si128( c_x, a_x );
|
||||||
__builtin_prefetch( &ctx.long_state[c[0] & 0x1FFFF0], 0, 1 );
|
_mm_store_si128( (__m128i*)c, c_x );
|
||||||
|
b_x = _mm_xor_si128( b_x, c_x );
|
||||||
__m128i c_x = _mm_load_si128(
|
nextblock = (uint64_t *)&ctx.long_state[c[0] & 0x1FFFF0];
|
||||||
(__m128i *)&ctx.long_state[a[0] & 0x1FFFF0]);
|
_mm_store_si128( lsa, b_x );
|
||||||
__m128i a_x = _mm_load_si128((__m128i *)a);
|
|
||||||
c_x = _mm_aesenc_si128(c_x, a_x);
|
|
||||||
_mm_store_si128((__m128i *)c, c_x);
|
|
||||||
|
|
||||||
b_x = _mm_xor_si128(b_x, c_x);
|
|
||||||
_mm_store_si128((__m128i *)&ctx.long_state[a[0] & 0x1FFFF0], b_x);
|
|
||||||
|
|
||||||
uint64_t *nextblock = (uint64_t *)&ctx.long_state[c[0] & 0x1FFFF0];
|
|
||||||
// uint64_t b[2];
|
|
||||||
b[0] = nextblock[0];
|
b[0] = nextblock[0];
|
||||||
b[1] = nextblock[1];
|
b[1] = nextblock[1];
|
||||||
|
|
||||||
{
|
|
||||||
uint64_t hi, lo;
|
|
||||||
// hi,lo = 64bit x 64bit multiply of c[0] and b[0]
|
// hi,lo = 64bit x 64bit multiply of c[0] and b[0]
|
||||||
|
__asm__( "mulq %3\n\t"
|
||||||
__asm__("mulq %3\n\t"
|
: "=d" ( hi ),
|
||||||
: "=d" (hi),
|
"=a" ( lo )
|
||||||
"=a" (lo)
|
: "%a" ( c[0] ),
|
||||||
: "%a" (c[0]),
|
"rm" ( b[0] )
|
||||||
"rm" (b[0])
|
|
||||||
: "cc" );
|
: "cc" );
|
||||||
|
|
||||||
a[0] += hi;
|
|
||||||
a[1] += lo;
|
|
||||||
}
|
|
||||||
uint64_t *dst = (uint64_t*)&ctx.long_state[c[0] & 0x1FFFF0];
|
|
||||||
// __m128i *dst = (__m128i*)&ctx.long_state[c[0] & 0x1FFFF0];
|
|
||||||
|
|
||||||
// *dst = cast_m128i( a );
|
|
||||||
dst[0] = a[0];
|
|
||||||
dst[1] = a[1];
|
|
||||||
|
|
||||||
// cast_m128i( a ) = _mm_xor_si128( cast_m128i( a ), cast_m128i( b ) );
|
|
||||||
a[0] ^= b[0];
|
|
||||||
a[1] ^= b[1];
|
|
||||||
b_x = c_x;
|
b_x = c_x;
|
||||||
__builtin_prefetch( &ctx.long_state[a[0] & 0x1FFFF0], 0, 3 );
|
nextblock[0] = a[0] + hi;
|
||||||
|
nextblock[1] = a[1] + lo;
|
||||||
|
a[0] = b[0] ^ nextblock[0];
|
||||||
|
a[1] = b[1] ^ nextblock[1];
|
||||||
|
lsa = (__m128i*)&ctx.long_state[ a[0] & 0x1FFFF0 ];
|
||||||
|
a_x = _mm_load_si128( (__m128i*)a );
|
||||||
|
c_x = _mm_load_si128( lsa );
|
||||||
}
|
}
|
||||||
|
// abbreviated nth iteration
|
||||||
|
c_x = _mm_aesenc_si128( c_x, a_x );
|
||||||
|
_mm_store_si128( (__m128i*)c, c_x );
|
||||||
|
b_x = _mm_xor_si128( b_x, c_x );
|
||||||
|
nextblock = (uint64_t *)&ctx.long_state[c[0] & 0x1FFFF0];
|
||||||
|
_mm_store_si128( lsa, b_x );
|
||||||
|
b[0] = nextblock[0];
|
||||||
|
b[1] = nextblock[1];
|
||||||
|
|
||||||
|
__asm__( "mulq %3\n\t"
|
||||||
|
: "=d" ( hi ),
|
||||||
|
"=a" ( lo )
|
||||||
|
: "%a" ( c[0] ),
|
||||||
|
"rm" ( b[0] )
|
||||||
|
: "cc" );
|
||||||
|
|
||||||
|
nextblock[0] = a[0] + hi;
|
||||||
|
nextblock[1] = a[1] + lo;
|
||||||
|
|
||||||
memcpy( ctx.text, ctx.state.init, INIT_SIZE_BYTE );
|
|
||||||
memcpy( ExpandedKey, &ctx.state.hs.b[32], AES_KEY_SIZE );
|
memcpy( ExpandedKey, &ctx.state.hs.b[32], AES_KEY_SIZE );
|
||||||
ExpandAESKey256( ExpandedKey );
|
ExpandAESKey256( ExpandedKey );
|
||||||
|
memcpy( ctx.text, ctx.state.init, INIT_SIZE_BYTE );
|
||||||
//for (i = 0; likely(i < MEMORY); i += INIT_SIZE_BYTE)
|
|
||||||
// aesni_parallel_xor(&ctx->text, ExpandedKey, &ctx->long_state[i]);
|
|
||||||
|
|
||||||
// prefetch expkey, all of xmminput and enough longoutput for 4 loops
|
// prefetch expkey, all of xmminput and enough longoutput for 4 loops
|
||||||
|
|
||||||
_mm_prefetch( xmminput, _MM_HINT_T0 );
|
_mm_prefetch( xmminput, _MM_HINT_T0 );
|
||||||
_mm_prefetch( xmminput + 4, _MM_HINT_T0 );
|
_mm_prefetch( xmminput + 4, _MM_HINT_T0 );
|
||||||
for ( i = 0; i < 64; i += 16 )
|
for ( i = 0; i < 64; i += 16 )
|
||||||
@@ -256,9 +271,11 @@ void cryptonight_hash_aes( void *restrict output, const void *input, int len )
|
|||||||
_mm_prefetch( expkey + 4, _MM_HINT_T0 );
|
_mm_prefetch( expkey + 4, _MM_HINT_T0 );
|
||||||
_mm_prefetch( expkey + 8, _MM_HINT_T0 );
|
_mm_prefetch( expkey + 8, _MM_HINT_T0 );
|
||||||
|
|
||||||
for ( i = 0; likely( i < MEMORY_M128I ); i += INIT_SIZE_M128I )
|
// n-4 iterations
|
||||||
|
for ( i = 0; likely( i < MEMORY_M128I - 4*INIT_SIZE_M128I );
|
||||||
|
i += INIT_SIZE_M128I )
|
||||||
{
|
{
|
||||||
// stay 4 loops ahead,
|
// stay 4 iterations ahead.
|
||||||
_mm_prefetch( longoutput + i + 64, _MM_HINT_T0 );
|
_mm_prefetch( longoutput + i + 64, _MM_HINT_T0 );
|
||||||
_mm_prefetch( longoutput + i + 68, _MM_HINT_T0 );
|
_mm_prefetch( longoutput + i + 68, _MM_HINT_T0 );
|
||||||
|
|
||||||
@@ -283,10 +300,34 @@ void cryptonight_hash_aes( void *restrict output, const void *input, int len )
|
|||||||
xmminput[7] = _mm_aesenc_si128( xmminput[7], expkey[j] );
|
xmminput[7] = _mm_aesenc_si128( xmminput[7], expkey[j] );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// last 4 iterations
|
||||||
|
for ( ; likely( i < MEMORY_M128I ); i += INIT_SIZE_M128I )
|
||||||
|
{
|
||||||
|
xmminput[0] = _mm_xor_si128( longoutput[i ], xmminput[0] );
|
||||||
|
xmminput[1] = _mm_xor_si128( longoutput[i+1], xmminput[1] );
|
||||||
|
xmminput[2] = _mm_xor_si128( longoutput[i+2], xmminput[2] );
|
||||||
|
xmminput[3] = _mm_xor_si128( longoutput[i+3], xmminput[3] );
|
||||||
|
xmminput[4] = _mm_xor_si128( longoutput[i+4], xmminput[4] );
|
||||||
|
xmminput[5] = _mm_xor_si128( longoutput[i+5], xmminput[5] );
|
||||||
|
xmminput[6] = _mm_xor_si128( longoutput[i+6], xmminput[6] );
|
||||||
|
xmminput[7] = _mm_xor_si128( longoutput[i+7], xmminput[7] );
|
||||||
|
|
||||||
|
for( j = 0; j < 10; j++ )
|
||||||
|
{
|
||||||
|
xmminput[0] = _mm_aesenc_si128( xmminput[0], expkey[j] );
|
||||||
|
xmminput[1] = _mm_aesenc_si128( xmminput[1], expkey[j] );
|
||||||
|
xmminput[2] = _mm_aesenc_si128( xmminput[2], expkey[j] );
|
||||||
|
xmminput[3] = _mm_aesenc_si128( xmminput[3], expkey[j] );
|
||||||
|
xmminput[4] = _mm_aesenc_si128( xmminput[4], expkey[j] );
|
||||||
|
xmminput[5] = _mm_aesenc_si128( xmminput[5], expkey[j] );
|
||||||
|
xmminput[6] = _mm_aesenc_si128( xmminput[6], expkey[j] );
|
||||||
|
xmminput[7] = _mm_aesenc_si128( xmminput[7], expkey[j] );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
memcpy( ctx.state.init, ctx.text, INIT_SIZE_BYTE);
|
memcpy( ctx.state.init, ctx.text, INIT_SIZE_BYTE);
|
||||||
keccakf( (uint64_t*)&ctx.state.hs.w, 24 );
|
keccakf( (uint64_t*)&ctx.state.hs.w, 24 );
|
||||||
|
|
||||||
extra_hashes[ctx.state.hs.b[0] & 3](&ctx.state, 200, output);
|
extra_hashes[ctx.state.hs.b[0] & 3](&ctx.state, 200, output);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|||||||
166
algo/jh/jha.c
Normal file
166
algo/jh/jha.c
Normal file
@@ -0,0 +1,166 @@
|
|||||||
|
#include "miner.h"
|
||||||
|
#include "algo-gate-api.h"
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
#include "algo/blake/sph_blake.h"
|
||||||
|
#include "algo/jh/sph_jh.h"
|
||||||
|
#include "algo/keccak/sph_keccak.h"
|
||||||
|
#include "algo/skein/sph_skein.h"
|
||||||
|
|
||||||
|
#ifdef NO_AES_NI
|
||||||
|
#include "algo/groestl/sph_groestl.h"
|
||||||
|
#else
|
||||||
|
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Per-thread Keccak-512 context. jha_kec_midstate() fills it with the state
// reached after the leading 64 bytes of the header; jha_hash() copies it
// instead of re-hashing those bytes.
static __thread sph_keccak512_context jha_kec_mid __attribute__ ((aligned (64)));

// Reset jha_kec_mid and feed it the first 64 bytes found at `input`.
void jha_kec_midstate( const void* input )
{
    const size_t prefix_len = 64;

    sph_keccak512_init( &jha_kec_mid );
    sph_keccak512( &jha_kec_mid, input, prefix_len );
}
|
||||||
|
|
||||||
|
void jha_hash(void *output, const void *input)
|
||||||
|
{
|
||||||
|
uint8_t _ALIGN(128) hash[64];
|
||||||
|
|
||||||
|
#ifdef NO_AES_NI
|
||||||
|
sph_groestl512_context ctx_groestl;
|
||||||
|
#else
|
||||||
|
hashState_groestl ctx_groestl;
|
||||||
|
#endif
|
||||||
|
sph_blake512_context ctx_blake;
|
||||||
|
sph_jh512_context ctx_jh;
|
||||||
|
sph_keccak512_context ctx_keccak;
|
||||||
|
sph_skein512_context ctx_skein;
|
||||||
|
|
||||||
|
sph_keccak512_init(&ctx_keccak);
|
||||||
|
memcpy( &ctx_keccak, &jha_kec_mid, sizeof jha_kec_mid );
|
||||||
|
sph_keccak512(&ctx_keccak, input+64, 16 );
|
||||||
|
sph_keccak512_close(&ctx_keccak, hash );
|
||||||
|
|
||||||
|
// Heavy & Light Pair Loop
|
||||||
|
for (int round = 0; round < 3; round++)
|
||||||
|
{
|
||||||
|
if (hash[0] & 0x01)
|
||||||
|
{
|
||||||
|
#ifdef NO_AES_NI
|
||||||
|
sph_groestl512_init(&ctx_groestl);
|
||||||
|
sph_groestl512(&ctx_groestl, hash, 64 );
|
||||||
|
sph_groestl512_close(&ctx_groestl, hash );
|
||||||
|
#else
|
||||||
|
init_groestl( &ctx_groestl, 64 );
|
||||||
|
update_and_final_groestl( &ctx_groestl, (char*)hash,
|
||||||
|
(char*)hash, 512 );
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
sph_skein512_init(&ctx_skein);
|
||||||
|
sph_skein512(&ctx_skein, hash, 64);
|
||||||
|
sph_skein512_close(&ctx_skein, hash );
|
||||||
|
}
|
||||||
|
|
||||||
|
if (hash[0] & 0x01)
|
||||||
|
{
|
||||||
|
sph_blake512_init(&ctx_blake);
|
||||||
|
sph_blake512(&ctx_blake, hash, 64);
|
||||||
|
sph_blake512_close(&ctx_blake, hash );
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
sph_jh512_init(&ctx_jh);
|
||||||
|
sph_jh512(&ctx_jh, hash, 64 );
|
||||||
|
sph_jh512_close(&ctx_jh, hash );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
memcpy(output, hash, 32);
|
||||||
|
}
|
||||||
|
|
||||||
|
int scanhash_jha(int thr_id, struct work *work, uint32_t max_nonce, uint64_t *hashes_done)
|
||||||
|
{
|
||||||
|
uint32_t _ALIGN(128) hash32[8];
|
||||||
|
uint32_t _ALIGN(128) endiandata[20];
|
||||||
|
uint32_t *pdata = work->data;
|
||||||
|
uint32_t *ptarget = work->target;
|
||||||
|
const uint32_t first_nonce = pdata[19];
|
||||||
|
const uint32_t Htarg = ptarget[7];
|
||||||
|
uint32_t n = pdata[19] - 1;
|
||||||
|
|
||||||
|
uint64_t htmax[] = {
|
||||||
|
0,
|
||||||
|
0xF,
|
||||||
|
0xFF,
|
||||||
|
0xFFF,
|
||||||
|
0xFFFF,
|
||||||
|
0x10000000
|
||||||
|
};
|
||||||
|
uint32_t masks[] = {
|
||||||
|
0xFFFFFFFF,
|
||||||
|
0xFFFFFFF0,
|
||||||
|
0xFFFFFF00,
|
||||||
|
0xFFFFF000,
|
||||||
|
0xFFFF0000,
|
||||||
|
0
|
||||||
|
};
|
||||||
|
|
||||||
|
// we need bigendian data...
|
||||||
|
for (int i=0; i < 19; i++) {
|
||||||
|
be32enc(&endiandata[i], pdata[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
jha_kec_midstate( endiandata );
|
||||||
|
|
||||||
|
#ifdef DEBUG_ALGO
|
||||||
|
printf("[%d] Htarg=%X\n", thr_id, Htarg);
|
||||||
|
#endif
|
||||||
|
for (int m=0; m < 6; m++) {
|
||||||
|
if (Htarg <= htmax[m]) {
|
||||||
|
uint32_t mask = masks[m];
|
||||||
|
do {
|
||||||
|
pdata[19] = ++n;
|
||||||
|
be32enc(&endiandata[19], n);
|
||||||
|
jha_hash(hash32, endiandata);
|
||||||
|
#ifndef DEBUG_ALGO
|
||||||
|
if ((!(hash32[7] & mask)) && fulltest(hash32, ptarget)) {
|
||||||
|
work_set_target_ratio(work, hash32);
|
||||||
|
*hashes_done = n - first_nonce + 1;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
if (!(n % 0x1000) && !thr_id) printf(".");
|
||||||
|
if (!(hash32[7] & mask)) {
|
||||||
|
printf("[%d]",thr_id);
|
||||||
|
if (fulltest(hash32, ptarget)) {
|
||||||
|
work_set_target_ratio(work, hash32);
|
||||||
|
*hashes_done = n - first_nonce + 1;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
} while (n < max_nonce && !work_restart[thr_id].restart);
|
||||||
|
// see blake.c if else to understand the loop on htmax => mask
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
*hashes_done = n - first_nonce + 1;
|
||||||
|
pdata[19] = n;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Install the JHA entry points into the algo gate.
//
// Sets the optimization flags to SSE2_OPT | AES_OPT, points scanhash and
// hash at the JHA implementations, and points set_target at
// scrypt_set_target. Always returns true.
bool register_jha_algo( algo_gate_t* gate )
{
    gate->optimizations = SSE2_OPT | AES_OPT;
    gate->scanhash      = (void*)&scanhash_jha;
    gate->hash          = (void*)&jha_hash;
    gate->set_target    = (void*)&scrypt_set_target;
    return true;
}
|
||||||
|
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
AC_INIT([cpuminer-opt], [3.6.4])
|
AC_INIT([cpuminer-opt], [3.6.5])
|
||||||
|
|
||||||
AC_PREREQ([2.59c])
|
AC_PREREQ([2.59c])
|
||||||
AC_CANONICAL_SYSTEM
|
AC_CANONICAL_SYSTEM
|
||||||
|
|||||||
3
miner.h
3
miner.h
@@ -495,6 +495,7 @@ enum algos {
|
|||||||
ALGO_HEAVY,
|
ALGO_HEAVY,
|
||||||
ALGO_HMQ1725,
|
ALGO_HMQ1725,
|
||||||
ALGO_HODL,
|
ALGO_HODL,
|
||||||
|
ALGO_JHA,
|
||||||
ALGO_KECCAK,
|
ALGO_KECCAK,
|
||||||
ALGO_LBRY,
|
ALGO_LBRY,
|
||||||
ALGO_LUFFA,
|
ALGO_LUFFA,
|
||||||
@@ -558,6 +559,7 @@ static const char* const algo_names[] = {
|
|||||||
"heavy",
|
"heavy",
|
||||||
"hmq1725",
|
"hmq1725",
|
||||||
"hodl",
|
"hodl",
|
||||||
|
"jha",
|
||||||
"keccak",
|
"keccak",
|
||||||
"lbry",
|
"lbry",
|
||||||
"luffa",
|
"luffa",
|
||||||
@@ -675,6 +677,7 @@ Options:\n\
|
|||||||
heavy Heavy\n\
|
heavy Heavy\n\
|
||||||
hmq1725 Espers\n\
|
hmq1725 Espers\n\
|
||||||
hodl Hodlcoin\n\
|
hodl Hodlcoin\n\
|
||||||
|
jha jackppot (Jackpotcoin)\n\
|
||||||
keccak Keccak\n\
|
keccak Keccak\n\
|
||||||
lbry LBC, LBRY Credits\n\
|
lbry LBC, LBRY Credits\n\
|
||||||
luffa Luffa\n\
|
luffa Luffa\n\
|
||||||
|
|||||||
Reference in New Issue
Block a user