mirror of
https://github.com/JayDDee/cpuminer-opt.git
synced 2025-09-17 23:44:27 +00:00
v3.7.10
This commit is contained in:
19
Makefile.am
19
Makefile.am
@@ -46,8 +46,10 @@ cpuminer_SOURCES = \
|
||||
algo/blake/sph_blake2b.c \
|
||||
algo/blake/blake2b.c \
|
||||
algo/blake/blake2s.c \
|
||||
algo/blake/blakecoin-gate.c \
|
||||
algo/blake/mod_blakecoin.c \
|
||||
algo/blake/blakecoin.c \
|
||||
algo/blake/blakecoin-4way.c \
|
||||
algo/blake/decred-gate.c \
|
||||
algo/blake/decred.c \
|
||||
algo/blake/decred-4way.c \
|
||||
@@ -99,13 +101,17 @@ cpuminer_SOURCES = \
|
||||
algo/luffa/sse2/luffa_for_sse2.c \
|
||||
algo/lyra2/lyra2.c \
|
||||
algo/lyra2/sponge.c \
|
||||
algo/lyra2/lyra2rev2-gate.c \
|
||||
algo/lyra2/lyra2rev2.c \
|
||||
algo/lyra2/lyra2rev2-4way.c \
|
||||
algo/lyra2/lyra2re.c \
|
||||
algo/lyra2/lyra2z-gate.c \
|
||||
algo/lyra2/lyra2z.c \
|
||||
algo/lyra2/lyra2z-4way.c \
|
||||
algo/lyra2/lyra2z330.c \
|
||||
algo/lyra2/lyra2h-gate.c \
|
||||
algo/lyra2/lyra2h.c \
|
||||
algo/lyra2/lyra2h-4way.c \
|
||||
algo/m7m.c \
|
||||
algo/neoscrypt/neoscrypt.c \
|
||||
algo/nist5/nist5-gate.c \
|
||||
@@ -113,7 +119,9 @@ cpuminer_SOURCES = \
|
||||
algo/nist5/nist5.c \
|
||||
algo/nist5/zr5.c \
|
||||
algo/pluck.c \
|
||||
algo/quark/quark-gate.c \
|
||||
algo/quark/quark.c \
|
||||
algo/quark/quark-4way.c \
|
||||
algo/qubit/qubit.c \
|
||||
algo/qubit/deep.c \
|
||||
algo/ripemd/sph_ripemd.c \
|
||||
@@ -140,9 +148,8 @@ cpuminer_SOURCES = \
|
||||
algo/skein/skein2-4way.c \
|
||||
algo/skein/skein2-gate.c \
|
||||
algo/sm3/sm3.c \
|
||||
algo/sm3/sm3-hash-4way.c \
|
||||
algo/tiger/sph_tiger.c \
|
||||
algo/timetravel.c \
|
||||
algo/timetravel10.c \
|
||||
algo/whirlpool/sph_whirlpool.c \
|
||||
algo/whirlpool/whirlpool-hash-4way.c \
|
||||
algo/whirlpool/whirlpool-gate.c \
|
||||
@@ -161,8 +168,16 @@ cpuminer_SOURCES = \
|
||||
algo/x11/tribus-gate.c \
|
||||
algo/x11/tribus.c \
|
||||
algo/x11/tribus-4way.c \
|
||||
algo/x11/timetravel-gate.c \
|
||||
algo/x11/timetravel.c \
|
||||
algo/x11/timetravel-4way.c \
|
||||
algo/x11/timetravel10-gate.c \
|
||||
algo/x11/timetravel10.c \
|
||||
algo/x11/timetravel10-4way.c \
|
||||
algo/x11/fresh.c \
|
||||
algo/x11/x11evo.c \
|
||||
algo/x11/x11evo-4way.c \
|
||||
algo/x11/x11evo-gate.c \
|
||||
algo/x13/x13-gate.c \
|
||||
algo/x13/x13.c \
|
||||
algo/x13/x13-4way.c \
|
||||
|
@@ -165,6 +165,13 @@ Support for even older x86_64 without AES_NI or SSE2 is not availble.
|
||||
Change Log
|
||||
----------
|
||||
|
||||
v3.7.10
|
||||
|
||||
4way optimizations for lyra2rev2, lyra2h, quark, timetravel8, timetravel10
|
||||
x11evo, blakecoin.
|
||||
Faster x13sm3 (hsr).
|
||||
Added share difficulty to accepted message.
|
||||
|
||||
v3.7.9
|
||||
|
||||
Partial 4way optimizations for veltor, skunk, polytimos, lyra2z.
|
||||
|
@@ -1,31 +1,22 @@
|
||||
#include "blake-gate.h"
|
||||
#include "sph_blake.h"
|
||||
|
||||
#if defined (__AVX__)
|
||||
|
||||
#include "blake-hash-4way.h"
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
#include <memory.h>
|
||||
|
||||
#if defined (BLAKE_4WAY)
|
||||
blake256r14_4way_context blake_ctx;
|
||||
|
||||
void blakehash_4way(void *state, const void *input)
|
||||
{
|
||||
uint32_t vhash[4*4] __attribute__ ((aligned (64)));
|
||||
uint32_t hash0[4] __attribute__ ((aligned (32)));
|
||||
uint32_t hash1[4] __attribute__ ((aligned (32)));
|
||||
uint32_t hash2[4] __attribute__ ((aligned (32)));
|
||||
uint32_t hash3[4] __attribute__ ((aligned (32)));
|
||||
blake256_4way_context ctx;
|
||||
|
||||
blake256_4way_init( &ctx );
|
||||
blake256_4way( &ctx, input, 16 );
|
||||
blake256_4way_close( &ctx, vhash );
|
||||
|
||||
mm_deinterleave_4x32( hash0, hash1, hash2, hash3, vhash, 256 );
|
||||
|
||||
memcpy( state, hash0, 32 );
|
||||
memcpy( state+32, hash1, 32 );
|
||||
memcpy( state+64, hash1, 32 );
|
||||
memcpy( state+96, hash1, 32 );
|
||||
uint32_t vhash[8*4] __attribute__ ((aligned (64)));
|
||||
blake256r14_4way_context ctx;
|
||||
memcpy( &ctx, &blake_ctx, sizeof ctx );
|
||||
blake256r14_4way( &ctx, input + (64<<2), 16 );
|
||||
blake256r14_4way_close( &ctx, vhash );
|
||||
mm_deinterleave_4x32( state, state+32, state+64, state+96, vhash, 256 );
|
||||
}
|
||||
|
||||
int scanhash_blake_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
@@ -36,21 +27,24 @@ int scanhash_blake_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
// uint32_t HTarget = ptarget[7];
|
||||
uint32_t HTarget = ptarget[7];
|
||||
uint32_t _ALIGN(32) edata[20];
|
||||
uint32_t n = first_nonce;
|
||||
uint32_t *nonces = work->nonces;
|
||||
bool *found = work->nfound;
|
||||
int num_found = 0;
|
||||
|
||||
// if (opt_benchmark)
|
||||
// HTarget = 0x7f;
|
||||
if (opt_benchmark)
|
||||
HTarget = 0x7f;
|
||||
|
||||
// we need big endian data...
|
||||
swab32_array( edata, pdata, 20 );
|
||||
|
||||
mm_interleave_4x32( vdata, edata, edata, edata, edata, 640 );
|
||||
|
||||
blake256r14_4way_init( &blake_ctx );
|
||||
blake256r14_4way( &blake_ctx, vdata, 64 );
|
||||
|
||||
uint32_t *noncep = vdata + 76; // 19*4
|
||||
do {
|
||||
found[0] = found[1] = found[2] = found[3] = false;
|
||||
@@ -61,45 +55,36 @@ int scanhash_blake_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
|
||||
blakehash_4way( hash, vdata );
|
||||
|
||||
if ( hash[7] == 0 )
|
||||
if ( hash[7] <= HTarget && fulltest( hash, ptarget ) )
|
||||
{
|
||||
if ( fulltest( hash, ptarget ) )
|
||||
{
|
||||
found[0] = true;
|
||||
num_found++;
|
||||
nonces[0] = n;
|
||||
pdata[19] = n;
|
||||
}
|
||||
found[0] = true;
|
||||
num_found++;
|
||||
nonces[0] = n;
|
||||
pdata[19] = n;
|
||||
work_set_target_ratio( work, hash );
|
||||
}
|
||||
if ( (hash+8)[7] == 0 )
|
||||
if ( (hash+8)[7] <= HTarget && fulltest( hash+8, ptarget ) )
|
||||
{
|
||||
if ( fulltest( hash+8, ptarget ) )
|
||||
{
|
||||
found[1] = true;
|
||||
num_found++;
|
||||
nonces[1] = n+1;
|
||||
}
|
||||
found[1] = true;
|
||||
num_found++;
|
||||
nonces[1] = n+1;
|
||||
work_set_target_ratio( work, hash+8 );
|
||||
}
|
||||
if ( (hash+16)[7] == 0 )
|
||||
if ( (hash+16)[7] <= HTarget && fulltest( hash+16, ptarget ) )
|
||||
{
|
||||
if ( fulltest( hash+8, ptarget ) )
|
||||
{
|
||||
found[2] = true;
|
||||
num_found++;
|
||||
nonces[2] = n+2;
|
||||
}
|
||||
found[2] = true;
|
||||
num_found++;
|
||||
nonces[2] = n+2;
|
||||
work_set_target_ratio( work, hash+16 );
|
||||
}
|
||||
if ( (hash+24)[7] == 0 )
|
||||
if ( (hash+24)[7] <= HTarget && fulltest( hash+24, ptarget ) )
|
||||
{
|
||||
if ( fulltest( hash+8, ptarget ) )
|
||||
{
|
||||
found[3] = true;
|
||||
num_found++;
|
||||
nonces[3] = n+3;
|
||||
}
|
||||
found[3] = true;
|
||||
num_found++;
|
||||
nonces[3] = n+3;
|
||||
work_set_target_ratio( work, hash+24 );
|
||||
}
|
||||
n += 4;
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
n += 4;
|
||||
|
||||
} while ( (num_found == 0) && (n < max_nonce)
|
||||
&& !work_restart[thr_id].restart );
|
||||
|
@@ -491,14 +491,9 @@ do { \
|
||||
(state)->T1 = T1; \
|
||||
} while (0)
|
||||
|
||||
//#define BLAKE32_ROUNDS 8
|
||||
#ifndef BLAKE32_ROUNDS
|
||||
#define BLAKE32_ROUNDS 14
|
||||
#endif
|
||||
|
||||
#if SPH_COMPACT_BLAKE_32
|
||||
|
||||
#define COMPRESS32_4WAY do { \
|
||||
#define COMPRESS32_4WAY( rounds ) do { \
|
||||
__m128i M[16]; \
|
||||
__m128i V0, V1, V2, V3, V4, V5, V6, V7; \
|
||||
__m128i V8, V9, VA, VB, VC, VD, VE, VF; \
|
||||
@@ -539,7 +534,7 @@ do { \
|
||||
M[0xD] = mm_byteswap_32( *(buf + 13) ); \
|
||||
M[0xE] = mm_byteswap_32( *(buf + 14) ); \
|
||||
M[0xF] = mm_byteswap_32( *(buf + 15) ); \
|
||||
for (r = 0; r < BLAKE32_ROUNDS; r ++) \
|
||||
for (r = 0; r < rounds; r ++) \
|
||||
ROUND_S_4WAY(r); \
|
||||
H0 = _mm_xor_si128( _mm_xor_si128( \
|
||||
_mm_xor_si128( S0, V0 ), V8 ), H0 ); \
|
||||
@@ -563,80 +558,70 @@ do { \
|
||||
|
||||
// current impl
|
||||
|
||||
#define COMPRESS32_4WAY do { \
|
||||
__m128i M0, M1, M2, M3, M4, M5, M6, M7; \
|
||||
__m128i M8, M9, MA, MB, MC, MD, ME, MF; \
|
||||
__m128i V0, V1, V2, V3, V4, V5, V6, V7; \
|
||||
__m128i V8, V9, VA, VB, VC, VD, VE, VF; \
|
||||
V0 = H0; \
|
||||
V1 = H1; \
|
||||
V2 = H2; \
|
||||
V3 = H3; \
|
||||
V4 = H4; \
|
||||
V5 = H5; \
|
||||
V6 = H6; \
|
||||
V7 = H7; \
|
||||
V8 = _mm_xor_si128( S0, _mm_set_epi32( CS0, CS0, CS0, CS0 ) ); \
|
||||
V9 = _mm_xor_si128( S1, _mm_set_epi32( CS1, CS1, CS1, CS1 ) ); \
|
||||
VA = _mm_xor_si128( S2, _mm_set_epi32( CS2, CS2, CS2, CS2 ) ); \
|
||||
VB = _mm_xor_si128( S3, _mm_set_epi32( CS3, CS3, CS3, CS3 ) ); \
|
||||
VC = _mm_xor_si128( _mm_set_epi32( T0, T0, T0, T0 ), \
|
||||
_mm_set_epi32( CS4, CS4, CS4, CS4 ) ); \
|
||||
VD = _mm_xor_si128( _mm_set_epi32( T0, T0, T0, T0 ), \
|
||||
_mm_set_epi32( CS5, CS5, CS5, CS5 ) ); \
|
||||
VE = _mm_xor_si128( _mm_set_epi32( T1, T1, T1, T1 ), \
|
||||
_mm_set_epi32( CS6, CS6, CS6, CS6 ) ); \
|
||||
VF = _mm_xor_si128( _mm_set_epi32( T1, T1, T1, T1 ), \
|
||||
_mm_set_epi32( CS7, CS7, CS7, CS7 ) ); \
|
||||
M0 = mm_byteswap_32( * buf ); \
|
||||
M1 = mm_byteswap_32( *(buf+1) ); \
|
||||
M2 = mm_byteswap_32( *(buf+2) ); \
|
||||
M3 = mm_byteswap_32( *(buf+3) ); \
|
||||
M4 = mm_byteswap_32( *(buf+4) ); \
|
||||
M5 = mm_byteswap_32( *(buf+5) ); \
|
||||
M6 = mm_byteswap_32( *(buf+6) ); \
|
||||
M7 = mm_byteswap_32( *(buf+7) ); \
|
||||
M8 = mm_byteswap_32( *(buf+8) ); \
|
||||
M9 = mm_byteswap_32( *(buf+9) ); \
|
||||
MA = mm_byteswap_32( *(buf+10) ); \
|
||||
MB = mm_byteswap_32( *(buf+11) ); \
|
||||
MC = mm_byteswap_32( *(buf+12) ); \
|
||||
MD = mm_byteswap_32( *(buf+13) ); \
|
||||
ME = mm_byteswap_32( *(buf+14) ); \
|
||||
MF = mm_byteswap_32( *(buf+15) ); \
|
||||
ROUND_S_4WAY(0); \
|
||||
ROUND_S_4WAY(1); \
|
||||
ROUND_S_4WAY(2); \
|
||||
ROUND_S_4WAY(3); \
|
||||
ROUND_S_4WAY(4); \
|
||||
ROUND_S_4WAY(5); \
|
||||
ROUND_S_4WAY(6); \
|
||||
ROUND_S_4WAY(7); \
|
||||
if (BLAKE32_ROUNDS == 14) { \
|
||||
ROUND_S_4WAY(8); \
|
||||
ROUND_S_4WAY(9); \
|
||||
ROUND_S_4WAY(0); \
|
||||
ROUND_S_4WAY(1); \
|
||||
ROUND_S_4WAY(2); \
|
||||
ROUND_S_4WAY(3); \
|
||||
} \
|
||||
H0 = _mm_xor_si128( _mm_xor_si128( \
|
||||
_mm_xor_si128( V8, V0 ), S0 ), H0 ); \
|
||||
H1 = _mm_xor_si128( _mm_xor_si128( \
|
||||
_mm_xor_si128( V9, V1 ), S1 ), H1 ); \
|
||||
H2 = _mm_xor_si128( _mm_xor_si128( \
|
||||
_mm_xor_si128( VA, V2 ), S2 ), H2 ); \
|
||||
H3 = _mm_xor_si128( _mm_xor_si128( \
|
||||
_mm_xor_si128( VB, V3 ), S3 ), H3 ); \
|
||||
H4 = _mm_xor_si128( _mm_xor_si128( \
|
||||
_mm_xor_si128( VC, V4 ), S0 ), H4 ); \
|
||||
H5 = _mm_xor_si128( _mm_xor_si128( \
|
||||
_mm_xor_si128( VD, V5 ), S1 ), H5 ); \
|
||||
H6 = _mm_xor_si128( _mm_xor_si128( \
|
||||
_mm_xor_si128( VE, V6 ), S2 ), H6 ); \
|
||||
H7 = _mm_xor_si128( _mm_xor_si128( \
|
||||
_mm_xor_si128( VF, V7 ), S3 ), H7 ); \
|
||||
} while (0)
|
||||
#define COMPRESS32_4WAY( rounds ) \
|
||||
do { \
|
||||
__m128i M0, M1, M2, M3, M4, M5, M6, M7; \
|
||||
__m128i M8, M9, MA, MB, MC, MD, ME, MF; \
|
||||
__m128i V0, V1, V2, V3, V4, V5, V6, V7; \
|
||||
__m128i V8, V9, VA, VB, VC, VD, VE, VF; \
|
||||
V0 = H0; \
|
||||
V1 = H1; \
|
||||
V2 = H2; \
|
||||
V3 = H3; \
|
||||
V4 = H4; \
|
||||
V5 = H5; \
|
||||
V6 = H6; \
|
||||
V7 = H7; \
|
||||
V8 = _mm_xor_si128( S0, _mm_set_epi32( CS0, CS0, CS0, CS0 ) ); \
|
||||
V9 = _mm_xor_si128( S1, _mm_set_epi32( CS1, CS1, CS1, CS1 ) ); \
|
||||
VA = _mm_xor_si128( S2, _mm_set_epi32( CS2, CS2, CS2, CS2 ) ); \
|
||||
VB = _mm_xor_si128( S3, _mm_set_epi32( CS3, CS3, CS3, CS3 ) ); \
|
||||
VC = _mm_xor_si128( _mm_set1_epi32( T0 ), _mm_set1_epi32( CS4 ) ); \
|
||||
VD = _mm_xor_si128( _mm_set1_epi32( T0 ), _mm_set1_epi32( CS5 ) ); \
|
||||
VE = _mm_xor_si128( _mm_set1_epi32( T1 ), _mm_set1_epi32( CS6 ) ); \
|
||||
VF = _mm_xor_si128( _mm_set1_epi32( T1 ), _mm_set1_epi32( CS7 ) ); \
|
||||
M0 = mm_byteswap_32( * buf ); \
|
||||
M1 = mm_byteswap_32( *(buf+1) ); \
|
||||
M2 = mm_byteswap_32( *(buf+2) ); \
|
||||
M3 = mm_byteswap_32( *(buf+3) ); \
|
||||
M4 = mm_byteswap_32( *(buf+4) ); \
|
||||
M5 = mm_byteswap_32( *(buf+5) ); \
|
||||
M6 = mm_byteswap_32( *(buf+6) ); \
|
||||
M7 = mm_byteswap_32( *(buf+7) ); \
|
||||
M8 = mm_byteswap_32( *(buf+8) ); \
|
||||
M9 = mm_byteswap_32( *(buf+9) ); \
|
||||
MA = mm_byteswap_32( *(buf+10) ); \
|
||||
MB = mm_byteswap_32( *(buf+11) ); \
|
||||
MC = mm_byteswap_32( *(buf+12) ); \
|
||||
MD = mm_byteswap_32( *(buf+13) ); \
|
||||
ME = mm_byteswap_32( *(buf+14) ); \
|
||||
MF = mm_byteswap_32( *(buf+15) ); \
|
||||
ROUND_S_4WAY(0); \
|
||||
ROUND_S_4WAY(1); \
|
||||
ROUND_S_4WAY(2); \
|
||||
ROUND_S_4WAY(3); \
|
||||
ROUND_S_4WAY(4); \
|
||||
ROUND_S_4WAY(5); \
|
||||
ROUND_S_4WAY(6); \
|
||||
ROUND_S_4WAY(7); \
|
||||
if (rounds == 14) \
|
||||
{ \
|
||||
ROUND_S_4WAY(8); \
|
||||
ROUND_S_4WAY(9); \
|
||||
ROUND_S_4WAY(0); \
|
||||
ROUND_S_4WAY(1); \
|
||||
ROUND_S_4WAY(2); \
|
||||
ROUND_S_4WAY(3); \
|
||||
} \
|
||||
H0 = _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( V8, V0 ), S0 ), H0 ); \
|
||||
H1 = _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( V9, V1 ), S1 ), H1 ); \
|
||||
H2 = _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( VA, V2 ), S2 ), H2 ); \
|
||||
H3 = _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( VB, V3 ), S3 ), H3 ); \
|
||||
H4 = _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( VC, V4 ), S0 ), H4 ); \
|
||||
H5 = _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( VD, V5 ), S1 ), H5 ); \
|
||||
H6 = _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( VE, V6 ), S2 ), H6 ); \
|
||||
H7 = _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( VF, V7 ), S3 ), H7 ); \
|
||||
} while (0)
|
||||
|
||||
#endif
|
||||
|
||||
@@ -832,15 +817,16 @@ static const sph_u32 salt_zero_small[4] = { 0, 0, 0, 0 };
|
||||
|
||||
static void
|
||||
blake32_4way_init( blake_4way_small_context *sc, const sph_u32 *iv,
|
||||
const sph_u32 *salt)
|
||||
const sph_u32 *salt, int rounds )
|
||||
{
|
||||
int i;
|
||||
for ( i = 0; i < 8; i++ )
|
||||
sc->H[i] = _mm_set1_epi32( iv[i] );
|
||||
for ( i = 0; i < 4; i++ )
|
||||
sc->S[i] = _mm_set1_epi32( salt[i] );
|
||||
sc->T0 = sc->T1 = 0;
|
||||
sc->ptr = 0;
|
||||
int i;
|
||||
for ( i = 0; i < 8; i++ )
|
||||
sc->H[i] = _mm_set1_epi32( iv[i] );
|
||||
for ( i = 0; i < 4; i++ )
|
||||
sc->S[i] = _mm_set1_epi32( salt[i] );
|
||||
sc->T0 = sc->T1 = 0;
|
||||
sc->ptr = 0;
|
||||
sc->rounds = rounds;
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -878,7 +864,7 @@ blake32_4way( blake_4way_small_context *sc, const void *data, size_t len )
|
||||
{
|
||||
if ( ( T0 = SPH_T32(T0 + 512) ) < 512 )
|
||||
T1 = SPH_T32(T1 + 1);
|
||||
COMPRESS32_4WAY;
|
||||
COMPRESS32_4WAY( sc->rounds );
|
||||
ptr = 0;
|
||||
}
|
||||
}
|
||||
@@ -1079,10 +1065,11 @@ blake64_4way_close( blake_4way_big_context *sc,
|
||||
|
||||
#endif
|
||||
|
||||
// default 14 rounds, backward copatibility
|
||||
void
|
||||
blake256_4way_init(void *cc)
|
||||
{
|
||||
blake32_4way_init(cc, IV256, salt_zero_small);
|
||||
blake32_4way_init( cc, IV256, salt_zero_small, 14 );
|
||||
}
|
||||
|
||||
void
|
||||
@@ -1094,13 +1081,43 @@ blake256_4way(void *cc, const void *data, size_t len)
|
||||
void
|
||||
blake256_4way_close(void *cc, void *dst)
|
||||
{
|
||||
blake256_4way_addbits_and_close(cc, 0, 0, dst);
|
||||
blake32_4way_close(cc, 0, 0, dst, 8);
|
||||
}
|
||||
|
||||
// 14 rounds blake, decred
|
||||
void blake256r14_4way_init(void *cc)
|
||||
{
|
||||
blake32_4way_init( cc, IV256, salt_zero_small, 14 );
|
||||
}
|
||||
|
||||
void
|
||||
blake256_4way_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
|
||||
blake256r14_4way(void *cc, const void *data, size_t len)
|
||||
{
|
||||
blake32_4way_close(cc, ub, n, dst, 8);
|
||||
blake32_4way(cc, data, len);
|
||||
}
|
||||
|
||||
void
|
||||
blake256r14_4way_close(void *cc, void *dst)
|
||||
{
|
||||
blake32_4way_close(cc, 0, 0, dst, 8);
|
||||
}
|
||||
|
||||
// 8 rounds blakecoin, vanilla
|
||||
void blake256r8_4way_init(void *cc)
|
||||
{
|
||||
blake32_4way_init( cc, IV256, salt_zero_small, 8 );
|
||||
}
|
||||
|
||||
void
|
||||
blake256r8_4way(void *cc, const void *data, size_t len)
|
||||
{
|
||||
blake32_4way(cc, data, len);
|
||||
}
|
||||
|
||||
void
|
||||
blake256r8_4way_close(void *cc, void *dst)
|
||||
{
|
||||
blake32_4way_close(cc, 0, 0, dst, 8);
|
||||
}
|
||||
|
||||
#if defined (__AVX2__)
|
||||
|
@@ -35,7 +35,9 @@
|
||||
*/
|
||||
|
||||
#ifndef __BLAKE_HASH_4WAY__
|
||||
#define __BLAKE_HASH_4WAY___
|
||||
#define __BLAKE_HASH_4WAY__
|
||||
|
||||
#ifdef __AVX__
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"{
|
||||
@@ -45,38 +47,36 @@ extern "C"{
|
||||
#include "algo/sha/sph_types.h"
|
||||
#include "avxdefs.h"
|
||||
|
||||
/**
|
||||
* Output size (in bits) for BLAKE-256.
|
||||
*/
|
||||
#define SPH_SIZE_blake256 256
|
||||
|
||||
#if SPH_64
|
||||
|
||||
/**
|
||||
* Output size (in bits) for BLAKE-512.
|
||||
*/
|
||||
#define SPH_SIZE_blake512 512
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef __AVX__
|
||||
typedef struct {
|
||||
__m128i buf[16] __attribute__ ((aligned (64)));
|
||||
__m128i H[8];
|
||||
__m128i S[4];
|
||||
size_t ptr;
|
||||
sph_u32 T0, T1;
|
||||
__m128i buf[16] __attribute__ ((aligned (64)));
|
||||
__m128i H[8];
|
||||
__m128i S[4];
|
||||
size_t ptr;
|
||||
sph_u32 T0, T1;
|
||||
int rounds; // 14 for blake, 8 for blakecoin & vanilla
|
||||
} blake_4way_small_context;
|
||||
|
||||
// Default 14 rounds
|
||||
typedef blake_4way_small_context blake256_4way_context;
|
||||
|
||||
void blake256_4way_init(void *cc);
|
||||
void blake256_4way(void *cc, const void *data, size_t len);
|
||||
void blake256_4way_close(void *cc, void *dst);
|
||||
void blake256_4way_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
#endif
|
||||
// 14 rounds, blake, decred
|
||||
typedef blake_4way_small_context blake256r14_4way_context;
|
||||
void blake256r14_4way_init(void *cc);
|
||||
void blake256r14_4way(void *cc, const void *data, size_t len);
|
||||
void blake256r14_4way_close(void *cc, void *dst);
|
||||
|
||||
// 8 rounds, blakecoin, vanilla
|
||||
typedef blake_4way_small_context blake256r8_4way_context;
|
||||
void blake256r8_4way_init(void *cc);
|
||||
void blake256r8_4way(void *cc, const void *data, size_t len);
|
||||
void blake256r8_4way_close(void *cc, void *dst);
|
||||
|
||||
#ifdef __AVX2__
|
||||
|
||||
@@ -103,3 +103,5 @@ void blake512_4way_addbits_and_close(
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
106
algo/blake/blakecoin-4way.c
Normal file
106
algo/blake/blakecoin-4way.c
Normal file
@@ -0,0 +1,106 @@
|
||||
#include "blakecoin-gate.h"
|
||||
|
||||
#if defined (__AVX__)
|
||||
|
||||
#include "blake-hash-4way.h"
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
#include <memory.h>
|
||||
|
||||
blake256r8_4way_context blakecoin_ctx;
|
||||
|
||||
void blakecoin_4way_hash(void *state, const void *input)
|
||||
{
|
||||
uint32_t vhash[8*4] __attribute__ ((aligned (64)));
|
||||
blake256r8_4way_context ctx;
|
||||
memcpy( &ctx, &blakecoin_ctx, sizeof ctx );
|
||||
blake256r8_4way( &ctx, input + (64<<2), 16 );
|
||||
blake256r8_4way_close( &ctx, vhash );
|
||||
mm_deinterleave_4x32( state, state+32, state+64, state+96, vhash, 256 );
|
||||
}
|
||||
|
||||
int scanhash_blakecoin_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
{
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[8*4] __attribute__ ((aligned (32)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t HTarget = ptarget[7];
|
||||
uint32_t _ALIGN(32) edata[20];
|
||||
uint32_t n = first_nonce;
|
||||
uint32_t *nonces = work->nonces;
|
||||
bool *found = work->nfound;
|
||||
int num_found = 0;
|
||||
|
||||
if (opt_benchmark)
|
||||
HTarget = 0x7f;
|
||||
|
||||
// we need big endian data...
|
||||
swab32_array( edata, pdata, 20 );
|
||||
|
||||
mm_interleave_4x32( vdata, edata, edata, edata, edata, 640 );
|
||||
|
||||
blake256r8_4way_init( &blakecoin_ctx );
|
||||
blake256r8_4way( &blakecoin_ctx, vdata, 64 );
|
||||
|
||||
uint32_t *noncep = vdata + 76; // 19*4
|
||||
do {
|
||||
found[0] = found[1] = found[2] = found[3] = false;
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep +1, n+1 );
|
||||
be32enc( noncep +2, n+2 );
|
||||
be32enc( noncep +3, n+3 );
|
||||
|
||||
blakecoin_4way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
if ( hash[7] <= HTarget && fulltest( hash, ptarget ) )
|
||||
{
|
||||
found[0] = true;
|
||||
num_found++;
|
||||
nonces[0] = n;
|
||||
work_set_target_ratio( work, hash );
|
||||
}
|
||||
if ( (hash+8)[7] <= HTarget && fulltest( hash+8, ptarget ) )
|
||||
{
|
||||
found[1] = true;
|
||||
num_found++;
|
||||
nonces[1] = n+1;
|
||||
work_set_target_ratio( work, hash+8 );
|
||||
}
|
||||
if ( (hash+16)[7] <= HTarget && fulltest( hash+16, ptarget ) )
|
||||
{
|
||||
found[2] = true;
|
||||
num_found++;
|
||||
nonces[2] = n+2;
|
||||
work_set_target_ratio( work, hash+16 );
|
||||
}
|
||||
if ( (hash+24)[7] <= HTarget && fulltest( hash+24, ptarget ) )
|
||||
{
|
||||
found[3] = true;
|
||||
num_found++;
|
||||
nonces[3] = n+3;
|
||||
work_set_target_ratio( work, hash+24 );
|
||||
}
|
||||
n += 4;
|
||||
|
||||
} while ( (num_found == 0) && (n < max_nonce)
|
||||
&& !work_restart[thr_id].restart );
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
|
||||
// workaround to prevent flood of hash reports when nonce range exhasuted
|
||||
// and thread is spinning waiting for new work
|
||||
if ( ( n >= max_nonce ) && ( *hashes_done < 10 ) )
|
||||
{
|
||||
*hashes_done = 0;
|
||||
sleep(1);
|
||||
}
|
||||
|
||||
return num_found;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
71
algo/blake/blakecoin-gate.c
Normal file
71
algo/blake/blakecoin-gate.c
Normal file
@@ -0,0 +1,71 @@
|
||||
#include "blakecoin-gate.h"
|
||||
#include <memory.h>
|
||||
|
||||
// changed to get_max64_0x3fffffLL in cpuminer-multi-decred
|
||||
int64_t blakecoin_get_max64 ()
|
||||
{
|
||||
return 0x7ffffLL;
|
||||
// return 0x3fffffLL;
|
||||
}
|
||||
|
||||
// Blakecoin 4 way hashes so fast it runs out of nonces.
|
||||
// This is an attempt to solve this but the result may be
|
||||
// to rehash old nonces until new work is received.
|
||||
void bc4w_get_new_work( struct work* work, struct work* g_work, int thr_id,
|
||||
uint32_t *end_nonce_ptr, bool clean_job )
|
||||
{
|
||||
uint32_t *nonceptr = algo_gate.get_nonceptr( work->data );
|
||||
//
|
||||
// if ( have_stratum && ( *nonceptr >= *end_nonce_ptr ) )
|
||||
// algo_gate.stratum_gen_work( &stratum, g_work );
|
||||
|
||||
if ( memcmp( work->data, g_work->data, algo_gate.work_cmp_size )
|
||||
|| ( *nonceptr >= *end_nonce_ptr )
|
||||
|| ( work->job_id != g_work->job_id ) && clean_job )
|
||||
/*
|
||||
if ( memcmp( work->data, g_work->data, algo_gate.work_cmp_size )
|
||||
&& ( clean_job || ( *nonceptr >= *end_nonce_ptr )
|
||||
|| ( work->job_id != g_work->job_id ) ) )
|
||||
*/
|
||||
{
|
||||
work_free( work );
|
||||
work_copy( work, g_work );
|
||||
*nonceptr = 0xffffffffU / opt_n_threads * thr_id;
|
||||
if ( opt_randomize )
|
||||
*nonceptr += ( (rand() *4 ) & UINT32_MAX ) / opt_n_threads;
|
||||
*end_nonce_ptr = ( 0xffffffffU / opt_n_threads ) * (thr_id+1) - 0x20;
|
||||
// try incrementing the xnonce to chsnge the data
|
||||
// for ( int i = 0; i < work->xnonce2_size && !( ++work->xnonce2[i] ); i++ );
|
||||
}
|
||||
else
|
||||
++(*nonceptr);
|
||||
}
|
||||
|
||||
|
||||
// vanilla uses default gen merkle root, otherwise identical to blakecoin
|
||||
bool register_vanilla_algo( algo_gate_t* gate )
|
||||
{
|
||||
#if defined(BLAKECOIN_4WAY)
|
||||
// four_way_not_tested();
|
||||
gate->optimizations = FOUR_WAY_OPT;
|
||||
gate->scanhash = (void*)&scanhash_blakecoin_4way;
|
||||
gate->hash = (void*)&blakecoin_4way_hash;
|
||||
// gate->get_new_work = (void*)&bc4w_get_new_work;
|
||||
// blakecoin_4way_init( &blake_4way_init_ctx );
|
||||
#else
|
||||
gate->scanhash = (void*)&scanhash_blakecoin;
|
||||
gate->hash = (void*)&blakecoinhash;
|
||||
// blakecoin_init( &blake_init_ctx );
|
||||
#endif
|
||||
gate->optimizations = AVX2_OPT | FOUR_WAY_OPT;
|
||||
gate->get_max64 = (void*)&blakecoin_get_max64;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool register_blakecoin_algo( algo_gate_t* gate )
|
||||
{
|
||||
register_vanilla_algo( gate );
|
||||
gate->gen_merkle_root = (void*)&SHA256_gen_merkle_root;
|
||||
return true;
|
||||
}
|
||||
|
21
algo/blake/blakecoin-gate.h
Normal file
21
algo/blake/blakecoin-gate.h
Normal file
@@ -0,0 +1,21 @@
|
||||
#ifndef __BLAKECOIN_GATE_H__
|
||||
#define __BLAKECOIN_GATE_H__
|
||||
|
||||
#include "algo-gate-api.h"
|
||||
#include <stdint.h>
|
||||
|
||||
#if defined(FOUR_WAY) && defined(__AVX__)
|
||||
#define BLAKECOIN_4WAY
|
||||
#endif
|
||||
|
||||
#if defined (BLAKECOIN_4WAY)
|
||||
void blakecoin_4way_hash(void *state, const void *input);
|
||||
int scanhash_blakecoin_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
#endif
|
||||
|
||||
void blakecoinhash( void *state, const void *input );
|
||||
int scanhash_blakecoin( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
|
||||
#endif
|
@@ -1,4 +1,4 @@
|
||||
#include "algo-gate-api.h"
|
||||
#include "blakecoin-gate.h"
|
||||
#define BLAKE32_ROUNDS 8
|
||||
#include "sph_blake.h"
|
||||
|
||||
@@ -98,7 +98,7 @@ void blakecoin_gen_merkle_root ( char* merkle_root, struct stratum_ctx* sctx )
|
||||
SHA256( sctx->job.coinbase, (int)sctx->job.coinbase_size, merkle_root );
|
||||
}
|
||||
*/
|
||||
|
||||
/*
|
||||
// changed to get_max64_0x3fffffLL in cpuminer-multi-decred
|
||||
int64_t blakecoin_get_max64 ()
|
||||
{
|
||||
@@ -121,4 +121,4 @@ bool register_blakecoin_algo( algo_gate_t* gate )
|
||||
gate->gen_merkle_root = (void*)&SHA256_gen_merkle_root;
|
||||
return true;
|
||||
}
|
||||
|
||||
*/
|
||||
|
@@ -1,5 +1,4 @@
|
||||
#include "decred-gate.h"
|
||||
#include "sph_blake.h"
|
||||
#include "blake-hash-4way.h"
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
@@ -9,7 +8,6 @@
|
||||
#if defined (DECRED_4WAY)
|
||||
|
||||
static __thread blake256_4way_context blake_mid;
|
||||
static __thread bool ctx_midstate_done = false;
|
||||
|
||||
void decred_hash_4way( void *state, const void *input )
|
||||
{
|
||||
@@ -18,50 +16,14 @@ void decred_hash_4way( void *state, const void *input )
|
||||
uint32_t hash1[8] __attribute__ ((aligned (32)));
|
||||
uint32_t hash2[8] __attribute__ ((aligned (32)));
|
||||
uint32_t hash3[8] __attribute__ ((aligned (32)));
|
||||
blake256_4way_context ctx __attribute__ ((aligned (64)));
|
||||
|
||||
sph_blake256_context ctx2 __attribute__ ((aligned (64)));
|
||||
uint32_t hash[16] __attribute__ ((aligned (64)));
|
||||
uint32_t sin0[45], sin1[45], sin2[45], sin3[45];
|
||||
|
||||
mm_deinterleave_4x32x( sin0, sin1, sin2, sin3, input, 180*8 );
|
||||
|
||||
void *tail = input + ( DECRED_MIDSTATE_LEN << 2 );
|
||||
int tail_len = 180 - DECRED_MIDSTATE_LEN;
|
||||
blake256_4way_context ctx __attribute__ ((aligned (64)));
|
||||
|
||||
memcpy( &ctx, &blake_mid, sizeof(blake_mid) );
|
||||
blake256_4way( &ctx, tail, tail_len );
|
||||
blake256_4way_close( &ctx, vhash );
|
||||
/*
|
||||
sph_blake256_init( &ctx2 );
|
||||
sph_blake256( &ctx2, sin0, 180 );
|
||||
sph_blake256_close( &ctx2, hash );
|
||||
*/
|
||||
/*
|
||||
blake256_4way_init( &ctx );
|
||||
blake256_4way( &ctx, input, 180 );
|
||||
blake256_4way_close( &ctx, vhash );
|
||||
*/
|
||||
mm_deinterleave_4x32( hash0, hash1, hash2, hash3, vhash, 256 );
|
||||
/*
|
||||
for ( int i = 0; i < 8; i++ )
|
||||
if ( hash[i] != hash0[i] )
|
||||
printf(" hash mismatch, i = %u\n",i);
|
||||
|
||||
printf("hash: %08lx %08lx %08lx %08lx\n", *hash, *(hash+1),
|
||||
*(hash+2), *(hash+3) );
|
||||
printf("hash0: %08lx %08lx %08lx %08lx\n", *hash0, *(hash0+1),
|
||||
*(hash0+2), *(hash0+3) );
|
||||
printf("\n");
|
||||
*/
|
||||
|
||||
memcpy( state, hash0, 32 );
|
||||
memcpy( state+32, hash1, 32 );
|
||||
memcpy( state+64, hash2, 32 );
|
||||
memcpy( state+96, hash3, 32 );
|
||||
|
||||
// memcpy( state, hash, 32 );
|
||||
|
||||
mm_deinterleave_4x32( state, state+32, state+64, state+96, vhash, 256 );
|
||||
}
|
||||
|
||||
int scanhash_decred_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
@@ -69,21 +31,21 @@ int scanhash_decred_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
{
|
||||
uint32_t vdata[48*4] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[8*4] __attribute__ ((aligned (32)));
|
||||
uint32_t _ALIGN(64) edata[48];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[DECRED_NONCE_INDEX];
|
||||
uint32_t n = first_nonce;
|
||||
const uint32_t HTarget = opt_benchmark ? 0x7f : ptarget[7];
|
||||
uint32_t _ALIGN(64) edata[48];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[DECRED_NONCE_INDEX];
|
||||
uint32_t n = first_nonce;
|
||||
const uint32_t HTarget = opt_benchmark ? 0x7f : ptarget[7];
|
||||
uint32_t *nonces = work->nonces;
|
||||
bool *found = work->nfound;
|
||||
int num_found = 0;
|
||||
|
||||
ctx_midstate_done = false;
|
||||
memcpy( edata, pdata, 180 );
|
||||
// copy to buffer guaranteed to be aligned.
|
||||
memcpy( edata, pdata, 180 );
|
||||
|
||||
// use the old way until new way updated for size.
|
||||
mm_interleave_4x32( vdata, edata, edata, edata, edata, 180*8 );
|
||||
mm_interleave_4x32x( vdata, edata, edata, edata, edata, 180*8 );
|
||||
|
||||
blake256_4way_init( &blake_mid );
|
||||
blake256_4way( &blake_mid, vdata, DECRED_MIDSTATE_LEN );
|
||||
@@ -106,22 +68,13 @@ int scanhash_decred_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
nonces[0] = n;
|
||||
pdata[DECRED_NONCE_INDEX] = n;
|
||||
}
|
||||
/*
|
||||
if ( (hash+8)[7] <= HTarget && fulltest( hash+8, ptarget ) )
|
||||
{
|
||||
printf("found 1\n");
|
||||
|
||||
printf("vhash: %08lx %08lx %08lx %08lx\n", hash[8], hash[9], hash[10],hash[11] );
|
||||
printf("vhash: %08lx %08lx %08lx %08lx\n", hash[12], hash[13], hash[14],hash[15] );
|
||||
printf("shash: %08lx %08lx %08lx %08lx\n", shash[0], shash[1], shash[2],shash[3] );
|
||||
printf("shash: %08lx %08lx %08lx %08lx\n\n", shash[4], shash[5], shash[6],shash[7] );
|
||||
|
||||
work_set_target_ratio( work, hash+8 );
|
||||
found[1] = true;
|
||||
num_found++;
|
||||
nonces[1] = n+1;
|
||||
}
|
||||
*/
|
||||
if ( (hash+16)[7] <= HTarget && fulltest( hash+16, ptarget ) )
|
||||
{
|
||||
work_set_target_ratio( work, hash+16 );
|
||||
@@ -129,24 +82,15 @@ printf("shash: %08lx %08lx %08lx %08lx\n\n", shash[4], shash[5], shash[6],shash[
|
||||
num_found++;
|
||||
nonces[2] = n+2;
|
||||
}
|
||||
/*
|
||||
|
||||
if ( (hash+24)[7] <= HTarget && fulltest( hash+24, ptarget ) )
|
||||
{
|
||||
printf("found 3\n");
|
||||
|
||||
printf("vhash: %08lx %08lx %08lx %08lx\n", hash[0], hash[1], hash[2],hash[3] );
|
||||
printf("vhash: %08lx %08lx %08lx %08lx\n", hash[4], hash[5], hash[6],hash[7] );
|
||||
printf("shash: %08lx %08lx %08lx %08lx\n", shash[0], shash[1], shash[2],shash[3] );
|
||||
printf("shash: %08lx %08lx %08lx %08lx\n\n", shash[4], shash[5], shash[6],shash[7] );
|
||||
|
||||
work_set_target_ratio( work, hash+24 );
|
||||
found[3] = true;
|
||||
num_found++;
|
||||
nonces[3] = n+3;
|
||||
}
|
||||
*/
|
||||
n += 2;
|
||||
// n += 4;
|
||||
n += 4;
|
||||
} while ( (num_found == 0) && (n < max_nonce)
|
||||
&& !work_restart[thr_id].restart );
|
||||
|
||||
|
@@ -1,4 +1,7 @@
|
||||
#include "pentablake-gate.h"
|
||||
|
||||
#ifdef __AVX2__
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
@@ -9,8 +12,6 @@
|
||||
|
||||
//#define DEBUG_ALGO
|
||||
|
||||
#ifdef PENTABLAKE_4WAY
|
||||
|
||||
extern void pentablakehash_4way( void *output, const void *input )
|
||||
{
|
||||
unsigned char _ALIGN(32) hash[128];
|
||||
|
@@ -4,7 +4,7 @@
|
||||
#include "algo-gate-api.h"
|
||||
#include <stdint.h>
|
||||
|
||||
#if defined(FOUR_WAY) && defined(__AVX__)
|
||||
#if defined(FOUR_WAY) && defined(__AVX2__)
|
||||
#define PENTABLAKE_4WAY
|
||||
#endif
|
||||
|
||||
|
@@ -41,19 +41,13 @@
|
||||
extern "C"{
|
||||
#endif
|
||||
|
||||
//#include "sph_bmw.h"
|
||||
|
||||
//#if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_BMW
|
||||
#define SPH_SMALL_FOOTPRINT_BMW 1
|
||||
//#endif
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning (disable: 4146)
|
||||
#endif
|
||||
|
||||
//#undef SPH_ROTL64
|
||||
//#define SPH_ROTL64(x,n) (((x) << (n)) | ((x) >> (64 - (n))))
|
||||
//#define SPH_ROTL64(x,n) mm256_rotl_64(x,n)
|
||||
#define LPAR (
|
||||
|
||||
// BMW256
|
||||
|
||||
static const sph_u32 IV256[] = {
|
||||
SPH_C32(0x40414243), SPH_C32(0x44454647),
|
||||
@@ -66,8 +60,7 @@ static const sph_u32 IV256[] = {
|
||||
SPH_C32(0x78797A7B), SPH_C32(0x7C7D7E7F)
|
||||
};
|
||||
|
||||
#if SPH_64
|
||||
|
||||
// BMW512
|
||||
static const sph_u64 IV512[] = {
|
||||
SPH_C64(0x8081828384858687), SPH_C64(0x88898A8B8C8D8E8F),
|
||||
SPH_C64(0x9091929394959697), SPH_C64(0x98999A9B9C9D9E9F),
|
||||
@@ -79,74 +72,113 @@ static const sph_u64 IV512[] = {
|
||||
SPH_C64(0xF0F1F2F3F4F5F6F7), SPH_C64(0xF8F9FAFBFCFDFEFF)
|
||||
};
|
||||
|
||||
#endif
|
||||
// BMW256
|
||||
|
||||
#define XCAT(x, y) XCAT_(x, y)
|
||||
#define XCAT_(x, y) x ## y
|
||||
#define ss0(x) \
|
||||
_mm_xor_si128( _mm_xor_si128( _mm_srli_epi32( (x), 1), \
|
||||
_mm_slli_epi32( (x), 3) ), \
|
||||
_mm_xor_si128( mm_rotl_32( (x), 4), \
|
||||
mm_rotl_32( (x), 19) ) )
|
||||
|
||||
#define LPAR (
|
||||
#define ss1(x) \
|
||||
_mm_xor_si128( _mm_xor_si128( _mm_srli_epi32( (x), 1), \
|
||||
_mm_slli_epi32( (x), 2) ), \
|
||||
_mm_xor_si128( mm_rotl_32( (x), 8), \
|
||||
mm_rotl_32( (x), 23) ) )
|
||||
|
||||
/*
|
||||
#define ss0(x) (((x) >> 1) ^ SPH_T32((x) << 3) \
|
||||
^ SPH_ROTL32(x, 4) ^ SPH_ROTL32(x, 19))
|
||||
#define ss1(x) (((x) >> 1) ^ SPH_T32((x) << 2) \
|
||||
^ SPH_ROTL32(x, 8) ^ SPH_ROTL32(x, 23))
|
||||
#define ss2(x) (((x) >> 2) ^ SPH_T32((x) << 1) \
|
||||
^ SPH_ROTL32(x, 12) ^ SPH_ROTL32(x, 25))
|
||||
#define ss3(x) (((x) >> 2) ^ SPH_T32((x) << 2) \
|
||||
^ SPH_ROTL32(x, 15) ^ SPH_ROTL32(x, 29))
|
||||
#define ss4(x) (((x) >> 1) ^ (x))
|
||||
#define ss5(x) (((x) >> 2) ^ (x))
|
||||
#define rs1(x) SPH_ROTL32(x, 3)
|
||||
#define rs2(x) SPH_ROTL32(x, 7)
|
||||
#define rs3(x) SPH_ROTL32(x, 13)
|
||||
#define rs4(x) SPH_ROTL32(x, 16)
|
||||
#define rs5(x) SPH_ROTL32(x, 19)
|
||||
#define rs6(x) SPH_ROTL32(x, 23)
|
||||
#define rs7(x) SPH_ROTL32(x, 27)
|
||||
#define ss2(x) \
|
||||
_mm_xor_si128( _mm_xor_si128( _mm_srli_epi32( (x), 2), \
|
||||
_mm_slli_epi32( (x), 1) ), \
|
||||
_mm_xor_si128( mm_rotl_32( (x), 12), \
|
||||
mm_rotl_32( (x), 25) ) )
|
||||
|
||||
#define Ks(j) SPH_T32((sph_u32)(j) * SPH_C32(0x05555555))
|
||||
#define ss3(x) \
|
||||
_mm_xor_si128( _mm_xor_si128( _mm_srli_epi32( (x), 2), \
|
||||
_mm_slli_epi32( (x), 2) ), \
|
||||
_mm_xor_si128( mm_rotl_32( (x), 15), \
|
||||
mm_rotl_32( (x), 29) ) )
|
||||
|
||||
#define add_elt_s(mf, hf, j0m, j1m, j3m, j4m, j7m, j10m, j11m, j16) \
|
||||
(SPH_T32(SPH_ROTL32(mf(j0m), j1m) + SPH_ROTL32(mf(j3m), j4m) \
|
||||
- SPH_ROTL32(mf(j10m), j11m) + Ks(j16)) ^ hf(j7m))
|
||||
#define ss4(x) \
|
||||
_mm_xor_si128( (x), _mm_srli_epi32( (x), 1 ) )
|
||||
|
||||
#define expand1s_inner(qf, mf, hf, i16, \
|
||||
i0, i1, i2, i3, i4, i5, i6, i7, i8, \
|
||||
i9, i10, i11, i12, i13, i14, i15, \
|
||||
i0m, i1m, i3m, i4m, i7m, i10m, i11m) \
|
||||
SPH_T32(ss1(qf(i0)) + ss2(qf(i1)) + ss3(qf(i2)) + ss0(qf(i3)) \
|
||||
+ ss1(qf(i4)) + ss2(qf(i5)) + ss3(qf(i6)) + ss0(qf(i7)) \
|
||||
+ ss1(qf(i8)) + ss2(qf(i9)) + ss3(qf(i10)) + ss0(qf(i11)) \
|
||||
+ ss1(qf(i12)) + ss2(qf(i13)) + ss3(qf(i14)) + ss0(qf(i15)) \
|
||||
+ add_elt_s(mf, hf, i0m, i1m, i3m, i4m, i7m, i10m, i11m, i16))
|
||||
#define ss5(x) \
|
||||
_mm_xor_si128( (x), _mm_srli_epi32( (x), 2 ) )
|
||||
|
||||
#define expand1s(qf, mf, hf, i16) \
|
||||
expand1s_(qf, mf, hf, i16, I16_ ## i16, M16_ ## i16)
|
||||
#define expand1s_(qf, mf, hf, i16, ix, iy) \
|
||||
expand1s_inner LPAR qf, mf, hf, i16, ix, iy)
|
||||
#define rs1(x) mm_rotl_32( x, 3 )
|
||||
#define rs2(x) mm_rotl_32( x, 7 )
|
||||
#define rs3(x) mm_rotl_32( x, 13 )
|
||||
#define rs4(x) mm_rotl_32( x, 16 )
|
||||
#define rs5(x) mm_rotl_32( x, 19 )
|
||||
#define rs6(x) mm_rotl_32( x, 23 )
|
||||
#define rs7(x) mm_rotl_32( x, 27 )
|
||||
|
||||
#define expand2s_inner(qf, mf, hf, i16, \
|
||||
i0, i1, i2, i3, i4, i5, i6, i7, i8, \
|
||||
i9, i10, i11, i12, i13, i14, i15, \
|
||||
i0m, i1m, i3m, i4m, i7m, i10m, i11m) \
|
||||
SPH_T32(qf(i0) + rs1(qf(i1)) + qf(i2) + rs2(qf(i3)) \
|
||||
+ qf(i4) + rs3(qf(i5)) + qf(i6) + rs4(qf(i7)) \
|
||||
+ qf(i8) + rs5(qf(i9)) + qf(i10) + rs6(qf(i11)) \
|
||||
+ qf(i12) + rs7(qf(i13)) + ss4(qf(i14)) + ss5(qf(i15)) \
|
||||
+ add_elt_s(mf, hf, i0m, i1m, i3m, i4m, i7m, i10m, i11m, i16))
|
||||
#define rol_off_32( M, j, off ) \
|
||||
mm_rotl_32( M[ ( (j) + (off) ) & 0xF ] , \
|
||||
( ( (j) + (off) ) & 0xF ) + 1 )
|
||||
|
||||
#define expand2s(qf, mf, hf, i16) \
|
||||
expand2s_(qf, mf, hf, i16, I16_ ## i16, M16_ ## i16)
|
||||
#define expand2s_(qf, mf, hf, i16, ix, iy) \
|
||||
expand2s_inner LPAR qf, mf, hf, i16, ix, iy)
|
||||
*/
|
||||
#if SPH_64
|
||||
#define add_elt_s( M, H, j ) \
|
||||
_mm_xor_si128( \
|
||||
_mm_add_epi32( \
|
||||
_mm_sub_epi32( _mm_add_epi32( rol_off_32( M, j, 0 ), \
|
||||
rol_off_32( M, j, 3 ) ), \
|
||||
rol_off_32( M, j, 10 ) ), \
|
||||
_mm_set1_epi32( ( (j) + 16 ) * 0x05555555UL ) ), \
|
||||
H[ ( (j)+7 ) & 0xF ] )
|
||||
|
||||
|
||||
#define expand1s( qt, M, H, i ) \
|
||||
_mm_add_epi32( \
|
||||
_mm_add_epi32( \
|
||||
_mm_add_epi32( \
|
||||
_mm_add_epi32( \
|
||||
_mm_add_epi32( ss1( qt[ (i)-16 ] ), \
|
||||
ss2( qt[ (i)-15 ] ) ), \
|
||||
_mm_add_epi32( ss3( qt[ (i)-14 ] ), \
|
||||
ss0( qt[ (i)-13 ] ) ) ), \
|
||||
_mm_add_epi32( \
|
||||
_mm_add_epi32( ss1( qt[ (i)-12 ] ), \
|
||||
ss2( qt[ (i)-11 ] ) ), \
|
||||
_mm_add_epi32( ss3( qt[ (i)-10 ] ), \
|
||||
ss0( qt[ (i)- 9 ] ) ) ) ), \
|
||||
_mm_add_epi32( \
|
||||
_mm_add_epi32( \
|
||||
_mm_add_epi32( ss1( qt[ (i)- 8 ] ), \
|
||||
ss2( qt[ (i)- 7 ] ) ), \
|
||||
_mm_add_epi32( ss3( qt[ (i)- 6 ] ), \
|
||||
ss0( qt[ (i)- 5 ] ) ) ), \
|
||||
_mm_add_epi32( \
|
||||
_mm_add_epi32( ss1( qt[ (i)- 4 ] ), \
|
||||
ss2( qt[ (i)- 3 ] ) ), \
|
||||
_mm_add_epi32( ss3( qt[ (i)- 2 ] ), \
|
||||
ss0( qt[ (i)- 1 ] ) ) ) ) ), \
|
||||
add_elt_s( M, H, (i)-16 ) )
|
||||
|
||||
#define expand2s( qt, M, H, i) \
|
||||
_mm_add_epi32( \
|
||||
_mm_add_epi32( \
|
||||
_mm_add_epi32( \
|
||||
_mm_add_epi32( \
|
||||
_mm_add_epi32( qt[ (i)-16 ], rs1( qt[ (i)-15 ] ) ), \
|
||||
_mm_add_epi32( qt[ (i)-14 ], rs2( qt[ (i)-13 ] ) ) ), \
|
||||
_mm_add_epi64( \
|
||||
_mm_add_epi32( qt[ (i)-12 ], rs3( qt[ (i)-11 ] ) ), \
|
||||
_mm_add_epi32( qt[ (i)-10 ], rs4( qt[ (i)- 9 ] ) ) ) ), \
|
||||
_mm_add_epi32( \
|
||||
_mm_add_epi32( \
|
||||
_mm_add_epi32( qt[ (i)- 8 ], rs5( qt[ (i)- 7 ] ) ), \
|
||||
_mm_add_epi32( qt[ (i)- 6 ], rs6( qt[ (i)- 5 ] ) ) ), \
|
||||
_mm_add_epi32( \
|
||||
_mm_add_epi32( qt[ (i)- 4 ], rs7( qt[ (i)- 3 ] ) ), \
|
||||
_mm_add_epi32( ss4( qt[ (i)- 2 ] ), \
|
||||
ss5( qt[ (i)- 1 ] ) ) ) ) ), \
|
||||
add_elt_s( M, H, (i)-16 ) )
|
||||
|
||||
// BMW512
|
||||
|
||||
#define sb0(x) \
|
||||
_mm256_xor_si256( _mm256_xor_si256( _mm256_srli_epi64( (x), 1), \
|
||||
_mm256_slli_epi64( (x), 3) ), \
|
||||
_mm256_xor_si256( mm256_rotl_64( (x), 4), \
|
||||
_mm256_xor_si256( mm256_rotl_64( (x), 4), \
|
||||
mm256_rotl_64( (x), 37) ) )
|
||||
|
||||
#define sb1(x) \
|
||||
@@ -181,18 +213,18 @@ static const sph_u64 IV512[] = {
|
||||
#define rb6(x) mm256_rotl_64( x, 43 )
|
||||
#define rb7(x) mm256_rotl_64( x, 53 )
|
||||
|
||||
#define rol_off( M, j, off ) \
|
||||
mm256_rotl_64( M[ ( (j) + (off) ) & 15 ] , \
|
||||
( ( (j) + (off) ) & 15 ) + 1 )
|
||||
#define rol_off_64( M, j, off ) \
|
||||
mm256_rotl_64( M[ ( (j) + (off) ) & 0xF ] , \
|
||||
( ( (j) + (off) ) & 0xF ) + 1 )
|
||||
|
||||
#define add_elt_b( M, H, j ) \
|
||||
_mm256_xor_si256( \
|
||||
_mm256_add_epi64( \
|
||||
_mm256_sub_epi64( _mm256_add_epi64( rol_off( M, j, 0 ), \
|
||||
rol_off( M, j, 3 ) ), \
|
||||
rol_off( M, j, 10 ) ), \
|
||||
_mm256_sub_epi64( _mm256_add_epi64( rol_off_64( M, j, 0 ), \
|
||||
rol_off_64( M, j, 3 ) ), \
|
||||
rol_off_64( M, j, 10 ) ), \
|
||||
_mm256_set1_epi64x( ( (j) + 16 ) * 0x0555555555555555ULL ) ), \
|
||||
H[ ( (j)+7 ) & 15 ] )
|
||||
H[ ( (j)+7 ) & 0xF ] )
|
||||
|
||||
#define expand1b( qt, M, H, i ) \
|
||||
_mm256_add_epi64( \
|
||||
@@ -241,132 +273,301 @@ static const sph_u64 IV512[] = {
|
||||
sb5( qt[ (i)- 1 ] ) ) ) ) ), \
|
||||
add_elt_b( M, H, (i)-16 ) )
|
||||
|
||||
#endif
|
||||
// BMW256
|
||||
|
||||
/*
|
||||
#define MAKE_W( i0, op01, i1, op12, i2, op23, i3, op34, i4) \
|
||||
((M(i0) ^ H(i0)) op01 (M(i1) ^ H(i1)) op12 (M(i2) ^ H(i2)) \
|
||||
op23 (M(i3) ^ H(i3)) op34 (M(i4) ^ H(i4)))
|
||||
*/
|
||||
#define Ws0 \
|
||||
_mm_add_epi32( \
|
||||
_mm_add_epi32( \
|
||||
_mm_add_epi32( \
|
||||
_mm_sub_epi32( _mm_xor_si128( M[ 5], H[ 5] ), \
|
||||
_mm_xor_si128( M[ 7], H[ 7] ) ), \
|
||||
_mm_xor_si128( M[10], H[10] ) ), \
|
||||
_mm_xor_si128( M[13], H[13] ) ), \
|
||||
_mm_xor_si128( M[14], H[14] ) )
|
||||
|
||||
/*
|
||||
#define Ws0 MAKE_W(SPH_T32, 5, -, 7, +, 10, +, 13, +, 14)
|
||||
#define Ws1 MAKE_W(SPH_T32, 6, -, 8, +, 11, +, 14, -, 15)
|
||||
#define Ws2 MAKE_W(SPH_T32, 0, +, 7, +, 9, -, 12, +, 15)
|
||||
#define Ws3 MAKE_W(SPH_T32, 0, -, 1, +, 8, -, 10, +, 13)
|
||||
#define Ws4 MAKE_W(SPH_T32, 1, +, 2, +, 9, -, 11, -, 14)
|
||||
#define Ws5 MAKE_W(SPH_T32, 3, -, 2, +, 10, -, 12, +, 15)
|
||||
#define Ws6 MAKE_W(SPH_T32, 4, -, 0, -, 3, -, 11, +, 13)
|
||||
#define Ws7 MAKE_W(SPH_T32, 1, -, 4, -, 5, -, 12, -, 14)
|
||||
#define Ws8 MAKE_W(SPH_T32, 2, -, 5, -, 6, +, 13, -, 15)
|
||||
#define Ws9 MAKE_W(SPH_T32, 0, -, 3, +, 6, -, 7, +, 14)
|
||||
#define Ws10 MAKE_W(SPH_T32, 8, -, 1, -, 4, -, 7, +, 15)
|
||||
#define Ws11 MAKE_W(SPH_T32, 8, -, 0, -, 2, -, 5, +, 9)
|
||||
#define Ws12 MAKE_W(SPH_T32, 1, +, 3, -, 6, -, 9, +, 10)
|
||||
#define Ws13 MAKE_W(SPH_T32, 2, +, 4, +, 7, +, 10, +, 11)
|
||||
#define Ws14 MAKE_W(SPH_T32, 3, -, 5, +, 8, -, 11, -, 12)
|
||||
#define Ws15 MAKE_W(SPH_T32, 12, -, 4, -, 6, -, 9, +, 13)
|
||||
#define Ws1 \
|
||||
_mm_sub_epi32( \
|
||||
_mm_add_epi32( \
|
||||
_mm_add_epi32( \
|
||||
_mm_sub_epi32( _mm_xor_si128( M[ 6], H[ 6] ), \
|
||||
_mm_xor_si128( M[ 8], H[ 8] ) ), \
|
||||
_mm_xor_si128( M[11], H[11] ) ), \
|
||||
_mm_xor_si128( M[14], H[14] ) ), \
|
||||
_mm_xor_si128( M[15], H[15] ) )
|
||||
|
||||
#if SPH_SMALL_FOOTPRINT_BMW
|
||||
#define Ws2 \
|
||||
_mm_add_epi32( \
|
||||
_mm_sub_epi32( \
|
||||
_mm_add_epi32( \
|
||||
_mm_add_epi32( _mm_xor_si128( M[ 0], H[ 0] ), \
|
||||
_mm_xor_si128( M[ 7], H[ 7] ) ), \
|
||||
_mm_xor_si128( M[ 9], H[ 9] ) ), \
|
||||
_mm_xor_si128( M[12], H[12] ) ), \
|
||||
_mm_xor_si128( M[15], H[15] ) )
|
||||
|
||||
#define MAKE_Qas do { \
|
||||
unsigned u; \
|
||||
sph_u32 Ws[16]; \
|
||||
Ws[ 0] = Ws0; \
|
||||
Ws[ 1] = Ws1; \
|
||||
Ws[ 2] = Ws2; \
|
||||
Ws[ 3] = Ws3; \
|
||||
Ws[ 4] = Ws4; \
|
||||
Ws[ 5] = Ws5; \
|
||||
Ws[ 6] = Ws6; \
|
||||
Ws[ 7] = Ws7; \
|
||||
Ws[ 8] = Ws8; \
|
||||
Ws[ 9] = Ws9; \
|
||||
Ws[10] = Ws10; \
|
||||
Ws[11] = Ws11; \
|
||||
Ws[12] = Ws12; \
|
||||
Ws[13] = Ws13; \
|
||||
Ws[14] = Ws14; \
|
||||
Ws[15] = Ws15; \
|
||||
for (u = 0; u < 15; u += 5) { \
|
||||
qt[u + 0] = SPH_T32(ss0(Ws[u + 0]) + H(u + 1)); \
|
||||
qt[u + 1] = SPH_T32(ss1(Ws[u + 1]) + H(u + 2)); \
|
||||
qt[u + 2] = SPH_T32(ss2(Ws[u + 2]) + H(u + 3)); \
|
||||
qt[u + 3] = SPH_T32(ss3(Ws[u + 3]) + H(u + 4)); \
|
||||
qt[u + 4] = SPH_T32(ss4(Ws[u + 4]) + H(u + 5)); \
|
||||
} \
|
||||
qt[15] = SPH_T32(ss0(Ws[15]) + H(0)); \
|
||||
} while (0)
|
||||
#define Ws3 \
|
||||
_mm_add_epi32( \
|
||||
_mm_sub_epi32( \
|
||||
_mm_add_epi32( \
|
||||
_mm_sub_epi32( _mm_xor_si128( M[ 0], H[ 0] ), \
|
||||
_mm_xor_si128( M[ 1], H[ 1] ) ), \
|
||||
_mm_xor_si128( M[ 8], H[ 8] ) ), \
|
||||
_mm_xor_si128( M[10], H[10] ) ), \
|
||||
_mm_xor_si128( M[13], H[13] ) )
|
||||
|
||||
#define MAKE_Qbs do { \
|
||||
qt[16] = expand1s(Qs, M, H, 16); \
|
||||
qt[17] = expand1s(Qs, M, H, 17); \
|
||||
qt[18] = expand2s(Qs, M, H, 18); \
|
||||
qt[19] = expand2s(Qs, M, H, 19); \
|
||||
qt[20] = expand2s(Qs, M, H, 20); \
|
||||
qt[21] = expand2s(Qs, M, H, 21); \
|
||||
qt[22] = expand2s(Qs, M, H, 22); \
|
||||
qt[23] = expand2s(Qs, M, H, 23); \
|
||||
qt[24] = expand2s(Qs, M, H, 24); \
|
||||
qt[25] = expand2s(Qs, M, H, 25); \
|
||||
qt[26] = expand2s(Qs, M, H, 26); \
|
||||
qt[27] = expand2s(Qs, M, H, 27); \
|
||||
qt[28] = expand2s(Qs, M, H, 28); \
|
||||
qt[29] = expand2s(Qs, M, H, 29); \
|
||||
qt[30] = expand2s(Qs, M, H, 30); \
|
||||
qt[31] = expand2s(Qs, M, H, 31); \
|
||||
} while (0)
|
||||
#define Ws4 \
|
||||
_mm_sub_epi32( \
|
||||
_mm_sub_epi32( \
|
||||
_mm_add_epi32( \
|
||||
_mm_add_epi32( _mm_xor_si128( M[ 1], H[ 1] ), \
|
||||
_mm_xor_si128( M[ 2], H[ 2] ) ), \
|
||||
_mm_xor_si128( M[ 9], H[ 9] ) ), \
|
||||
_mm_xor_si128( M[11], H[11] ) ), \
|
||||
_mm_xor_si128( M[14], H[14] ) )
|
||||
|
||||
#else
|
||||
#define Ws5 \
|
||||
_mm_add_epi32( \
|
||||
_mm_sub_epi32( \
|
||||
_mm_add_epi32( \
|
||||
_mm_sub_epi32( _mm_xor_si128( M[ 3], H[ 3] ), \
|
||||
_mm_xor_si128( M[ 2], H[ 2] ) ), \
|
||||
_mm_xor_si128( M[10], H[10] ) ), \
|
||||
_mm_xor_si128( M[12], H[12] ) ), \
|
||||
_mm_xor_si128( M[15], H[15] ) )
|
||||
|
||||
#define MAKE_Qas do { \
|
||||
qt[ 0] = SPH_T32(ss0(Ws0 ) + H( 1)); \
|
||||
qt[ 1] = SPH_T32(ss1(Ws1 ) + H( 2)); \
|
||||
qt[ 2] = SPH_T32(ss2(Ws2 ) + H( 3)); \
|
||||
qt[ 3] = SPH_T32(ss3(Ws3 ) + H( 4)); \
|
||||
qt[ 4] = SPH_T32(ss4(Ws4 ) + H( 5)); \
|
||||
qt[ 5] = SPH_T32(ss0(Ws5 ) + H( 6)); \
|
||||
qt[ 6] = SPH_T32(ss1(Ws6 ) + H( 7)); \
|
||||
qt[ 7] = SPH_T32(ss2(Ws7 ) + H( 8)); \
|
||||
qt[ 8] = SPH_T32(ss3(Ws8 ) + H( 9)); \
|
||||
qt[ 9] = SPH_T32(ss4(Ws9 ) + H(10)); \
|
||||
qt[10] = SPH_T32(ss0(Ws10) + H(11)); \
|
||||
qt[11] = SPH_T32(ss1(Ws11) + H(12)); \
|
||||
qt[12] = SPH_T32(ss2(Ws12) + H(13)); \
|
||||
qt[13] = SPH_T32(ss3(Ws13) + H(14)); \
|
||||
qt[14] = SPH_T32(ss4(Ws14) + H(15)); \
|
||||
qt[15] = SPH_T32(ss0(Ws15) + H( 0)); \
|
||||
} while (0)
|
||||
#define Ws6 \
|
||||
_mm_add_epi32( \
|
||||
_mm_sub_epi32( \
|
||||
_mm_sub_epi32( \
|
||||
_mm_sub_epi32( _mm_xor_si128( M[ 4], H[ 4] ), \
|
||||
_mm_xor_si128( M[ 0], H[ 0] ) ), \
|
||||
_mm_xor_si128( M[ 3], H[ 3] ) ), \
|
||||
_mm_xor_si128( M[11], H[11] ) ), \
|
||||
_mm_xor_si128( M[13], H[13] ) )
|
||||
|
||||
#define MAKE_Qbs do { \
|
||||
qt[16] = expand1s(Qs, M, H, 16); \
|
||||
qt[17] = expand1s(Qs, M, H, 17); \
|
||||
qt[18] = expand2s(Qs, M, H, 18); \
|
||||
qt[19] = expand2s(Qs, M, H, 19); \
|
||||
qt[20] = expand2s(Qs, M, H, 20); \
|
||||
qt[21] = expand2s(Qs, M, H, 21); \
|
||||
qt[22] = expand2s(Qs, M, H, 22); \
|
||||
qt[23] = expand2s(Qs, M, H, 23); \
|
||||
qt[24] = expand2s(Qs, M, H, 24); \
|
||||
qt[25] = expand2s(Qs, M, H, 25); \
|
||||
qt[26] = expand2s(Qs, M, H, 26); \
|
||||
qt[27] = expand2s(Qs, M, H, 27); \
|
||||
qt[28] = expand2s(Qs, M, H, 28); \
|
||||
qt[29] = expand2s(Qs, M, H, 29); \
|
||||
qt[30] = expand2s(Qs, M, H, 30); \
|
||||
qt[31] = expand2s(Qs, M, H, 31); \
|
||||
} while (0)
|
||||
#define Ws7 \
|
||||
_mm_sub_epi32( \
|
||||
_mm_sub_epi32( \
|
||||
_mm_sub_epi32( \
|
||||
_mm_sub_epi32( _mm_xor_si128( M[ 1], H[ 1] ), \
|
||||
_mm_xor_si128( M[ 4], H[ 4] ) ), \
|
||||
_mm_xor_si128( M[ 5], H[ 5] ) ), \
|
||||
_mm_xor_si128( M[12], H[12] ) ), \
|
||||
_mm_xor_si128( M[14], H[14] ) )
|
||||
|
||||
#endif
|
||||
#define Ws8 \
|
||||
_mm_sub_epi32( \
|
||||
_mm_add_epi32( \
|
||||
_mm_sub_epi32( \
|
||||
_mm_sub_epi32( _mm_xor_si128( M[ 2], H[ 2] ), \
|
||||
_mm_xor_si128( M[ 5], H[ 5] ) ), \
|
||||
_mm_xor_si128( M[ 6], H[ 6] ) ), \
|
||||
_mm_xor_si128( M[13], H[13] ) ), \
|
||||
_mm_xor_si128( M[15], H[15] ) )
|
||||
|
||||
#define MAKE_Qs do { \
|
||||
MAKE_Qas; \
|
||||
MAKE_Qbs; \
|
||||
} while (0)
|
||||
#define Ws9 \
|
||||
_mm_add_epi32( \
|
||||
_mm_sub_epi32( \
|
||||
_mm_add_epi32( \
|
||||
_mm_sub_epi32( _mm_xor_si128( M[ 0], H[ 0] ), \
|
||||
_mm_xor_si128( M[ 3], H[ 3] ) ), \
|
||||
_mm_xor_si128( M[ 6], H[ 6] ) ), \
|
||||
_mm_xor_si128( M[ 7], H[ 7] ) ), \
|
||||
_mm_xor_si128( M[14], H[14] ) )
|
||||
|
||||
#define Qs(j) (qt[j])
|
||||
*/
|
||||
#if SPH_64
|
||||
#define Ws10 \
|
||||
_mm_add_epi32( \
|
||||
_mm_sub_epi32( \
|
||||
_mm_sub_epi32( \
|
||||
_mm_sub_epi32( _mm_xor_si128( M[ 8], H[ 8] ), \
|
||||
_mm_xor_si128( M[ 1], H[ 1] ) ), \
|
||||
_mm_xor_si128( M[ 4], H[ 4] ) ), \
|
||||
_mm_xor_si128( M[ 7], H[ 7] ) ), \
|
||||
_mm_xor_si128( M[15], H[15] ) )
|
||||
|
||||
#define Ws11 \
|
||||
_mm_add_epi32( \
|
||||
_mm_sub_epi32( \
|
||||
_mm_sub_epi32( \
|
||||
_mm_sub_epi32( _mm_xor_si128( M[ 8], H[ 8] ), \
|
||||
_mm_xor_si128( M[ 0], H[ 0] ) ), \
|
||||
_mm_xor_si128( M[ 2], H[ 2] ) ), \
|
||||
_mm_xor_si128( M[ 5], H[ 5] ) ), \
|
||||
_mm_xor_si128( M[ 9], H[ 9] ) )
|
||||
|
||||
#define Ws12 \
|
||||
_mm_add_epi32( \
|
||||
_mm_sub_epi32( \
|
||||
_mm_sub_epi32( \
|
||||
_mm_add_epi32( _mm_xor_si128( M[ 1], H[ 1] ), \
|
||||
_mm_xor_si128( M[ 3], H[ 3] ) ), \
|
||||
_mm_xor_si128( M[ 6], H[ 6] ) ), \
|
||||
_mm_xor_si128( M[ 9], H[ 9] ) ), \
|
||||
_mm_xor_si128( M[10], H[10] ) )
|
||||
|
||||
#define Ws13 \
|
||||
_mm_add_epi32( \
|
||||
_mm_add_epi32( \
|
||||
_mm_add_epi32( \
|
||||
_mm_add_epi32( _mm_xor_si128( M[ 2], H[ 2] ), \
|
||||
_mm_xor_si128( M[ 4], H[ 4] ) ), \
|
||||
_mm_xor_si128( M[ 7], H[ 7] ) ), \
|
||||
_mm_xor_si128( M[10], H[10] ) ), \
|
||||
_mm_xor_si128( M[11], H[11] ) )
|
||||
|
||||
#define Ws14 \
|
||||
_mm_sub_epi32( \
|
||||
_mm_sub_epi32( \
|
||||
_mm_add_epi32( \
|
||||
_mm_sub_epi32( _mm_xor_si128( M[ 3], H[ 3] ), \
|
||||
_mm_xor_si128( M[ 5], H[ 5] ) ), \
|
||||
_mm_xor_si128( M[ 8], H[ 8] ) ), \
|
||||
_mm_xor_si128( M[11], H[11] ) ), \
|
||||
_mm_xor_si128( M[12], H[12] ) )
|
||||
|
||||
#define Ws15 \
|
||||
_mm_add_epi32( \
|
||||
_mm_sub_epi32( \
|
||||
_mm_sub_epi32( \
|
||||
_mm_sub_epi32( _mm_xor_si128( M[12], H[12] ), \
|
||||
_mm_xor_si128( M[ 4], H[ 4] ) ), \
|
||||
_mm_xor_si128( M[ 6], H[ 6] ) ), \
|
||||
_mm_xor_si128( M[ 9], H[ 9] ) ), \
|
||||
_mm_xor_si128( M[13], H[13] ) )
|
||||
|
||||
|
||||
void compress_small( const __m128i *M, const __m128i H[16], __m128i dH[16] )
|
||||
{
|
||||
__m128i qt[32], xl, xh; \
|
||||
|
||||
qt[ 0] = ss0( Ws0 ) + H[ 1];
|
||||
qt[ 1] = ss1( Ws1 ) + H[ 2];
|
||||
qt[ 2] = ss2( Ws2 ) + H[ 3];
|
||||
qt[ 3] = ss3( Ws3 ) + H[ 4];
|
||||
qt[ 4] = ss4( Ws4 ) + H[ 5];
|
||||
qt[ 5] = ss0( Ws5 ) + H[ 6];
|
||||
qt[ 6] = ss1( Ws6 ) + H[ 7];
|
||||
qt[ 7] = ss2( Ws7 ) + H[ 8];
|
||||
qt[ 8] = ss3( Ws8 ) + H[ 9];
|
||||
qt[ 9] = ss4( Ws9 ) + H[10];
|
||||
qt[10] = ss0( Ws10) + H[11];
|
||||
qt[11] = ss1( Ws11) + H[12];
|
||||
qt[12] = ss2( Ws12) + H[13];
|
||||
qt[13] = ss3( Ws13) + H[14];
|
||||
qt[14] = ss4( Ws14) + H[15];
|
||||
qt[15] = ss0( Ws15) + H[ 0];
|
||||
qt[16] = expand1s( qt, M, H, 16 );
|
||||
qt[17] = expand1s( qt, M, H, 17 );
|
||||
qt[18] = expand2s( qt, M, H, 18 );
|
||||
qt[19] = expand2s( qt, M, H, 19 );
|
||||
qt[20] = expand2s( qt, M, H, 20 );
|
||||
qt[21] = expand2s( qt, M, H, 21 );
|
||||
qt[22] = expand2s( qt, M, H, 22 );
|
||||
qt[23] = expand2s( qt, M, H, 23 );
|
||||
qt[24] = expand2s( qt, M, H, 24 );
|
||||
qt[25] = expand2s( qt, M, H, 25 );
|
||||
qt[26] = expand2s( qt, M, H, 26 );
|
||||
qt[27] = expand2s( qt, M, H, 27 );
|
||||
qt[28] = expand2s( qt, M, H, 28 );
|
||||
qt[29] = expand2s( qt, M, H, 29 );
|
||||
qt[30] = expand2s( qt, M, H, 30 );
|
||||
qt[31] = expand2s( qt, M, H, 31 );
|
||||
|
||||
xl = _mm_xor_si128(
|
||||
_mm_xor_si128( _mm_xor_si128( qt[16], qt[17] ),
|
||||
_mm_xor_si128( qt[18], qt[19] ) ),
|
||||
_mm_xor_si128( _mm_xor_si128( qt[20], qt[21] ),
|
||||
_mm_xor_si128( qt[22], qt[23] ) ) );
|
||||
xh = _mm_xor_si128( xl,
|
||||
_mm_xor_si128(
|
||||
_mm_xor_si128( _mm_xor_si128( qt[24], qt[25] ),
|
||||
_mm_xor_si128( qt[26], qt[27] ) ),
|
||||
_mm_xor_si128( _mm_xor_si128( qt[28], qt[29] ),
|
||||
_mm_xor_si128( qt[30], qt[31] ) )));
|
||||
|
||||
dH[ 0] = _mm_add_epi32(
|
||||
_mm_xor_si128( M[0],
|
||||
_mm_xor_si128( _mm_slli_epi32( xh, 5 ),
|
||||
_mm_srli_epi32( qt[16], 5 ) ) ),
|
||||
_mm_xor_si128( _mm_xor_si128( xl, qt[24] ), qt[ 0] ));
|
||||
dH[ 1] = _mm_add_epi32(
|
||||
_mm_xor_si128( M[1],
|
||||
_mm_xor_si128( _mm_srli_epi32( xh, 7 ),
|
||||
_mm_slli_epi32( qt[17], 8 ) ) ),
|
||||
_mm_xor_si128( _mm_xor_si128( xl, qt[25] ), qt[ 1] ));
|
||||
dH[ 2] = _mm_add_epi32(
|
||||
_mm_xor_si128( M[2],
|
||||
_mm_xor_si128( _mm_srli_epi32( xh, 5 ),
|
||||
_mm_slli_epi32( qt[18], 5 ) ) ),
|
||||
_mm_xor_si128( _mm_xor_si128( xl, qt[26] ), qt[ 2] ));
|
||||
dH[ 3] = _mm_add_epi32(
|
||||
_mm_xor_si128( M[3],
|
||||
_mm_xor_si128( _mm_srli_epi32( xh, 1 ),
|
||||
_mm_slli_epi32( qt[19], 5 ) ) ),
|
||||
_mm_xor_si128( _mm_xor_si128( xl, qt[27] ), qt[ 3] ));
|
||||
dH[ 4] = _mm_add_epi32(
|
||||
_mm_xor_si128( M[4],
|
||||
_mm_xor_si128( _mm_srli_epi32( xh, 3 ),
|
||||
_mm_slli_epi32( qt[20], 0 ) ) ),
|
||||
_mm_xor_si128( _mm_xor_si128( xl, qt[28] ), qt[ 4] ));
|
||||
dH[ 5] = _mm_add_epi32(
|
||||
_mm_xor_si128( M[5],
|
||||
_mm_xor_si128( _mm_slli_epi32( xh, 6 ),
|
||||
_mm_srli_epi32( qt[21], 6 ) ) ),
|
||||
_mm_xor_si128( _mm_xor_si128( xl, qt[29] ), qt[ 5] ));
|
||||
dH[ 6] = _mm_add_epi32(
|
||||
_mm_xor_si128( M[6],
|
||||
_mm_xor_si128( _mm_srli_epi32( xh, 4 ),
|
||||
_mm_slli_epi32( qt[22], 6 ) ) ),
|
||||
_mm_xor_si128( _mm_xor_si128( xl, qt[30] ), qt[ 6] ));
|
||||
dH[ 7] = _mm_add_epi32(
|
||||
_mm_xor_si128( M[7],
|
||||
_mm_xor_si128( _mm_srli_epi32( xh, 11 ),
|
||||
_mm_slli_epi32( qt[23], 2 ) ) ),
|
||||
_mm_xor_si128( _mm_xor_si128( xl, qt[31] ), qt[ 7] ));
|
||||
dH[ 8] = _mm_add_epi32( _mm_add_epi32(
|
||||
mm_rotl_32( dH[4], 9 ),
|
||||
_mm_xor_si128( _mm_xor_si128( xh, qt[24] ), M[ 8] )),
|
||||
_mm_xor_si128( _mm_slli_epi32( xl, 8 ),
|
||||
_mm_xor_si128( qt[23], qt[ 8] ) ) );
|
||||
dH[ 9] = _mm_add_epi32( _mm_add_epi32(
|
||||
mm_rotl_32( dH[5], 10 ),
|
||||
_mm_xor_si128( _mm_xor_si128( xh, qt[25] ), M[ 9] )),
|
||||
_mm_xor_si128( _mm_srli_epi32( xl, 6 ),
|
||||
_mm_xor_si128( qt[16], qt[ 9] ) ) );
|
||||
dH[10] = _mm_add_epi32( _mm_add_epi32(
|
||||
mm_rotl_32( dH[6], 11 ),
|
||||
_mm_xor_si128( _mm_xor_si128( xh, qt[26] ), M[10] )),
|
||||
_mm_xor_si128( _mm_slli_epi32( xl, 6 ),
|
||||
_mm_xor_si128( qt[17], qt[10] ) ) );
|
||||
dH[11] = _mm_add_epi32( _mm_add_epi32(
|
||||
mm_rotl_32( dH[7], 12 ),
|
||||
_mm_xor_si128( _mm_xor_si128( xh, qt[27] ), M[11] )),
|
||||
_mm_xor_si128( _mm_slli_epi32( xl, 4 ),
|
||||
_mm_xor_si128( qt[18], qt[11] ) ) );
|
||||
dH[12] = _mm_add_epi32( _mm_add_epi32(
|
||||
mm_rotl_32( dH[0], 13 ),
|
||||
_mm_xor_si128( _mm_xor_si128( xh, qt[28] ), M[12] )),
|
||||
_mm_xor_si128( _mm_srli_epi32( xl, 3 ),
|
||||
_mm_xor_si128( qt[19], qt[12] ) ) );
|
||||
dH[13] = _mm_add_epi32( _mm_add_epi32(
|
||||
mm_rotl_32( dH[1], 14 ),
|
||||
_mm_xor_si128( _mm_xor_si128( xh, qt[29] ), M[13] )),
|
||||
_mm_xor_si128( _mm_srli_epi32( xl, 4 ),
|
||||
_mm_xor_si128( qt[20], qt[13] ) ) );
|
||||
dH[14] = _mm_add_epi32( _mm_add_epi32(
|
||||
mm_rotl_32( dH[2], 15 ),
|
||||
_mm_xor_si128( _mm_xor_si128( xh, qt[30] ), M[14] )),
|
||||
_mm_xor_si128( _mm_srli_epi32( xl, 7 ),
|
||||
_mm_xor_si128( qt[21], qt[14] ) ) );
|
||||
dH[15] = _mm_add_epi32( _mm_add_epi32(
|
||||
mm_rotl_32( dH[3], 16 ),
|
||||
_mm_xor_si128( _mm_xor_si128( xh, qt[31] ), M[15] )),
|
||||
_mm_xor_si128( _mm_srli_epi32( xl, 2 ),
|
||||
_mm_xor_si128( qt[22], qt[15] ) ) );
|
||||
}
|
||||
|
||||
// BMW512
|
||||
|
||||
#define Wb0 \
|
||||
_mm256_add_epi64( \
|
||||
@@ -564,6 +765,7 @@ void compress_big( const __m256i *M, const __m256i H[16], __m256i dH[16] )
|
||||
qt[29] = expand2b( qt, M, H, 29 );
|
||||
qt[30] = expand2b( qt, M, H, 30 );
|
||||
qt[31] = expand2b( qt, M, H, 31 );
|
||||
|
||||
xl = _mm256_xor_si256(
|
||||
_mm256_xor_si256( _mm256_xor_si256( qt[16], qt[17] ),
|
||||
_mm256_xor_si256( qt[18], qt[19] ) ),
|
||||
@@ -575,6 +777,7 @@ void compress_big( const __m256i *M, const __m256i H[16], __m256i dH[16] )
|
||||
_mm256_xor_si256( qt[26], qt[27] ) ),
|
||||
_mm256_xor_si256( _mm256_xor_si256( qt[28], qt[29] ),
|
||||
_mm256_xor_si256( qt[30], qt[31] ) )));
|
||||
|
||||
dH[ 0] = _mm256_add_epi64(
|
||||
_mm256_xor_si256( M[0],
|
||||
_mm256_xor_si256( _mm256_slli_epi64( xh, 5 ),
|
||||
@@ -657,137 +860,113 @@ void compress_big( const __m256i *M, const __m256i H[16], __m256i dH[16] )
|
||||
_mm256_xor_si256( qt[22], qt[15] ) ) );
|
||||
}
|
||||
|
||||
#endif // 64
|
||||
// BMW256
|
||||
|
||||
//#define FOLDs FOLD(sph_u32, MAKE_Qs, SPH_ROTL32, M, Qs, dH)
|
||||
|
||||
|
||||
/*
|
||||
static void
|
||||
compress_small(const unsigned char *data, const sph_u32 h[16], sph_u32 dh[16])
|
||||
static const __m128i final_s[16] =
|
||||
{
|
||||
#define M(x) sph_dec32le_aligned(data + 4 * (x))
|
||||
#define H(x) (h[x])
|
||||
#define dH(x) (dh[x])
|
||||
|
||||
FOLDs;
|
||||
|
||||
#undef M
|
||||
#undef H
|
||||
#undef dH
|
||||
}
|
||||
|
||||
static const sph_u32 final_s[16] = {
|
||||
SPH_C32(0xaaaaaaa0), SPH_C32(0xaaaaaaa1), SPH_C32(0xaaaaaaa2),
|
||||
SPH_C32(0xaaaaaaa3), SPH_C32(0xaaaaaaa4), SPH_C32(0xaaaaaaa5),
|
||||
SPH_C32(0xaaaaaaa6), SPH_C32(0xaaaaaaa7), SPH_C32(0xaaaaaaa8),
|
||||
SPH_C32(0xaaaaaaa9), SPH_C32(0xaaaaaaaa), SPH_C32(0xaaaaaaab),
|
||||
SPH_C32(0xaaaaaaac), SPH_C32(0xaaaaaaad), SPH_C32(0xaaaaaaae),
|
||||
SPH_C32(0xaaaaaaaf)
|
||||
{ 0xaaaaaaa0aaaaaaa0, 0xaaaaaaa0aaaaaaa0 },
|
||||
{ 0xaaaaaaa1aaaaaaa1, 0xaaaaaaa1aaaaaaa1 },
|
||||
{ 0xaaaaaaa2aaaaaaa2, 0xaaaaaaa2aaaaaaa2 },
|
||||
{ 0xaaaaaaa3aaaaaaa3, 0xaaaaaaa3aaaaaaa3 },
|
||||
{ 0xaaaaaaa4aaaaaaa4, 0xaaaaaaa4aaaaaaa4 },
|
||||
{ 0xaaaaaaa5aaaaaaa5, 0xaaaaaaa5aaaaaaa5 },
|
||||
{ 0xaaaaaaa6aaaaaaa6, 0xaaaaaaa6aaaaaaa6 },
|
||||
{ 0xaaaaaaa7aaaaaaa7, 0xaaaaaaa7aaaaaaa7 },
|
||||
{ 0xaaaaaaa8aaaaaaa8, 0xaaaaaaa8aaaaaaa8 },
|
||||
{ 0xaaaaaaa9aaaaaaa9, 0xaaaaaaa9aaaaaaa9 },
|
||||
{ 0xaaaaaaaaaaaaaaaa, 0xaaaaaaaaaaaaaaaa },
|
||||
{ 0xaaaaaaabaaaaaaab, 0xaaaaaaabaaaaaaab },
|
||||
{ 0xaaaaaaacaaaaaaac, 0xaaaaaaacaaaaaaac },
|
||||
{ 0xaaaaaaadaaaaaaad, 0xaaaaaaadaaaaaaad },
|
||||
{ 0xaaaaaaaeaaaaaaae, 0xaaaaaaaeaaaaaaae },
|
||||
{ 0xaaaaaaafaaaaaaaf, 0xaaaaaaafaaaaaaaf }
|
||||
};
|
||||
|
||||
static void
|
||||
bmw32_4way_init(bmw_4way_small_context *sc, const sph_u32 *iv)
|
||||
{
|
||||
memcpy(sc->H, iv, sizeof sc->H);
|
||||
sc->ptr = 0;
|
||||
#if SPH_64
|
||||
sc->bit_count = 0;
|
||||
#else
|
||||
sc->bit_count_high = 0;
|
||||
sc->bit_count_low = 0;
|
||||
#endif
|
||||
for ( int i = 0; i < 16; i++ )
|
||||
sc->H[i] = _mm_set1_epi32( iv[i] );
|
||||
sc->ptr = 0;
|
||||
sc->bit_count = 0;
|
||||
}
|
||||
|
||||
static void
|
||||
bmw32_4way(bmw_4way_small_context *sc, const void *data, size_t len)
|
||||
{
|
||||
unsigned char *buf;
|
||||
size_t ptr;
|
||||
sph_u32 htmp[16];
|
||||
sph_u32 *h1, *h2;
|
||||
#if !SPH_64
|
||||
sph_u32 tmp;
|
||||
#endif
|
||||
__m128i *vdata = (__m128i*)data;
|
||||
__m128i *buf;
|
||||
__m128i htmp[16];
|
||||
__m128i *h1, *h2;
|
||||
size_t ptr;
|
||||
const int buf_size = 64; // bytes of one lane, compatible with len
|
||||
|
||||
#if SPH_64
|
||||
sc->bit_count += (sph_u64)len << 3;
|
||||
#else
|
||||
tmp = sc->bit_count_low;
|
||||
sc->bit_count_low = SPH_T32(tmp + ((sph_u32)len << 3));
|
||||
if (sc->bit_count_low < tmp)
|
||||
sc->bit_count_high ++;
|
||||
sc->bit_count_high += len >> 29;
|
||||
#endif
|
||||
buf = sc->buf;
|
||||
ptr = sc->ptr;
|
||||
h1 = sc->H;
|
||||
h2 = htmp;
|
||||
while (len > 0) {
|
||||
size_t clen;
|
||||
|
||||
clen = (sizeof sc->buf) - ptr;
|
||||
if (clen > len)
|
||||
clen = len;
|
||||
memcpy(buf + ptr, data, clen);
|
||||
data = (const unsigned char *)data + clen;
|
||||
len -= clen;
|
||||
ptr += clen;
|
||||
if (ptr == sizeof sc->buf) {
|
||||
sph_u32 *ht;
|
||||
|
||||
compress_small(buf, h1, h2);
|
||||
ht = h1;
|
||||
h1 = h2;
|
||||
h2 = ht;
|
||||
ptr = 0;
|
||||
}
|
||||
}
|
||||
sc->ptr = ptr;
|
||||
if (h1 != sc->H)
|
||||
memcpy(sc->H, h1, sizeof sc->H);
|
||||
sc->bit_count += (sph_u64)len << 3;
|
||||
buf = sc->buf;
|
||||
ptr = sc->ptr;
|
||||
h1 = sc->H;
|
||||
h2 = htmp;
|
||||
while ( len > 0 )
|
||||
{
|
||||
size_t clen;
|
||||
clen = buf_size - ptr;
|
||||
if ( clen > len )
|
||||
clen = len;
|
||||
memcpy_128( buf + (ptr>>2), vdata, clen >> 2 );
|
||||
vdata += ( clen >> 2 );
|
||||
len -= clen;
|
||||
ptr += clen;
|
||||
if ( ptr == buf_size )
|
||||
{
|
||||
__m128i *ht;
|
||||
compress_small( buf, h1, h2 );
|
||||
ht = h1;
|
||||
h1 = h2;
|
||||
h2 = ht;
|
||||
ptr = 0;
|
||||
}
|
||||
}
|
||||
sc->ptr = ptr;
|
||||
if ( h1 != sc->H )
|
||||
memcpy_128( sc->H, h1, 16 );
|
||||
}
|
||||
|
||||
static void
|
||||
bmw32_4way_close(bmw_4way_small_context *sc, unsigned ub, unsigned n,
|
||||
void *dst, size_t out_size_w32)
|
||||
{
|
||||
unsigned char *buf, *out;
|
||||
size_t ptr, u, v;
|
||||
unsigned z;
|
||||
sph_u32 h1[16], h2[16], *h;
|
||||
__m128i *buf;
|
||||
__m128i h1[16], h2[16], *h;
|
||||
size_t ptr, u, v;
|
||||
unsigned z;
|
||||
const int buf_size = 64; // bytes of one lane, compatible with len
|
||||
|
||||
buf = sc->buf;
|
||||
ptr = sc->ptr;
|
||||
z = 0x80 >> n;
|
||||
buf[ptr ++] = ((ub & -z) | z) & 0xFF;
|
||||
h = sc->H;
|
||||
if (ptr > (sizeof sc->buf) - 8) {
|
||||
memset(buf + ptr, 0, (sizeof sc->buf) - ptr);
|
||||
compress_small(buf, h, h1);
|
||||
ptr = 0;
|
||||
h = h1;
|
||||
}
|
||||
memset(buf + ptr, 0, (sizeof sc->buf) - 8 - ptr);
|
||||
#if SPH_64
|
||||
sph_enc64le_aligned(buf + (sizeof sc->buf) - 8,
|
||||
SPH_T64(sc->bit_count + n));
|
||||
#else
|
||||
sph_enc32le_aligned(buf + (sizeof sc->buf) - 8,
|
||||
sc->bit_count_low + n);
|
||||
sph_enc32le_aligned(buf + (sizeof sc->buf) - 4,
|
||||
SPH_T32(sc->bit_count_high));
|
||||
#endif
|
||||
compress_small(buf, h, h2);
|
||||
for (u = 0; u < 16; u ++)
|
||||
sph_enc32le_aligned(buf + 4 * u, h2[u]);
|
||||
compress_small(buf, final_s, h1);
|
||||
out = dst;
|
||||
for (u = 0, v = 16 - out_size_w32; u < out_size_w32; u ++, v ++)
|
||||
sph_enc32le(out + 4 * u, h1[v]);
|
||||
buf = sc->buf;
|
||||
ptr = sc->ptr;
|
||||
z = 0x80 >> n;
|
||||
buf[ ptr>>2 ] = _mm_set1_epi32( z );
|
||||
ptr += 4;
|
||||
h = sc->H;
|
||||
|
||||
// assume bit_count fits in 32 bits
|
||||
if ( ptr > buf_size - 4 )
|
||||
{
|
||||
memset_zero_128( buf + (ptr>>2), (buf_size - ptr) >> 2 );
|
||||
compress_small( buf, h, h1 );
|
||||
ptr = 0;
|
||||
h = h1;
|
||||
}
|
||||
memset_zero_128( buf + (ptr>>2), (buf_size - 4 - ptr) >> 2 );
|
||||
buf[ (buf_size - 4) >> 2 ] = _mm_set1_epi32( sc->bit_count + n );
|
||||
compress_small( buf, h, h2 );
|
||||
for ( u = 0; u < 16; u ++ )
|
||||
buf[u] = h2[u];
|
||||
compress_small( buf, final_s, h1 );
|
||||
for (u = 0, v = 16 - out_size_w32; u < out_size_w32; u ++, v ++)
|
||||
casti_m128i( dst, u ) = h1[v];
|
||||
}
|
||||
*/
|
||||
#if SPH_64
|
||||
|
||||
// BMW512
|
||||
|
||||
static const __m256i final_b[16] =
|
||||
{
|
||||
@@ -908,33 +1087,33 @@ bmw64_4way_close(bmw_4way_big_context *sc, unsigned ub, unsigned n,
|
||||
casti_m256i(dst,u) = h1[v];
|
||||
}
|
||||
|
||||
#endif
|
||||
// BMW256
|
||||
|
||||
void
|
||||
bmw256_4way_init(void *cc)
|
||||
{
|
||||
// bmw32_4way_init(cc, IV256);
|
||||
bmw32_4way_init(cc, IV256);
|
||||
}
|
||||
|
||||
void
|
||||
bmw256_4way(void *cc, const void *data, size_t len)
|
||||
{
|
||||
// bmw32_4way(cc, data, len);
|
||||
bmw32_4way(cc, data, len);
|
||||
}
|
||||
|
||||
void
|
||||
bmw256_4way_close(void *cc, void *dst)
|
||||
{
|
||||
// bmw256_4way_addbits_and_close(cc, 0, 0, dst);
|
||||
bmw256_4way_addbits_and_close(cc, 0, 0, dst);
|
||||
}
|
||||
|
||||
void
|
||||
bmw256_4way_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
|
||||
{
|
||||
// bmw32_4way_close(cc, ub, n, dst, 8);
|
||||
bmw32_4way_close(cc, ub, n, dst, 8);
|
||||
}
|
||||
|
||||
#if SPH_64
|
||||
// BMW512
|
||||
|
||||
void
|
||||
bmw512_4way_init(void *cc)
|
||||
@@ -960,10 +1139,8 @@ bmw512_4way_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
|
||||
bmw64_4way_close(cc, ub, n, dst, 8);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
#endif // __AVX2__
|
||||
|
@@ -46,94 +46,37 @@ extern "C"{
|
||||
#include "algo/sha/sph_types.h"
|
||||
#include "avxdefs.h"
|
||||
|
||||
/**
|
||||
* Output size (in bits) for BMW-224.
|
||||
*/
|
||||
#define SPH_SIZE_bmw224 224
|
||||
|
||||
/**
|
||||
* Output size (in bits) for BMW-256.
|
||||
*/
|
||||
#define SPH_SIZE_bmw256 256
|
||||
|
||||
#if SPH_64
|
||||
|
||||
/**
|
||||
* Output size (in bits) for BMW-384.
|
||||
*/
|
||||
#define SPH_SIZE_bmw384 384
|
||||
|
||||
/**
|
||||
* Output size (in bits) for BMW-512.
|
||||
*/
|
||||
#define SPH_SIZE_bmw512 512
|
||||
|
||||
#endif
|
||||
|
||||
/**
|
||||
* This structure is a context for BMW-224 and BMW-256 computations:
|
||||
* it contains the intermediate values and some data from the last
|
||||
* entered block. Once a BMW computation has been performed, the
|
||||
* context can be reused for another computation.
|
||||
*
|
||||
* The contents of this structure are private. A running BMW
|
||||
* computation can be cloned by copying the context (e.g. with a simple
|
||||
* <code>memcpy()</code>).
|
||||
*/
|
||||
typedef struct {
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
unsigned char buf[64]; /* first field, for alignment */
|
||||
size_t ptr;
|
||||
sph_u32 H[16];
|
||||
#if SPH_64
|
||||
sph_u64 bit_count;
|
||||
#else
|
||||
sph_u32 bit_count_high, bit_count_low;
|
||||
#endif
|
||||
#endif
|
||||
__m128i buf[64];
|
||||
__m128i H[16];
|
||||
size_t ptr;
|
||||
sph_u32 bit_count; // assume bit_count fits in 32 bits
|
||||
} bmw_4way_small_context;
|
||||
|
||||
typedef bmw_4way_small_context bmw256_4way_context;
|
||||
|
||||
#if SPH_64
|
||||
|
||||
/**
|
||||
* This structure is a context for BMW-384 and BMW-512 computations:
|
||||
* it contains the intermediate values and some data from the last
|
||||
* entered block. Once a BMW computation has been performed, the
|
||||
* context can be reused for another computation.
|
||||
*
|
||||
* The contents of this structure are private. A running BMW
|
||||
* computation can be cloned by copying the context (e.g. with a simple
|
||||
* <code>memcpy()</code>).
|
||||
*/
|
||||
typedef struct {
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
__m256i buf[16];
|
||||
__m256i H[16];
|
||||
|
||||
// unsigned char buf[128]; /* first field, for alignment */
|
||||
size_t ptr;
|
||||
// sph_u64 H[16];
|
||||
sph_u64 bit_count;
|
||||
#endif
|
||||
size_t ptr;
|
||||
sph_u64 bit_count;
|
||||
} bmw_4way_big_context;
|
||||
|
||||
typedef bmw_4way_big_context bmw512_4way_context;
|
||||
|
||||
#endif
|
||||
|
||||
void bmw256_4way_init(void *cc);
|
||||
|
||||
void bmw256_4way(void *cc, const void *data, size_t len);
|
||||
|
||||
void bmw256_4way_close(void *cc, void *dst);
|
||||
|
||||
void bmw256_addbits_and_close(
|
||||
void bmw256_4way_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
#if SPH_64
|
||||
|
||||
void bmw512_4way_init(void *cc);
|
||||
|
||||
void bmw512_4way(void *cc, const void *data, size_t len);
|
||||
@@ -150,5 +93,3 @@ void bmw512_4way_addbits_and_close(
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
@@ -23,12 +23,12 @@ void jha_hash_4way( void *out, const void *input )
|
||||
uint64_t hash2[8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash3[8] __attribute__ ((aligned (64)));
|
||||
uint64_t vhash[8*4] __attribute__ ((aligned (64)));
|
||||
uint64_t vhash0[8*4] __attribute__ ((aligned (64)));
|
||||
uint64_t vhash1[8*4] __attribute__ ((aligned (64)));
|
||||
__m256i mask, mask0, mask1;
|
||||
__m256i* vh = (__m256i*)vhash;
|
||||
__m256i* vh0 = (__m256i*)vhash0;
|
||||
__m256i* vh1 = (__m256i*)vhash1;
|
||||
uint64_t vhashA[8*4] __attribute__ ((aligned (64)));
|
||||
uint64_t vhashB[8*4] __attribute__ ((aligned (64)));
|
||||
__m256i* vh = (__m256i*)vhash;
|
||||
__m256i* vhA = (__m256i*)vhashA;
|
||||
__m256i* vhB = (__m256i*)vhashB;
|
||||
__m256i vh_mask;
|
||||
|
||||
blake512_4way_context ctx_blake;
|
||||
hashState_groestl ctx_groestl;
|
||||
@@ -40,127 +40,69 @@ void jha_hash_4way( void *out, const void *input )
|
||||
keccak512_4way( &ctx_keccak, input, 80 );
|
||||
keccak512_4way_close( &ctx_keccak, vhash );
|
||||
|
||||
// memcpy( &ctx_keccak, &jha_kec_mid, sizeof jha_kec_mid );
|
||||
// keccak512_4way( &ctx_keccak, input + (64<<2), 16 );
|
||||
// keccak512_4way_close( &ctx_keccak, vhash );
|
||||
|
||||
// Heavy & Light Pair Loop
|
||||
for ( int round = 0; round < 3; round++ )
|
||||
{
|
||||
// select next function based on bit 0 of previous hash.
|
||||
// Specutively execute both functions and use mask to
|
||||
// select results from correct function for each lane.
|
||||
// hash = mask : vhash0 ? vhash1
|
||||
mask = mm256_negate_64(
|
||||
_mm256_and_si256( vh[0], _mm256_set1_epi64x( 0x1 ) ) );
|
||||
|
||||
// second version
|
||||
// mask0 = mask
|
||||
// mask1 = mm256_not( mask );
|
||||
|
||||
// first version
|
||||
// mask = _mm256_sub_epi64( _mm256_and_si256( vh[0],
|
||||
// _mm256_set1_epi64x( 0x1 ) ), _mm256_set1_epi64x( 0x1 ) );
|
||||
|
||||
// groestl (serial) vs skein
|
||||
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256(
|
||||
vh[0], _mm256_set1_epi64x( 1 ) ), mm256_zero );
|
||||
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
init_groestl( &ctx_groestl, 64 );
|
||||
update_and_final_groestl( &ctx_groestl, (char*)hash0,
|
||||
(char*)hash0, 512 );
|
||||
(char*)hash0, 512 );
|
||||
init_groestl( &ctx_groestl, 64 );
|
||||
update_and_final_groestl( &ctx_groestl, (char*)hash1,
|
||||
(char*)hash1, 512 );
|
||||
(char*)hash1, 512 );
|
||||
init_groestl( &ctx_groestl, 64 );
|
||||
update_and_final_groestl( &ctx_groestl, (char*)hash2,
|
||||
(char*)hash2, 512 );
|
||||
(char*)hash2, 512 );
|
||||
init_groestl( &ctx_groestl, 64 );
|
||||
update_and_final_groestl( &ctx_groestl, (char*)hash3,
|
||||
(char*)hash3, 512 );
|
||||
|
||||
mm256_interleave_4x64( vhash0, hash0, hash1, hash2, hash3, 512 );
|
||||
|
||||
// skein
|
||||
(char*)hash3, 512 );
|
||||
mm256_interleave_4x64( vhashA, hash0, hash1, hash2, hash3, 512 );
|
||||
|
||||
skein512_4way_init( &ctx_skein );
|
||||
skein512_4way( &ctx_skein, vhash, 64 );
|
||||
skein512_4way_close( &ctx_skein, vhash1 );
|
||||
skein512_4way_close( &ctx_skein, vhashB );
|
||||
|
||||
// merge vectored hash
|
||||
for ( int i = 0; i < 8; i++ )
|
||||
{
|
||||
// blend should be faster
|
||||
vh[i] = _mm256_blendv_epi8( vh0[i], vh1[i], mask );
|
||||
|
||||
// second version
|
||||
// vh[i] = _mm256_or_si256( _mm256_and_si256( vh0[i], mask0 ),
|
||||
// _mm256_and_si256( vh1[i], mask1 ) );
|
||||
|
||||
// first version
|
||||
/*
|
||||
vh0[i] = _mm256_maskload_epi64(
|
||||
vhash0 + i*4, mm256_not( mask ) );
|
||||
vh1[i] = _mm256_maskload_epi64(
|
||||
vhash1 + i*4, mask );
|
||||
vh[i] = _mm256_or_si256( vh0[i], vh1[i] );
|
||||
*/
|
||||
}
|
||||
|
||||
// blake v jh
|
||||
vh[i] = _mm256_blendv_epi8( vhA[i], vhB[i], vh_mask );
|
||||
|
||||
blake512_4way_init( &ctx_blake );
|
||||
blake512_4way( &ctx_blake, vhash, 64 );
|
||||
blake512_4way_close( &ctx_blake, vhash0 );
|
||||
blake512_4way_close( &ctx_blake, vhashA );
|
||||
|
||||
jh512_4way_init( &ctx_jh );
|
||||
jh512_4way( &ctx_jh, vhash, 64 );
|
||||
jh512_4way_close( &ctx_jh, vhash1 );
|
||||
jh512_4way_close( &ctx_jh, vhashB );
|
||||
|
||||
// merge hash
|
||||
for ( int i = 0; i < 8; i++ )
|
||||
{
|
||||
vh[i] = _mm256_or_si256( _mm256_and_si256( vh0[i], mask0 ),
|
||||
_mm256_and_si256( vh1[i], mask1 ) );
|
||||
/*
|
||||
vha256[i] = _mm256_maskload_epi64(
|
||||
vhasha + i*4, mm256_not( mask ) );
|
||||
vhb256[i] = _mm256_maskload_epi64(
|
||||
vhashb + i*4, mask );
|
||||
vh256[i] = _mm256_or_si256( vha256[i], vhb256[i] );
|
||||
*/
|
||||
}
|
||||
vh[i] = _mm256_blendv_epi8( vhA[i], vhB[i], vh_mask );
|
||||
}
|
||||
|
||||
mm256_deinterleave_4x64( out, out+32, out+64, out+96, vhash, 256 );
|
||||
|
||||
// memcpy( output, hash0, 32 );
|
||||
// memcpy( output+32, hash1, 32 );
|
||||
// memcpy( output+64, hash2, 32 );
|
||||
// memcpy( output+96, hash3, 32 );
|
||||
|
||||
}
|
||||
|
||||
int scanhash_jha_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
{
|
||||
uint32_t hash[8*4] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t endiandata[20] __attribute__((aligned(64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint32_t n = pdata[19];
|
||||
uint32_t *nonces = work->nonces;
|
||||
bool *found = work->nfound;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep0 = vdata + 73; // 9*8 + 1
|
||||
uint32_t *noncep1 = vdata + 75;
|
||||
uint32_t *noncep2 = vdata + 77;
|
||||
uint32_t *noncep3 = vdata + 79;
|
||||
uint32_t hash[8*4] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t endiandata[20] __attribute__((aligned(64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint32_t n = pdata[19];
|
||||
uint32_t *nonces = work->nonces;
|
||||
bool *found = work->nfound;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep0 = vdata + 73; // 9*8 + 1
|
||||
uint32_t *noncep1 = vdata + 75;
|
||||
uint32_t *noncep2 = vdata + 77;
|
||||
uint32_t *noncep3 = vdata + 79;
|
||||
|
||||
uint64_t htmax[] = {
|
||||
uint64_t htmax[] = {
|
||||
0,
|
||||
0xF,
|
||||
0xFF,
|
||||
@@ -168,7 +110,7 @@ int scanhash_jha_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
0xFFFF,
|
||||
0x10000000
|
||||
};
|
||||
uint32_t masks[] = {
|
||||
uint32_t masks[] = {
|
||||
0xFFFFFFFF,
|
||||
0xFFFFFFF0,
|
||||
0xFFFFFF00,
|
||||
@@ -177,17 +119,12 @@ int scanhash_jha_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
0
|
||||
};
|
||||
|
||||
// we need bigendian data...
|
||||
for ( int i=0; i < 19; i++ )
|
||||
be32enc( &endiandata[i], pdata[i] );
|
||||
|
||||
uint64_t *edata = (uint64_t*)endiandata;
|
||||
mm256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||
|
||||
// precalc midstate for keccak
|
||||
// keccak512_4way_init( &jha_kec_mid );
|
||||
// keccak512_4way( &jha_kec_mid, vdata, 64 );
|
||||
|
||||
for ( int m = 0; m < 6; m++ )
|
||||
{
|
||||
if ( Htarg <= htmax[m] )
|
||||
@@ -201,7 +138,6 @@ int scanhash_jha_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
be32enc( noncep3, n+3 );
|
||||
|
||||
jha_hash_4way( hash, vdata );
|
||||
|
||||
pdata[19] = n;
|
||||
|
||||
if ( ( !(hash[7] & mask) )
|
||||
@@ -239,11 +175,9 @@ int scanhash_jha_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
n += 4;
|
||||
} while ( ( num_found == 0 ) && ( n < max_nonce )
|
||||
&& !work_restart[thr_id].restart );
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return num_found;
|
||||
}
|
||||
|
128
algo/lyra2/lyra2h-4way.c
Normal file
128
algo/lyra2/lyra2h-4way.c
Normal file
@@ -0,0 +1,128 @@
|
||||
#include "lyra2h-gate.h"
|
||||
|
||||
#ifdef LYRA2H_4WAY
|
||||
|
||||
#include <memory.h>
|
||||
#include <mm_malloc.h>
|
||||
#include "lyra2.h"
|
||||
#include "algo/blake/sph_blake.h"
|
||||
#include "algo/blake/blake-hash-4way.h"
|
||||
|
||||
__thread uint64_t* lyra2h_4way_matrix;
|
||||
|
||||
bool lyra2h_4way_thread_init()
|
||||
{
|
||||
return ( lyra2h_4way_matrix = _mm_malloc( LYRA2H_MATRIX_SIZE, 64 ) );
|
||||
}
|
||||
|
||||
static __thread blake256_4way_context l2h_4way_blake_mid;
|
||||
|
||||
void lyra2h_4way_midstate( const void* input )
|
||||
{
|
||||
blake256_4way_init( &l2h_4way_blake_mid );
|
||||
blake256_4way( &l2h_4way_blake_mid, input, 64 );
|
||||
}
|
||||
|
||||
void lyra2h_4way_hash( void *state, const void *input )
|
||||
{
|
||||
uint32_t hash0[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash1[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash2[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash3[8] __attribute__ ((aligned (64)));
|
||||
uint32_t vhash[8*4] __attribute__ ((aligned (64)));
|
||||
blake256_4way_context ctx_blake __attribute__ ((aligned (64)));
|
||||
|
||||
memcpy( &ctx_blake, &l2h_4way_blake_mid, sizeof l2h_4way_blake_mid );
|
||||
blake256_4way( &ctx_blake, input + (64*4), 16 );
|
||||
blake256_4way_close( &ctx_blake, vhash );
|
||||
|
||||
mm_deinterleave_4x32( hash0, hash1, hash2, hash3, vhash, 256 );
|
||||
|
||||
LYRA2Z( lyra2h_4way_matrix, hash0, 32, hash0, 32, hash0, 32, 16, 16, 16 );
|
||||
LYRA2Z( lyra2h_4way_matrix, hash1, 32, hash1, 32, hash1, 32, 16, 16, 16 );
|
||||
LYRA2Z( lyra2h_4way_matrix, hash2, 32, hash2, 32, hash2, 32, 16, 16, 16 );
|
||||
LYRA2Z( lyra2h_4way_matrix, hash3, 32, hash3, 32, hash3, 32, 16, 16, 16 );
|
||||
|
||||
memcpy( state, hash0, 32 );
|
||||
memcpy( state+32, hash1, 32 );
|
||||
memcpy( state+64, hash2, 32 );
|
||||
memcpy( state+96, hash3, 32 );
|
||||
}
|
||||
|
||||
int scanhash_lyra2h_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
{
|
||||
uint32_t hash[8*4] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t _ALIGN(64) edata[20];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
uint32_t *nonces = work->nonces;
|
||||
bool *found = work->nfound;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep0 = vdata + 76; // 19*4
|
||||
uint32_t *noncep1 = vdata + 77;
|
||||
uint32_t *noncep2 = vdata + 78;
|
||||
uint32_t *noncep3 = vdata + 79;
|
||||
|
||||
if ( opt_benchmark )
|
||||
ptarget[7] = 0x0000ff;
|
||||
|
||||
for ( int i=0; i < 19; i++ )
|
||||
be32enc( &edata[i], pdata[i] );
|
||||
|
||||
mm_interleave_4x32( vdata, edata, edata, edata, edata, 640 );
|
||||
|
||||
lyra2h_4way_midstate( vdata );
|
||||
|
||||
do {
|
||||
found[0] = found[1] = found[2] = found[3] = false;
|
||||
be32enc( noncep0, n );
|
||||
be32enc( noncep1, n+1 );
|
||||
be32enc( noncep2, n+2 );
|
||||
be32enc( noncep3, n+3 );
|
||||
|
||||
be32enc( &edata[19], n );
|
||||
lyra2h_4way_hash( hash, vdata );
|
||||
|
||||
if ( hash[7] <= Htarg && fulltest( hash, ptarget ) )
|
||||
{
|
||||
found[0] = true;
|
||||
num_found++;
|
||||
nonces[0] = pdata[19] = n;
|
||||
work_set_target_ratio( work, hash );
|
||||
}
|
||||
if ( (hash+8)[7] <= Htarg && fulltest( hash+8, ptarget ) )
|
||||
{
|
||||
found[1] = true;
|
||||
num_found++;
|
||||
nonces[1] = n+1;
|
||||
work_set_target_ratio( work, hash+8 );
|
||||
}
|
||||
if ( (hash+16)[7] <= Htarg && fulltest( hash+16, ptarget ) )
|
||||
{
|
||||
found[2] = true;
|
||||
num_found++;
|
||||
nonces[2] = n+2;
|
||||
work_set_target_ratio( work, hash+16 );
|
||||
}
|
||||
if ( (hash+24)[7] <= Htarg && fulltest( hash+24, ptarget ) )
|
||||
{
|
||||
found[3] = true;
|
||||
num_found++;
|
||||
nonces[3] = n+3;
|
||||
work_set_target_ratio( work, hash+24 );
|
||||
}
|
||||
n += 4;
|
||||
} while ( (num_found == 0) && (n < max_nonce-4)
|
||||
&& !work_restart[thr_id].restart);
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return num_found;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
25
algo/lyra2/lyra2h-gate.c
Normal file
25
algo/lyra2/lyra2h-gate.c
Normal file
@@ -0,0 +1,25 @@
|
||||
#include "lyra2h-gate.h"
|
||||
#include "lyra2.h"
|
||||
|
||||
void lyra2h_set_target( struct work* work, double job_diff )
|
||||
{
|
||||
work_set_target( work, job_diff / (256.0 * opt_diff_factor) );
|
||||
}
|
||||
|
||||
bool register_lyra2h_algo( algo_gate_t* gate )
|
||||
{
|
||||
#ifdef LYRA2H_4WAY
|
||||
gate->miner_thread_init = (void*)&lyra2h_4way_thread_init;
|
||||
gate->scanhash = (void*)&scanhash_lyra2h_4way;
|
||||
gate->hash = (void*)&lyra2h_4way_hash;
|
||||
#else
|
||||
gate->miner_thread_init = (void*)&lyra2h_thread_init;
|
||||
gate->scanhash = (void*)&scanhash_lyra2h;
|
||||
gate->hash = (void*)&lyra2h_hash;
|
||||
#endif
|
||||
gate->optimizations = AVX_OPT | AVX2_OPT | FOUR_WAY_OPT;
|
||||
gate->get_max64 = (void*)&get_max64_0xffffLL;
|
||||
gate->set_target = (void*)&lyra2h_set_target;
|
||||
return true;
|
||||
};
|
||||
|
32
algo/lyra2/lyra2h-gate.h
Normal file
32
algo/lyra2/lyra2h-gate.h
Normal file
@@ -0,0 +1,32 @@
|
||||
#ifndef LYRA2H_GATE_H__
|
||||
#define LYRA2H_GATE_H__
|
||||
|
||||
#include "algo-gate-api.h"
|
||||
#include <stdint.h>
|
||||
|
||||
#if defined(HASH_4WAY)
|
||||
#define LYRA2H_4WAY
|
||||
#endif
|
||||
|
||||
#define LYRA2H_MATRIX_SIZE BLOCK_LEN_INT64 * 16 * 16 * 8
|
||||
|
||||
#if defined(LYRA2H_4WAY)
|
||||
|
||||
void lyra2h_4way_hash( void *state, const void *input );
|
||||
|
||||
int scanhash_lyra2h_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
|
||||
bool lyra2h_4way_thread_init();
|
||||
|
||||
#endif
|
||||
|
||||
void lyra2h_hash( void *state, const void *input );
|
||||
|
||||
int scanhash_lyra2h( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
|
||||
bool lyra2h_thread_init();
|
||||
|
||||
#endif
|
||||
|
@@ -1,6 +1,6 @@
|
||||
#include "lyra2h-gate.h"
|
||||
#include <memory.h>
|
||||
#include <mm_malloc.h>
|
||||
#include "algo-gate-api.h"
|
||||
#include "lyra2.h"
|
||||
#include "algo/blake/sph_blake.h"
|
||||
|
||||
@@ -8,8 +8,7 @@ __thread uint64_t* lyra2h_matrix;
|
||||
|
||||
bool lyra2h_thread_init()
|
||||
{
|
||||
const int i = 16 * 16 * 96;
|
||||
lyra2h_matrix = _mm_malloc( i, 64 );
|
||||
lyra2h_matrix = _mm_malloc( LYRA2H_MATRIX_SIZE, 64 );
|
||||
return lyra2h_matrix;
|
||||
}
|
||||
|
||||
@@ -74,20 +73,3 @@ int scanhash_lyra2h( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
*hashes_done = pdata[19] - first_nonce + 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void lyra2h_set_target( struct work* work, double job_diff )
|
||||
{
|
||||
work_set_target( work, job_diff / (256.0 * opt_diff_factor) );
|
||||
}
|
||||
|
||||
bool register_lyra2h_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->optimizations = AVX_OPT | AVX2_OPT;
|
||||
gate->miner_thread_init = (void*)&lyra2h_thread_init;
|
||||
gate->scanhash = (void*)&scanhash_lyra2h;
|
||||
gate->hash = (void*)&lyra2h_hash;
|
||||
gate->get_max64 = (void*)&get_max64_0xffffLL;
|
||||
gate->set_target = (void*)&lyra2h_set_target;
|
||||
return true;
|
||||
};
|
||||
|
||||
|
@@ -106,6 +106,7 @@ int scanhash_lyra2re(int thr_id, struct work *work,
|
||||
{
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce;
|
||||
work_set_target_ratio( work, hash );
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
177
algo/lyra2/lyra2rev2-4way.c
Normal file
177
algo/lyra2/lyra2rev2-4way.c
Normal file
@@ -0,0 +1,177 @@
|
||||
#include "lyra2rev2-gate.h"
|
||||
#include <memory.h>
|
||||
|
||||
#ifdef __AVX2__
|
||||
|
||||
#include "algo/blake/blake-hash-4way.h"
|
||||
#include "algo/keccak/keccak-hash-4way.h"
|
||||
#include "algo/skein/skein-hash-4way.h"
|
||||
#include "algo/bmw/bmw-hash-4way.h"
|
||||
|
||||
#include "algo/cubehash/sph_cubehash.h"
|
||||
#include "algo/bmw/sph_bmw.h"
|
||||
#include "algo/cubehash/sse2/cubehash_sse2.h"
|
||||
|
||||
typedef struct {
|
||||
blake256_4way_context blake;
|
||||
keccak256_4way_context keccak;
|
||||
cubehashParam cube;
|
||||
skein256_4way_context skein;
|
||||
sph_bmw256_context bmw;
|
||||
|
||||
} lyra2v2_4way_ctx_holder;
|
||||
|
||||
static lyra2v2_4way_ctx_holder l2v2_4way_ctx;
|
||||
|
||||
void init_lyra2rev2_4way_ctx()
|
||||
{
|
||||
// blake256_4way_init( &l2v2_4way_ctx.blake );
|
||||
keccak256_4way_init( &l2v2_4way_ctx.keccak );
|
||||
cubehashInit( &l2v2_4way_ctx.cube, 256, 16, 32 );
|
||||
skein256_4way_init( &l2v2_4way_ctx.skein );
|
||||
sph_bmw256_init( &l2v2_4way_ctx.bmw );
|
||||
}
|
||||
|
||||
void lyra2rev2_4way_hash( void *state, const void *input )
|
||||
{
|
||||
uint32_t hash0[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash1[8] __attribute__ ((aligned (32)));
|
||||
uint32_t hash2[8] __attribute__ ((aligned (32)));
|
||||
uint32_t hash3[8] __attribute__ ((aligned (32)));
|
||||
uint32_t vhash[8*4] __attribute__ ((aligned (64)));
|
||||
uint64_t vhash64[4*4] __attribute__ ((aligned (64)));
|
||||
lyra2v2_4way_ctx_holder ctx __attribute__ ((aligned (64)));
|
||||
memcpy( &ctx, &l2v2_4way_ctx, sizeof(l2v2_4way_ctx) );
|
||||
|
||||
blake256_4way( &ctx.blake, input + (64<<2), 16 );
|
||||
// blake256_4way( &ctx.blake, input, 80 );
|
||||
blake256_4way_close( &ctx.blake, vhash );
|
||||
|
||||
mm256_reinterleave_4x64( vhash64, vhash, 256 );
|
||||
keccak256_4way( &ctx.keccak, vhash64, 32 );
|
||||
keccak256_4way_close( &ctx.keccak, vhash64 );
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash64, 256 );
|
||||
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*) hash0, (const byte*) hash0, 32 );
|
||||
memcpy( &ctx.cube, &l2v2_4way_ctx.cube, sizeof ctx.cube );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*) hash1, (const byte*) hash1, 32 );
|
||||
memcpy( &ctx.cube, &l2v2_4way_ctx.cube, sizeof ctx.cube );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*) hash2, (const byte*) hash2, 32 );
|
||||
memcpy( &ctx.cube, &l2v2_4way_ctx.cube, sizeof ctx.cube );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*) hash3, (const byte*) hash3, 32 );
|
||||
|
||||
LYRA2REV2( l2v2_wholeMatrix, hash0, 32, hash0, 32, hash0, 32, 1, 4, 4 );
|
||||
LYRA2REV2( l2v2_wholeMatrix, hash1, 32, hash1, 32, hash1, 32, 1, 4, 4 );
|
||||
LYRA2REV2( l2v2_wholeMatrix, hash2, 32, hash2, 32, hash2, 32, 1, 4, 4 );
|
||||
LYRA2REV2( l2v2_wholeMatrix, hash3, 32, hash3, 32, hash3, 32, 1, 4, 4 );
|
||||
|
||||
mm256_interleave_4x64( vhash64, hash0, hash1, hash2, hash3, 256 );
|
||||
skein256_4way( &ctx.skein, vhash64, 32 );
|
||||
skein256_4way_close( &ctx.skein, vhash64 );
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash64, 256 );
|
||||
|
||||
memcpy( &ctx.cube, &l2v2_4way_ctx.cube, sizeof ctx.cube );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*) hash0, (const byte*) hash0, 32 );
|
||||
memcpy( &ctx.cube, &l2v2_4way_ctx.cube, sizeof ctx.cube );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*) hash1, (const byte*) hash1, 32 );
|
||||
memcpy( &ctx.cube, &l2v2_4way_ctx.cube, sizeof ctx.cube );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*) hash2, (const byte*) hash2, 32 );
|
||||
memcpy( &ctx.cube, &l2v2_4way_ctx.cube, sizeof ctx.cube );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*) hash3, (const byte*) hash3, 32 );
|
||||
|
||||
|
||||
sph_bmw256( &ctx.bmw, hash0, 32 );
|
||||
sph_bmw256_close( &ctx.bmw, hash0 );
|
||||
memcpy( &ctx.bmw, &l2v2_4way_ctx.bmw, sizeof ctx.bmw );
|
||||
sph_bmw256( &ctx.bmw, hash1, 32 );
|
||||
sph_bmw256_close( &ctx.bmw, hash1 );
|
||||
memcpy( &ctx.bmw, &l2v2_4way_ctx.bmw, sizeof ctx.bmw );
|
||||
sph_bmw256( &ctx.bmw, hash2, 32 );
|
||||
sph_bmw256_close( &ctx.bmw, hash2 );
|
||||
memcpy( &ctx.bmw, &l2v2_4way_ctx.bmw, sizeof ctx.bmw );
|
||||
sph_bmw256( &ctx.bmw, hash3, 32 );
|
||||
sph_bmw256_close( &ctx.bmw, hash3 );
|
||||
|
||||
|
||||
memcpy( state, hash0, 32 );
|
||||
memcpy( state+32, hash1, 32 );
|
||||
memcpy( state+64, hash2, 32 );
|
||||
memcpy( state+96, hash3, 32 );
|
||||
}
|
||||
|
||||
int scanhash_lyra2rev2_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
{
|
||||
uint32_t hash[8*4] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t _ALIGN(64) edata[20];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint32_t *nonces = work->nonces;
|
||||
bool *found = work->nfound;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep0 = vdata + 76; // 19*4
|
||||
uint32_t *noncep1 = vdata + 77;
|
||||
uint32_t *noncep2 = vdata + 78;
|
||||
uint32_t *noncep3 = vdata + 79;
|
||||
|
||||
if ( opt_benchmark )
|
||||
( (uint32_t*)ptarget )[7] = 0x0000ff;
|
||||
|
||||
swab32_array( edata, pdata, 20 );
|
||||
|
||||
mm_interleave_4x32( vdata, edata, edata, edata, edata, 640 );
|
||||
|
||||
blake256_4way_init( &l2v2_4way_ctx.blake );
|
||||
blake256_4way( &l2v2_4way_ctx.blake, vdata, 64 );
|
||||
|
||||
do {
|
||||
found[0] = found[1] = found[2] = found[3] = false;
|
||||
be32enc( noncep0, n );
|
||||
be32enc( noncep1, n+1 );
|
||||
be32enc( noncep2, n+2 );
|
||||
be32enc( noncep3, n+3 );
|
||||
|
||||
lyra2rev2_4way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
if ( hash[7] <= Htarg && fulltest( hash, ptarget ) )
|
||||
{
|
||||
found[0] = true;
|
||||
num_found++;
|
||||
nonces[0] = pdata[19] = n;
|
||||
work_set_target_ratio( work, hash );
|
||||
}
|
||||
if ( (hash+8)[7] <= Htarg && fulltest( hash+8, ptarget ) )
|
||||
{
|
||||
found[1] = true;
|
||||
num_found++;
|
||||
nonces[1] = n+1;
|
||||
work_set_target_ratio( work, hash+8 );
|
||||
}
|
||||
if ( (hash+16)[7] <= Htarg && fulltest( hash+16, ptarget ) )
|
||||
{
|
||||
found[2] = true;
|
||||
num_found++;
|
||||
nonces[2] = n+2;
|
||||
work_set_target_ratio( work, hash+16 );
|
||||
}
|
||||
if ( (hash+24)[7] <= Htarg && fulltest( hash+24, ptarget ) )
|
||||
{
|
||||
found[3] = true;
|
||||
num_found++;
|
||||
nonces[3] = n+3;
|
||||
work_set_target_ratio( work, hash+24 );
|
||||
}
|
||||
n += 4;
|
||||
} while ( (num_found == 0) && (n < max_nonce-4)
|
||||
&& !work_restart[thr_id].restart);
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return num_found;
|
||||
}
|
||||
|
||||
#endif
|
38
algo/lyra2/lyra2rev2-gate.c
Normal file
38
algo/lyra2/lyra2rev2-gate.c
Normal file
@@ -0,0 +1,38 @@
|
||||
#include "lyra2rev2-gate.h"
|
||||
|
||||
__thread uint64_t* l2v2_wholeMatrix;
|
||||
|
||||
void lyra2rev2_set_target( struct work* work, double job_diff )
|
||||
{
|
||||
work_set_target( work, job_diff / (256.0 * opt_diff_factor) );
|
||||
}
|
||||
|
||||
bool lyra2rev2_thread_init()
|
||||
{
|
||||
const int64_t ROW_LEN_INT64 = BLOCK_LEN_INT64 * 4; // nCols
|
||||
const int64_t ROW_LEN_BYTES = ROW_LEN_INT64 * 8;
|
||||
|
||||
int i = (int64_t)ROW_LEN_BYTES * 4; // nRows;
|
||||
l2v2_wholeMatrix = _mm_malloc( i, 64 );
|
||||
|
||||
return l2v2_wholeMatrix;
|
||||
}
|
||||
|
||||
bool register_lyra2rev2_algo( algo_gate_t* gate )
|
||||
{
|
||||
#if defined (LYRA2REV2_4WAY)
|
||||
init_lyra2rev2_4way_ctx();
|
||||
gate->scanhash = (void*)&scanhash_lyra2rev2_4way;
|
||||
gate->hash = (void*)&lyra2rev2_4way_hash;
|
||||
#else
|
||||
init_lyra2rev2_ctx();
|
||||
gate->scanhash = (void*)&scanhash_lyra2rev2;
|
||||
gate->hash = (void*)&lyra2rev2_hash;
|
||||
#endif
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT | FOUR_WAY_OPT;
|
||||
gate->miner_thread_init = (void*)&lyra2rev2_thread_init;
|
||||
gate->set_target = (void*)&lyra2rev2_set_target;
|
||||
return true;
|
||||
};
|
||||
|
||||
|
35
algo/lyra2/lyra2rev2-gate.h
Normal file
35
algo/lyra2/lyra2rev2-gate.h
Normal file
@@ -0,0 +1,35 @@
|
||||
#ifndef LYRA2REV2_GATE_H__
|
||||
#define LYRA2REV2_GATE_H__ 1
|
||||
|
||||
#include "algo-gate-api.h"
|
||||
#include <stdint.h>
|
||||
#include "lyra2.h"
|
||||
|
||||
#if defined(HASH_4WAY)
|
||||
#define LYRA2REV2_4WAY
|
||||
#endif
|
||||
|
||||
extern __thread uint64_t* l2v2_wholeMatrix;
|
||||
|
||||
bool register_lyra2rev2_algo( algo_gate_t* gate );
|
||||
|
||||
#if defined(LYRA2REV2_4WAY)
|
||||
|
||||
void lyra2rev2_4way_hash( void *state, const void *input );
|
||||
|
||||
int scanhash_lyra2rev2_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
|
||||
void init_lyra2rev2_4way_ctx();
|
||||
|
||||
#endif
|
||||
|
||||
void lyra2rev2_hash( void *state, const void *input );
|
||||
|
||||
int scanhash_lyra2rev2( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
|
||||
void init_lyra2rev2_ctx();
|
||||
|
||||
#endif
|
||||
|
@@ -1,20 +1,12 @@
|
||||
#include "lyra2rev2-gate.h"
|
||||
#include <memory.h>
|
||||
|
||||
#include "algo-gate-api.h"
|
||||
|
||||
#include "algo/blake/sph_blake.h"
|
||||
#include "algo/cubehash/sph_cubehash.h"
|
||||
#include "algo/keccak/sph_keccak.h"
|
||||
#include "algo/skein/sph_skein.h"
|
||||
#include "algo/bmw/sph_bmw.h"
|
||||
#include "algo/cubehash/sse2/cubehash_sse2.h"
|
||||
#include "lyra2.h"
|
||||
#include "avxdefs.h"
|
||||
|
||||
// This gets allocated when miner_thread starts up and is never freed.
|
||||
// It's not a leak because the only way to allocate it again is to exit
|
||||
// the thread and that only occurs when the entire program exits.
|
||||
__thread uint64_t* l2v2_wholeMatrix;
|
||||
//#include "lyra2.h"
|
||||
|
||||
typedef struct {
|
||||
cubehashParam cube1;
|
||||
@@ -106,6 +98,7 @@ int scanhash_lyra2rev2(int thr_id, struct work *work,
|
||||
if( fulltest(hash, ptarget) )
|
||||
{
|
||||
pdata[19] = nonce;
|
||||
work_set_target_ratio( work, hash );
|
||||
*hashes_done = pdata[19] - first_nonce;
|
||||
return 1;
|
||||
}
|
||||
@@ -119,30 +112,3 @@ int scanhash_lyra2rev2(int thr_id, struct work *work,
|
||||
return 0;
|
||||
}
|
||||
|
||||
void lyra2rev2_set_target( struct work* work, double job_diff )
|
||||
{
|
||||
work_set_target( work, job_diff / (256.0 * opt_diff_factor) );
|
||||
}
|
||||
|
||||
bool lyra2rev2_thread_init()
|
||||
{
|
||||
const int64_t ROW_LEN_INT64 = BLOCK_LEN_INT64 * 4; // nCols
|
||||
const int64_t ROW_LEN_BYTES = ROW_LEN_INT64 * 8;
|
||||
|
||||
int i = (int64_t)ROW_LEN_BYTES * 4; // nRows;
|
||||
l2v2_wholeMatrix = _mm_malloc( i, 64 );
|
||||
|
||||
return l2v2_wholeMatrix;
|
||||
}
|
||||
|
||||
bool register_lyra2rev2_algo( algo_gate_t* gate )
|
||||
{
|
||||
init_lyra2rev2_ctx();
|
||||
gate->optimizations = AVX_OPT | AVX2_OPT;
|
||||
gate->miner_thread_init = (void*)&lyra2rev2_thread_init;
|
||||
gate->scanhash = (void*)&scanhash_lyra2rev2;
|
||||
gate->hash = (void*)&lyra2rev2_hash;
|
||||
gate->set_target = (void*)&lyra2rev2_set_target;
|
||||
return true;
|
||||
};
|
||||
|
||||
|
@@ -85,8 +85,8 @@ int scanhash_lyra2z_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
be32enc( noncep2, n+2 );
|
||||
be32enc( noncep3, n+3 );
|
||||
|
||||
be32enc( &edata[19], n );
|
||||
lyra2z_4way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
if ( hash[7] <= Htarg && fulltest( hash, ptarget ) )
|
||||
{
|
||||
|
@@ -82,41 +82,3 @@ int scanhash_lyra2z( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
//int64_t get_max64_0xffffLL() { return 0xffffLL; };
|
||||
|
||||
void lyra2z_set_target( struct work* work, double job_diff )
|
||||
{
|
||||
work_set_target( work, job_diff / (256.0 * opt_diff_factor) );
|
||||
}
|
||||
|
||||
bool zcoin_get_work_height( struct work* work, struct stratum_ctx* sctx )
|
||||
{
|
||||
work->height = sctx->bloc_height;
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
bool lyra2z_thread_init()
|
||||
{
|
||||
const int64_t ROW_LEN_INT64 = BLOCK_LEN_INT64 * 8; // nCols
|
||||
const int64_t ROW_LEN_BYTES = ROW_LEN_INT64 * 8;
|
||||
|
||||
int i = (int64_t)ROW_LEN_BYTES * 8; // nRows;
|
||||
lyra2z_wholeMatrix = _mm_malloc( i, 64 );
|
||||
|
||||
return lyra2z_wholeMatrix;
|
||||
}
|
||||
|
||||
bool register_lyra2z_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT;
|
||||
gate->miner_thread_init = (void*)&lyra2z_thread_init;
|
||||
gate->scanhash = (void*)&scanhash_lyra2z;
|
||||
gate->hash = (void*)&lyra2z_hash;
|
||||
gate->get_max64 = (void*)&get_max64_0xffffLL;
|
||||
gate->set_target = (void*)&lyra2z_set_target;
|
||||
// gate->prevent_dupes = (void*)&zcoin_get_work_height;
|
||||
return true;
|
||||
};
|
||||
*/
|
||||
|
@@ -346,6 +346,7 @@ int scanhash_m7m_hash( int thr_id, struct work* work,
|
||||
hash_str,
|
||||
target_str);
|
||||
}
|
||||
work_set_target_ratio( work, hash );
|
||||
pdata[19] = data[19];
|
||||
goto out;
|
||||
}
|
||||
|
@@ -132,6 +132,7 @@ int scanhash_nist5(int thr_id, struct work *work,
|
||||
if (!(hash64[7] & mask)) {
|
||||
printf("[%d]",thr_id);
|
||||
if (fulltest(hash64, ptarget)) {
|
||||
work_set_target_ratio( work, hash64 );
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return true;
|
||||
}
|
||||
|
@@ -172,6 +172,7 @@ int scanhash_zr5( int thr_id, struct work *work,
|
||||
pdata[0] = tmpdata[0];
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce + 1;
|
||||
work_set_target_ratio( work, hash );
|
||||
if (opt_debug)
|
||||
applog(LOG_INFO, "found nonce %x", nonce);
|
||||
return 1;
|
||||
|
207
algo/quark/quark-4way.c
Normal file
207
algo/quark/quark-4way.c
Normal file
@@ -0,0 +1,207 @@
|
||||
#include "cpuminer-config.h"
|
||||
#include "quark-gate.h"
|
||||
|
||||
#if defined (__AVX2__) && defined (__AES__)
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "algo/blake/blake-hash-4way.h"
|
||||
#include "algo/bmw/bmw-hash-4way.h"
|
||||
#include "algo/skein/skein-hash-4way.h"
|
||||
#include "algo/jh/jh-hash-4way.h"
|
||||
#include "algo/keccak/keccak-hash-4way.h"
|
||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||
|
||||
typedef struct {
|
||||
blake512_4way_context blake;
|
||||
bmw512_4way_context bmw;
|
||||
hashState_groestl groestl;
|
||||
jh512_4way_context jh;
|
||||
skein512_4way_context skein;
|
||||
keccak512_4way_context keccak;
|
||||
} quark_4way_ctx_holder;
|
||||
|
||||
quark_4way_ctx_holder quark_4way_ctx __attribute__ ((aligned (64)));
|
||||
|
||||
void init_quark_4way_ctx()
|
||||
{
|
||||
blake512_4way_init( &quark_4way_ctx.blake );
|
||||
bmw512_4way_init( &quark_4way_ctx.bmw );
|
||||
init_groestl( &quark_4way_ctx.groestl, 64 );
|
||||
skein512_4way_init( &quark_4way_ctx.skein );
|
||||
jh512_4way_init( &quark_4way_ctx.jh );
|
||||
keccak512_4way_init( &quark_4way_ctx.keccak );
|
||||
}
|
||||
|
||||
void quark_4way_hash( void *state, const void *input )
|
||||
{
|
||||
uint64_t hash0[8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash1[8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash2[8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash3[8] __attribute__ ((aligned (64)));
|
||||
uint64_t vhash[8*4] __attribute__ ((aligned (64)));
|
||||
uint64_t vhashA[8*4] __attribute__ ((aligned (64)));
|
||||
uint64_t vhashB[8*4] __attribute__ ((aligned (64)));
|
||||
__m256i* vh = (__m256i*)vhash;
|
||||
__m256i* vhA = (__m256i*)vhashA;
|
||||
__m256i* vhB = (__m256i*)vhashB;
|
||||
__m256i vh_mask;
|
||||
__m256i bit3_mask; bit3_mask = _mm256_set1_epi64x( 8 );
|
||||
int i;
|
||||
quark_4way_ctx_holder ctx;
|
||||
memcpy( &ctx, &quark_4way_ctx, sizeof(quark_4way_ctx) );
|
||||
|
||||
blake512_4way( &ctx.blake, input, 80 );
|
||||
blake512_4way_close( &ctx.blake, vhash );
|
||||
|
||||
bmw512_4way( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
|
||||
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ),
|
||||
mm256_zero );
|
||||
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash0,
|
||||
(char*)hash0, 512 );
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash1,
|
||||
(char*)hash1, 512 );
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash2,
|
||||
(char*)hash2, 512 );
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash3,
|
||||
(char*)hash3, 512 );
|
||||
mm256_interleave_4x64( vhashA, hash0, hash1, hash2, hash3, 512 );
|
||||
|
||||
skein512_4way( &ctx.skein, vhash, 64 );
|
||||
skein512_4way_close( &ctx.skein, vhashB );
|
||||
|
||||
for ( i = 0; i < 8; i++ )
|
||||
vh[i] = _mm256_blendv_epi8( vhA[i], vhB[i], vh_mask );
|
||||
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash1, (char*)hash1, 512 );
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash2, (char*)hash2, 512 );
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
||||
mm256_interleave_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
|
||||
jh512_4way( &ctx.jh, vhash, 64 );
|
||||
jh512_4way_close( &ctx.jh, vhash );
|
||||
|
||||
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ),
|
||||
mm256_zero );
|
||||
|
||||
blake512_4way_init( &ctx.blake );
|
||||
blake512_4way( &ctx.blake, vhash, 64 );
|
||||
blake512_4way_close( &ctx.blake, vhashA );
|
||||
|
||||
bmw512_4way_init( &ctx.bmw );
|
||||
bmw512_4way( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_close( &ctx.bmw, vhashB );
|
||||
|
||||
for ( i = 0; i < 8; i++ )
|
||||
vh[i] = _mm256_blendv_epi8( vhA[i], vhB[i], vh_mask );
|
||||
|
||||
keccak512_4way( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_close( &ctx.keccak, vhash );
|
||||
|
||||
skein512_4way_init( &ctx.skein );
|
||||
skein512_4way( &ctx.skein, vhash, 64 );
|
||||
skein512_4way_close( &ctx.skein, vhash );
|
||||
|
||||
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ),
|
||||
mm256_zero );
|
||||
|
||||
keccak512_4way_init( &ctx.keccak );
|
||||
keccak512_4way( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_close( &ctx.keccak, vhashA );
|
||||
|
||||
jh512_4way_init( &ctx.jh );
|
||||
jh512_4way( &ctx.jh, vhash, 64 );
|
||||
jh512_4way_close( &ctx.jh, vhashB );
|
||||
|
||||
for ( i = 0; i < 8; i++ )
|
||||
vh[i] = _mm256_blendv_epi8( vhA[i], vhB[i], vh_mask );
|
||||
|
||||
mm256_deinterleave_4x64( state, state+32, state+64, state+96, vhash, 256 );
|
||||
}
|
||||
|
||||
int scanhash_quark_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done)
|
||||
{
|
||||
uint32_t hash[4*8] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
||||
uint32_t endiandata[20] __attribute__((aligned(64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[19];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t *nonces = work->nonces;
|
||||
bool *found = work->nfound;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep0 = vdata + 73; // 9*8 + 1
|
||||
uint32_t *noncep1 = vdata + 75;
|
||||
uint32_t *noncep2 = vdata + 77;
|
||||
uint32_t *noncep3 = vdata + 79;
|
||||
|
||||
swab32_array( endiandata, pdata, 20 );
|
||||
|
||||
uint64_t *edata = (uint64_t*)endiandata;
|
||||
mm256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||
|
||||
do
|
||||
{
|
||||
found[0] = found[1] = found[2] = found[3] = false;
|
||||
be32enc( noncep0, n );
|
||||
be32enc( noncep1, n+1 );
|
||||
be32enc( noncep2, n+2 );
|
||||
be32enc( noncep3, n+3 );
|
||||
|
||||
quark_4way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
if ( ( hash[7] & 0xFFFFFF00 ) == 0 && fulltest( hash, ptarget ) )
|
||||
{
|
||||
found[0] = true;
|
||||
num_found++;
|
||||
nonces[0] = n;
|
||||
work_set_target_ratio( work, hash );
|
||||
}
|
||||
if ( ( (hash+8)[7] & 0xFFFFFF00 ) == 0 && fulltest( hash+8, ptarget ) )
|
||||
{
|
||||
found[1] = true;
|
||||
num_found++;
|
||||
nonces[1] = n+1;
|
||||
work_set_target_ratio( work, hash );
|
||||
}
|
||||
if ( ( (hash+16)[7] & 0xFFFFFF00 ) == 0 && fulltest( hash+16, ptarget ) )
|
||||
{
|
||||
found[2] = true;
|
||||
num_found++;
|
||||
nonces[2] = n+2;
|
||||
work_set_target_ratio( work, hash );
|
||||
}
|
||||
if ( ( (hash+24)[7] & 0xFFFFFF00 ) == 0 && fulltest( hash+24, ptarget ) )
|
||||
{
|
||||
found[3] = true;
|
||||
num_found++;
|
||||
nonces[3] = n+3;
|
||||
work_set_target_ratio( work, hash );
|
||||
}
|
||||
n += 4;
|
||||
} while ( ( num_found == 0 ) && ( n < max_nonce )
|
||||
&& !work_restart[thr_id].restart );
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return num_found;
|
||||
}
|
||||
|
||||
#endif
|
17
algo/quark/quark-gate.c
Normal file
17
algo/quark/quark-gate.c
Normal file
@@ -0,0 +1,17 @@
|
||||
#include "quark-gate.h"
|
||||
|
||||
bool register_quark_algo( algo_gate_t* gate )
|
||||
{
|
||||
#if defined (QUARK_4WAY)
|
||||
init_quark_4way_ctx();
|
||||
gate->scanhash = (void*)&scanhash_quark_4way;
|
||||
gate->hash = (void*)&quark_4way_hash;
|
||||
#else
|
||||
init_quark_ctx();
|
||||
gate->scanhash = (void*)&scanhash_quark;
|
||||
gate->hash = (void*)&quark_hash;
|
||||
#endif
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | FOUR_WAY_OPT;
|
||||
return true;
|
||||
};
|
||||
|
32
algo/quark/quark-gate.h
Normal file
32
algo/quark/quark-gate.h
Normal file
@@ -0,0 +1,32 @@
|
||||
#ifndef QUARK_GATE_H__
|
||||
#define QUARK_GATE_H__ 1
|
||||
|
||||
#include "algo-gate-api.h"
|
||||
#include <stdint.h>
|
||||
|
||||
#if defined(HASH_4WAY) && defined(__AES__)
|
||||
#define QUARK_4WAY
|
||||
#endif
|
||||
|
||||
bool register_quark_algo( algo_gate_t* gate );
|
||||
|
||||
#if defined(QUARK_4WAY)
|
||||
|
||||
void quark_4way_hash( void *state, const void *input );
|
||||
|
||||
int scanhash_quark_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
|
||||
void init_quark_4way_ctx();
|
||||
|
||||
#endif
|
||||
|
||||
void quark_hash( void *state, const void *input );
|
||||
|
||||
int scanhash_quark( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
|
||||
void init_quark_ctx();
|
||||
|
||||
#endif
|
||||
|
@@ -1,5 +1,5 @@
|
||||
#include "cpuminer-config.h"
|
||||
#include "algo-gate-api.h"
|
||||
#include "quark-gate.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
@@ -47,7 +47,7 @@ void init_quark_ctx()
|
||||
#endif
|
||||
}
|
||||
|
||||
inline static void quarkhash(void *state, const void *input)
|
||||
void quark_hash(void *state, const void *input)
|
||||
{
|
||||
unsigned char hashbuf[128];
|
||||
size_t hashptr;
|
||||
@@ -187,11 +187,12 @@ int scanhash_quark( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
do {
|
||||
pdata[19] = ++n;
|
||||
be32enc(&endiandata[19], n);
|
||||
quarkhash(hash64, &endiandata);
|
||||
quark_hash(hash64, &endiandata);
|
||||
if ((hash64[7]&0xFFFFFF00)==0)
|
||||
{
|
||||
if (fulltest(hash64, ptarget))
|
||||
{
|
||||
work_set_target_ratio( work, hash64 );
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return true;
|
||||
}
|
||||
@@ -203,12 +204,3 @@ int scanhash_quark( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool register_quark_algo( algo_gate_t* gate )
|
||||
{
|
||||
init_quark_ctx();
|
||||
gate->optimizations = SSE2_OPT | AES_OPT;
|
||||
gate->scanhash = (void*)&scanhash_quark;
|
||||
gate->hash = (void*)&quarkhash;
|
||||
return true;
|
||||
};
|
||||
|
||||
|
@@ -122,6 +122,7 @@ int scanhash_deep( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
if (!(hash64[7] & mask)) {
|
||||
printf("[%d]",thr_id);
|
||||
if (fulltest(hash64, ptarget)) {
|
||||
work_set_target_ratio( work, hash64 );
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return true;
|
||||
}
|
||||
|
@@ -134,6 +134,7 @@ int scanhash_qubit(int thr_id, struct work *work,
|
||||
if (!(hash64[7] & mask)) {
|
||||
printf("[%d]",thr_id);
|
||||
if (fulltest(hash64, ptarget)) {
|
||||
work_set_target_ratio( work, hash64 );
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return true;
|
||||
}
|
||||
|
@@ -754,6 +754,7 @@ extern int scanhash_scrypt( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
if (unlikely(hash[i * 8 + 7] <= Htarg && fulltest(hash + i * 8, ptarget))) {
|
||||
*hashes_done = n - pdata[19] + 1;
|
||||
pdata[19] = data[i * 20 + 19];
|
||||
work_set_target_ratio( work, hash );
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
@@ -114,7 +114,7 @@ available_implementations() {
|
||||
return flags;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
static int
|
||||
scrypt_test_mix() {
|
||||
static const uint8_t expected[16] = {
|
||||
@@ -145,4 +145,4 @@ scrypt_test_mix() {
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
*/
|
||||
|
@@ -26,7 +26,7 @@
|
||||
#include "scrypt-jane-pbkdf2.h"
|
||||
|
||||
#define SCRYPT_TEST_HASH_LEN 257 /* (2 * largest block size) + 1 */
|
||||
|
||||
/*
|
||||
static int
|
||||
scrypt_test_hash() {
|
||||
scrypt_hash_state st;
|
||||
@@ -45,4 +45,4 @@ scrypt_test_hash() {
|
||||
scrypt_hash_finish(&st, final);
|
||||
return scrypt_verify(final, scrypt_test_hash_expected, SCRYPT_HASH_DIGEST_SIZE);
|
||||
}
|
||||
|
||||
*/
|
||||
|
@@ -342,17 +342,6 @@ do { \
|
||||
do { \
|
||||
sph_u64 t0, t1, t2; \
|
||||
__m256i h8; \
|
||||
/* can LE be assumed? \
|
||||
dec64le does nothing when SPH_LITTLE endian is set, as it is. \
|
||||
__m256i m0 = _mm256_dec64le( buf ); \
|
||||
__m256i m1 = _mm256_dec64le( buf + 8*4 ); \
|
||||
__m256i m2 = _mm256_dec64le( buf + 16*4 ); \
|
||||
__m256i m3 = _mm256_dec64le( buf + 24*4 ); \
|
||||
__m256i m4 = _mm256_dec64le( buf + 32*4 ); \
|
||||
__m256i m5 = _mm256_dec64le( buf + 40*4 ); \
|
||||
__m256i m6 = _mm256_dec64le( buf + 48*4 ); \
|
||||
__m256i m7 = _mm256_dec64le( buf + 56*4 ); \
|
||||
*/ \
|
||||
__m256i m0 = buf[0]; \
|
||||
__m256i m1 = buf[1]; \
|
||||
__m256i m2 = buf[2]; \
|
||||
|
@@ -39,7 +39,9 @@
|
||||
*/
|
||||
|
||||
#ifndef __SKEIN_HASH_4WAY_H__
|
||||
#define __SKEIN_HASH_4WAY_H__
|
||||
#define __SKEIN_HASH_4WAY_H__ 1
|
||||
|
||||
#ifdef __AVX2__
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"{
|
||||
@@ -53,14 +55,15 @@ extern "C"{
|
||||
#define SPH_SIZE_skein256 256
|
||||
#define SPH_SIZE_skein512 512
|
||||
|
||||
#ifdef __AVX2__
|
||||
|
||||
typedef struct {
|
||||
__m256i buf[8] __attribute__ ((aligned (32)));
|
||||
__m256i h0, h1, h2, h3, h4, h5, h6, h7;
|
||||
size_t ptr;
|
||||
sph_u64 bcount;
|
||||
} skein512_4way_context;
|
||||
} sph_skein_4way_big_context;
|
||||
|
||||
typedef sph_skein_4way_big_context skein512_4way_context;
|
||||
typedef sph_skein_4way_big_context skein256_4way_context;
|
||||
|
||||
void skein512_4way_init(void *cc);
|
||||
void skein512_4way(void *cc, const void *data, size_t len);
|
||||
@@ -68,26 +71,15 @@ void skein512_4way_close(void *cc, void *dst);
|
||||
//void sph_skein512_addbits_and_close(
|
||||
// void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef __AVX__
|
||||
|
||||
typedef struct {
|
||||
__m128i buf[8] __attribute__ ((aligned (32)));
|
||||
__m128i h0, h1, h2, h3, h4, h5, h6, h7;
|
||||
size_t ptr;
|
||||
sph_u64 bcount;
|
||||
} skein256_4way_context;
|
||||
|
||||
void skein256_4way_init(void *cc);
|
||||
void skein256_4way(void *cc, const void *data, size_t len);
|
||||
void skein256_4way_close(void *cc, void *dst);
|
||||
//void sph_skein256_addbits_and_close(
|
||||
// void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
231
algo/sm3/sm3-hash-4way.c
Normal file
231
algo/sm3/sm3-hash-4way.c
Normal file
@@ -0,0 +1,231 @@
|
||||
/* ====================================================================
|
||||
* Copyright (c) 2014 - 2017 The GmSSL Project. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
*
|
||||
* 3. All advertising materials mentioning features or use of this
|
||||
* software must display the following acknowledgment:
|
||||
* "This product includes software developed by the GmSSL Project.
|
||||
* (http://gmssl.org/)"
|
||||
*
|
||||
* 4. The name "GmSSL Project" must not be used to endorse or promote
|
||||
* products derived from this software without prior written
|
||||
* permission. For written permission, please contact
|
||||
* guanzhi1980@gmail.com.
|
||||
*
|
||||
* 5. Products derived from this software may not be called "GmSSL"
|
||||
* nor may "GmSSL" appear in their names without prior written
|
||||
* permission of the GmSSL Project.
|
||||
*
|
||||
* 6. Redistributions of any form whatsoever must retain the following
|
||||
* acknowledgment:
|
||||
* "This product includes software developed by the GmSSL Project
|
||||
* (http://gmssl.org/)"
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE GmSSL PROJECT ``AS IS'' AND ANY
|
||||
* EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE GmSSL PROJECT OR
|
||||
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
* OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
* ====================================================================
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include "sm3-hash-4way.h"
|
||||
|
||||
#ifdef __AVX__
|
||||
|
||||
void sm3_4way_init( sm3_4way_ctx_t *ctx )
|
||||
{
|
||||
ctx->digest[0] = _mm_set1_epi32( 0x7380166F );
|
||||
ctx->digest[1] = _mm_set1_epi32( 0x4914B2B9 );
|
||||
ctx->digest[2] = _mm_set1_epi32( 0x172442D7 );
|
||||
ctx->digest[3] = _mm_set1_epi32( 0xDA8A0600 );
|
||||
ctx->digest[4] = _mm_set1_epi32( 0xA96F30BC );
|
||||
ctx->digest[5] = _mm_set1_epi32( 0x163138AA );
|
||||
ctx->digest[6] = _mm_set1_epi32( 0xE38DEE4D );
|
||||
ctx->digest[7] = _mm_set1_epi32( 0xB0FB0E4E );
|
||||
ctx->nblocks = 0;
|
||||
ctx->num = 0;
|
||||
}
|
||||
|
||||
void sm3_4way( void *cc, const void *data, size_t len )
|
||||
{
|
||||
sm3_4way_ctx_t *ctx = (sm3_4way_ctx_t*)cc;
|
||||
__m128i *block = (__m128i*)ctx->block;
|
||||
__m128i *vdata = (__m128i*)data;
|
||||
|
||||
if ( ctx->num )
|
||||
{
|
||||
unsigned int left = SM3_BLOCK_SIZE - ctx->num;
|
||||
if ( len < left )
|
||||
{
|
||||
memcpy_128( block + (ctx->num >> 2), vdata , len>>2 );
|
||||
ctx->num += len;
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
memcpy_128( block + (ctx->num >> 2), vdata , left>>2 );
|
||||
sm3_4way_compress( ctx->digest, block );
|
||||
ctx->nblocks++;
|
||||
vdata += left>>2;
|
||||
len -= left;
|
||||
}
|
||||
}
|
||||
while ( len >= SM3_BLOCK_SIZE )
|
||||
{
|
||||
sm3_4way_compress( ctx->digest, vdata );
|
||||
ctx->nblocks++;
|
||||
vdata += SM3_BLOCK_SIZE>>2;
|
||||
len -= SM3_BLOCK_SIZE;
|
||||
}
|
||||
ctx->num = len;
|
||||
if ( len )
|
||||
memcpy_128( block, vdata, len>>2 );
|
||||
}
|
||||
|
||||
void sm3_4way_close( void *cc, void *dst )
|
||||
{
|
||||
sm3_4way_ctx_t *ctx = (sm3_4way_ctx_t*)cc;
|
||||
__m128i *hash = (__m128i*)dst;
|
||||
__m128i *count = (__m128i*)(ctx->block + ( (SM3_BLOCK_SIZE - 8) >> 2 ) );
|
||||
__m128i *block = (__m128i*)ctx->block;
|
||||
int i;
|
||||
|
||||
block[ctx->num] = _mm_set1_epi32( 0x80 );
|
||||
|
||||
if ( ctx->num + 8 <= SM3_BLOCK_SIZE )
|
||||
{
|
||||
memset_zero_128( block + (ctx->num >> 2) + 1,
|
||||
( SM3_BLOCK_SIZE - ctx->num - 8 ) >> 2 );
|
||||
}
|
||||
else
|
||||
{
|
||||
memset_zero_128( block + (ctx->num >> 2) + 1,
|
||||
( SM3_BLOCK_SIZE - (ctx->num >> 2) - 1 ) );
|
||||
sm3_4way_compress( ctx->digest, block );
|
||||
memset_zero_128( block, ( SM3_BLOCK_SIZE - 8 ) >> 2 );
|
||||
}
|
||||
|
||||
count[0] = mm_byteswap_32(
|
||||
_mm_set1_epi32( ctx->nblocks >> 23 ) );
|
||||
count[1] = mm_byteswap_32( _mm_set1_epi32( ( ctx->nblocks << 9 ) +
|
||||
( ctx->num << 3 ) ) );
|
||||
sm3_4way_compress( ctx->digest, block );
|
||||
|
||||
for ( i = 0; i < 8 ; i++ )
|
||||
hash[i] = mm_byteswap_32( ctx->digest[i] );
|
||||
}
|
||||
|
||||
#define P0(x) _mm_xor_si128( x, _mm_xor_si128( mm_rotl_32( x, 9 ), \
|
||||
mm_rotl_32( x, 17 ) ) )
|
||||
#define P1(x) _mm_xor_si128( x, _mm_xor_si128( mm_rotl_32( x, 15 ), \
|
||||
mm_rotl_32( x, 23 ) ) )
|
||||
|
||||
#define FF0(x,y,z) _mm_xor_si128( x, _mm_xor_si128( y, z ) )
|
||||
#define FF1(x,y,z) _mm_or_si128( _mm_or_si128( _mm_and_si128( x, y ), \
|
||||
_mm_and_si128( x, z ) ), \
|
||||
_mm_and_si128( y, z ) )
|
||||
|
||||
#define GG0(x,y,z) FF0(x,y,z)
|
||||
#define GG1(x,y,z) _mm_or_si128( _mm_and_si128( x, y ), \
|
||||
_mm_andnot_si128( x, z ) )
|
||||
|
||||
|
||||
void sm3_4way_compress( __m128i *digest, __m128i *block )
|
||||
{
|
||||
__m128i W[68], W1[64];
|
||||
__m128i A = digest[ 0 ];
|
||||
__m128i B = digest[ 1 ];
|
||||
__m128i C = digest[ 2 ];
|
||||
__m128i D = digest[ 3 ];
|
||||
__m128i E = digest[ 4 ];
|
||||
__m128i F = digest[ 5 ];
|
||||
__m128i G = digest[ 6 ];
|
||||
__m128i H = digest[ 7 ];
|
||||
__m128i SS1, SS2, TT1, TT2, T;
|
||||
int j;
|
||||
|
||||
for ( j = 0; j < 16; j++ )
|
||||
W[j] = mm_byteswap_32( block[j] );
|
||||
|
||||
for ( j = 16; j < 68; j++ )
|
||||
W[j] = _mm_xor_si128( P1( _mm_xor_si128( _mm_xor_si128( W[ j-16 ],
|
||||
W[ j-9 ] ),
|
||||
mm_rotl_32( W[ j-3 ], 15 ) ) ),
|
||||
_mm_xor_si128( mm_rotl_32( W[ j-13 ], 7 ),
|
||||
W[ j-6 ] ) );
|
||||
|
||||
for( j = 0; j < 64; j++ )
|
||||
W1[j] = _mm_xor_si128( W[j], W[j+4] );
|
||||
|
||||
T = _mm_set1_epi32( 0x79CC4519UL );
|
||||
for( j =0; j < 16; j++ )
|
||||
{
|
||||
SS1 = mm_rotl_32( _mm_add_epi32( _mm_add_epi32( mm_rotl_32( A, 12 ), E ),
|
||||
mm_rotl_32( T, j ) ), 7 );
|
||||
SS2 = _mm_xor_si128( SS1, mm_rotl_32( A, 12 ) );
|
||||
TT1 = _mm_add_epi32( _mm_add_epi32( _mm_add_epi32( FF0( A, B, C ), D ),
|
||||
SS2 ), W1[j] );
|
||||
TT2 = _mm_add_epi32( _mm_add_epi32( _mm_add_epi32( GG0( E, F, G ), H ),
|
||||
SS1 ), W[j] );
|
||||
D = C;
|
||||
C = mm_rotl_32( B, 9 );
|
||||
B = A;
|
||||
A = TT1;
|
||||
H = G;
|
||||
G = mm_rotl_32( F, 19 );
|
||||
F = E;
|
||||
E = P0( TT2 );
|
||||
}
|
||||
|
||||
T = _mm_set1_epi32( 0x7A879D8AUL );
|
||||
for( j =16; j < 64; j++ )
|
||||
{
|
||||
SS1 = mm_rotl_32( _mm_add_epi32( _mm_add_epi32( mm_rotl_32( A, 12 ), E ),
|
||||
mm_rotl_32( T, j&31 ) ), 7 );
|
||||
SS2 = _mm_xor_si128( SS1, mm_rotl_32( A, 12 ) );
|
||||
TT1 = _mm_add_epi32( _mm_add_epi32( _mm_add_epi32( FF1( A, B, C ), D ),
|
||||
SS2 ), W1[j] );
|
||||
TT2 = _mm_add_epi32( _mm_add_epi32( _mm_add_epi32( GG1( E, F, G ), H ),
|
||||
SS1 ), W[j] );
|
||||
D = C;
|
||||
C = mm_rotl_32( B, 9 );
|
||||
B = A;
|
||||
A = TT1;
|
||||
H = G;
|
||||
G = mm_rotl_32( F, 19 );
|
||||
F = E;
|
||||
E = P0( TT2 );
|
||||
}
|
||||
|
||||
digest[0] = _mm_xor_si128( digest[0], A );
|
||||
digest[1] = _mm_xor_si128( digest[1], B );
|
||||
digest[2] = _mm_xor_si128( digest[2], C );
|
||||
digest[3] = _mm_xor_si128( digest[3], D );
|
||||
digest[4] = _mm_xor_si128( digest[4], E );
|
||||
digest[5] = _mm_xor_si128( digest[5], F );
|
||||
digest[6] = _mm_xor_si128( digest[6], G );
|
||||
digest[7] = _mm_xor_si128( digest[7], H );
|
||||
}
|
||||
|
||||
#endif
|
||||
|
89
algo/sm3/sm3-hash-4way.h
Normal file
89
algo/sm3/sm3-hash-4way.h
Normal file
@@ -0,0 +1,89 @@
|
||||
/* ====================================================================
|
||||
* Copyright (c) 2014 - 2016 The GmSSL Project. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
*
|
||||
* 3. All advertising materials mentioning features or use of this
|
||||
* software must display the following acknowledgment:
|
||||
* "This product includes software developed by the GmSSL Project.
|
||||
* (http://gmssl.org/)"
|
||||
*
|
||||
* 4. The name "GmSSL Project" must not be used to endorse or promote
|
||||
* products derived from this software without prior written
|
||||
* permission. For written permission, please contact
|
||||
* guanzhi1980@gmail.com.
|
||||
*
|
||||
* 5. Products derived from this software may not be called "GmSSL"
|
||||
* nor may "GmSSL" appear in their names without prior written
|
||||
* permission of the GmSSL Project.
|
||||
*
|
||||
* 6. Redistributions of any form whatsoever must retain the following
|
||||
* acknowledgment:
|
||||
* "This product includes software developed by the GmSSL Project
|
||||
* (http://gmssl.org/)"
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE GmSSL PROJECT ``AS IS'' AND ANY
|
||||
* EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE GmSSL PROJECT OR
|
||||
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
* OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
* ====================================================================
|
||||
*/
|
||||
|
||||
#ifndef SPH_SM3_HASH_4WAY_H
|
||||
#define SPH_SM3_HASH_4WAY_H
|
||||
|
||||
#define SM3_DIGEST_LENGTH 32
|
||||
#define SM3_BLOCK_SIZE 64
|
||||
#define SM3_CBLOCK (SM3_BLOCK_SIZE)
|
||||
#define SM3_HMAC_SIZE (SM3_DIGEST_LENGTH)
|
||||
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include "avxdefs.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
typedef struct {
|
||||
__m128i block[16] __attribute__ ((aligned (64)));
|
||||
__m128i digest[8];
|
||||
uint32_t nblocks;
|
||||
uint32_t num;
|
||||
} sm3_4way_ctx_t;
|
||||
|
||||
void sm3_4way_init( sm3_4way_ctx_t *ctx );
|
||||
//void sm3_4way_update( sm3_4way_ctx_t *ctx, const unsigned char* data,
|
||||
// size_t data_len );
|
||||
//void sm3_4way_final( sm3_4way_ctx_t *ctx,
|
||||
// unsigned char digest[SM3_DIGEST_LENGTH] );
|
||||
void sm3_4way_compress( __m128i *digest, __m128i *block );
|
||||
|
||||
void sm3_4way(void *cc, const void *data, size_t len);
|
||||
void sm3_4way_close(void *cc, void *dst);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
#endif
|
@@ -189,7 +189,7 @@ void sm3_compress(uint32_t digest[8], const unsigned char block[64])
|
||||
for(j =16; j < 64; j++) {
|
||||
|
||||
T[j] = 0x7A879D8A;
|
||||
SS1 = ROTATELEFT((ROTATELEFT(A,12) + E + ROTATELEFT(T[j],j)), 7);
|
||||
SS1 = ROTATELEFT((ROTATELEFT(A,12) + E + ROTATELEFT(T[j],j&31)), 7);
|
||||
SS2 = SS1 ^ ROTATELEFT(A,12);
|
||||
TT1 = FF1(A,B,C) + D + SS2 + W1[j];
|
||||
TT2 = GG1(E,F,G) + H + SS1 + W[j];
|
||||
|
@@ -3468,9 +3468,10 @@ sph_ ## name ## _close(void *cc, void *dst) \
|
||||
for (i = 0; i < 8; i ++) \
|
||||
sph_enc64le((unsigned char *)dst + 8 * i, sc->state[i]); \
|
||||
}
|
||||
// sph_ ## name ## _init(cc); \
|
||||
//}
|
||||
|
||||
/*
|
||||
sph_ ## name ## _init(cc); \
|
||||
}
|
||||
*/
|
||||
MAKE_CLOSE(whirlpool)
|
||||
MAKE_CLOSE(whirlpool0)
|
||||
MAKE_CLOSE(whirlpool1)
|
||||
|
@@ -22,6 +22,7 @@ void whirlpool_hash( void *state, const void *input );
|
||||
|
||||
int scanhash_whirlpool( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
void init_whirlpool_ctx();
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
@@ -3345,8 +3345,10 @@ do { \
|
||||
#define READ_STATE MUL8(READ_STATE_W)
|
||||
#define ROUND0 MUL8(ROUND0_W)
|
||||
#define UPDATE_STATE MUL8(UPDATE_STATE_W)
|
||||
//#define BYTE(x, n) \
|
||||
// _mm256_and_si256( _mm256_srli_epi64( x, n<<3 ), _mm256_set1_epi64x( 0xFF ) )
|
||||
/*
|
||||
#define BYTE(x, n) \
|
||||
_mm256_and_si256( _mm256_srli_epi64( x, n<<3 ), _mm256_set1_epi64x( 0xFF ) )
|
||||
*/
|
||||
#define BYTE(x, n) ((unsigned)((x) >> (8 * (n))) & 0xFF)
|
||||
|
||||
|
||||
|
@@ -162,7 +162,8 @@ int scanhash_c11( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
{
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce;
|
||||
return 1;
|
||||
work_set_target_ratio( work, hash );
|
||||
return 1;
|
||||
}
|
||||
nonce++;
|
||||
} while ( nonce < max_nonce && !(*restart) );
|
||||
|
274
algo/x11/timetravel-4way.c
Normal file
274
algo/x11/timetravel-4way.c
Normal file
@@ -0,0 +1,274 @@
|
||||
#include "timetravel-gate.h"
|
||||
|
||||
#if defined(__AVX2__) && defined(__AES__)
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include "algo/blake/blake-hash-4way.h"
|
||||
#include "algo/bmw/bmw-hash-4way.h"
|
||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||
#include "algo/skein/skein-hash-4way.h"
|
||||
#include "algo/jh/jh-hash-4way.h"
|
||||
#include "algo/keccak/keccak-hash-4way.h"
|
||||
#include "algo/luffa/sse2/luffa_for_sse2.h"
|
||||
#include "algo/cubehash/sse2/cubehash_sse2.h"
|
||||
|
||||
static __thread uint32_t s_ntime = UINT32_MAX;
|
||||
static __thread int permutation[TT8_FUNC_COUNT] = { 0 };
|
||||
|
||||
typedef struct {
|
||||
blake512_4way_context blake;
|
||||
bmw512_4way_context bmw;
|
||||
hashState_groestl groestl;
|
||||
skein512_4way_context skein;
|
||||
jh512_4way_context jh;
|
||||
keccak512_4way_context keccak;
|
||||
hashState_luffa luffa;
|
||||
cubehashParam cube;
|
||||
} tt8_4way_ctx_holder;
|
||||
|
||||
tt8_4way_ctx_holder tt8_4way_ctx __attribute__ ((aligned (64)));
|
||||
|
||||
void init_tt8_4way_ctx()
|
||||
{
|
||||
blake512_4way_init( &tt8_4way_ctx.blake );
|
||||
bmw512_4way_init( &tt8_4way_ctx.bmw );
|
||||
init_groestl( &tt8_4way_ctx.groestl, 64 );
|
||||
skein512_4way_init( &tt8_4way_ctx.skein );
|
||||
jh512_4way_init( &tt8_4way_ctx.jh );
|
||||
keccak512_4way_init( &tt8_4way_ctx.keccak );
|
||||
init_luffa( &tt8_4way_ctx.luffa, 512 );
|
||||
cubehashInit( &tt8_4way_ctx.cube, 512, 16, 32 );
|
||||
};
|
||||
|
||||
void timetravel_4way_hash(void *output, const void *input)
|
||||
{
|
||||
uint64_t hash0[8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash1[8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash2[8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash3[8] __attribute__ ((aligned (64)));
|
||||
uint64_t vhashX[8*4] __attribute__ ((aligned (64)));
|
||||
uint64_t vhashY[8*4] __attribute__ ((aligned (64)));
|
||||
uint64_t *vhashA, *vhashB;
|
||||
tt8_4way_ctx_holder ctx __attribute__ ((aligned (64)));
|
||||
uint32_t dataLen = 64;
|
||||
int i;
|
||||
|
||||
memcpy( &ctx, &tt8_4way_ctx, sizeof(tt8_4way_ctx) );
|
||||
|
||||
for ( i = 0; i < TT8_FUNC_COUNT; i++ )
|
||||
{
|
||||
if (i == 0)
|
||||
{
|
||||
dataLen = 80;
|
||||
vhashA = (uint64_t*)input;
|
||||
vhashB = vhashX;
|
||||
}
|
||||
else
|
||||
{
|
||||
dataLen = 64;
|
||||
if ( i % 2 == 0 )
|
||||
{
|
||||
vhashA = vhashY;
|
||||
vhashB = vhashX;
|
||||
}
|
||||
else
|
||||
{
|
||||
vhashA = vhashX;
|
||||
vhashB = vhashY;
|
||||
}
|
||||
}
|
||||
|
||||
switch ( permutation[i] )
|
||||
{
|
||||
case 0:
|
||||
blake512_4way( &ctx.blake, vhashA, dataLen );
|
||||
blake512_4way_close( &ctx.blake, vhashB );
|
||||
if ( i == 7 )
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3,
|
||||
vhashB, dataLen<<3 );
|
||||
break;
|
||||
case 1:
|
||||
bmw512_4way( &ctx.bmw, vhashA, dataLen );
|
||||
bmw512_4way_close( &ctx.bmw, vhashB );
|
||||
if ( i == 7 )
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3,
|
||||
vhashB, dataLen<<3 );
|
||||
break;
|
||||
case 2:
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3,
|
||||
vhashA, dataLen<<3 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash0,
|
||||
(char*)hash0, dataLen<<3 );
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash1,
|
||||
(char*)hash1, dataLen<<3 );
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash2,
|
||||
(char*)hash2, dataLen<<3 );
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash3,
|
||||
(char*)hash3, dataLen<<3 );
|
||||
if ( i != 7 )
|
||||
mm256_interleave_4x64( vhashB,
|
||||
hash0, hash1, hash2, hash3, dataLen<<3 );
|
||||
break;
|
||||
case 3:
|
||||
skein512_4way( &ctx.skein, vhashA, dataLen );
|
||||
skein512_4way_close( &ctx.skein, vhashB );
|
||||
if ( i == 7 )
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3,
|
||||
vhashB, dataLen<<3 );
|
||||
break;
|
||||
case 4:
|
||||
jh512_4way( &ctx.jh, vhashA, dataLen );
|
||||
jh512_4way_close( &ctx.jh, vhashB );
|
||||
if ( i == 7 )
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3,
|
||||
vhashB, dataLen<<3 );
|
||||
break;
|
||||
case 5:
|
||||
keccak512_4way( &ctx.keccak, vhashA, dataLen );
|
||||
keccak512_4way_close( &ctx.keccak, vhashB );
|
||||
if ( i == 7 )
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3,
|
||||
vhashB, dataLen<<3 );
|
||||
break;
|
||||
case 6:
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3,
|
||||
vhashA, dataLen<<3 );
|
||||
update_and_final_luffa( &ctx.luffa, (BitSequence*)hash0,
|
||||
(const BitSequence *)hash0, dataLen );
|
||||
memcpy( &ctx.luffa, &tt8_4way_ctx.luffa, sizeof(hashState_luffa) );
|
||||
update_and_final_luffa( &ctx.luffa, (BitSequence*)hash1,
|
||||
(const BitSequence*)hash1, dataLen );
|
||||
memcpy( &ctx.luffa, &tt8_4way_ctx.luffa, sizeof(hashState_luffa) );
|
||||
update_and_final_luffa( &ctx.luffa, (BitSequence*)hash2,
|
||||
(const BitSequence*)hash2, dataLen );
|
||||
memcpy( &ctx.luffa, &tt8_4way_ctx.luffa, sizeof(hashState_luffa) );
|
||||
update_and_final_luffa( &ctx.luffa, (BitSequence*)hash3,
|
||||
(const BitSequence*)hash3, dataLen );
|
||||
if ( i != 7 )
|
||||
mm256_interleave_4x64( vhashB,
|
||||
hash0, hash1, hash2, hash3, dataLen<<3 );
|
||||
break;
|
||||
case 7:
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3,
|
||||
vhashA, dataLen<<3 );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash0,
|
||||
(const byte*)hash0, dataLen );
|
||||
memcpy( &ctx.cube, &tt8_4way_ctx.cube, sizeof(cubehashParam) );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash1,
|
||||
(const byte*)hash1, dataLen );
|
||||
memcpy( &ctx.cube, &tt8_4way_ctx.cube, sizeof(cubehashParam) );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash2,
|
||||
(const byte*)hash2, dataLen );
|
||||
memcpy( &ctx.cube, &tt8_4way_ctx.cube, sizeof(cubehashParam) );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash3,
|
||||
(const byte*)hash3, dataLen );
|
||||
if ( i != 7 )
|
||||
mm256_interleave_4x64( vhashB,
|
||||
hash0, hash1, hash2, hash3, dataLen<<3 );
|
||||
break;
|
||||
default:
|
||||
applog(LOG_ERR,"SWERR: timetravel invalid permutation");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
memcpy( output, hash0, 32 );
|
||||
memcpy( output+32, hash1, 32 );
|
||||
memcpy( output+64, hash2, 32 );
|
||||
memcpy( output+96, hash3, 32 );
|
||||
}
|
||||
|
||||
int scanhash_timetravel_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
{
|
||||
uint32_t hash[4*8] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
||||
uint32_t endiandata[20] __attribute__((aligned(64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[19];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t *nonces = work->nonces;
|
||||
bool *found = work->nfound;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep0 = vdata + 73; // 9*8 + 1
|
||||
uint32_t *noncep1 = vdata + 75;
|
||||
uint32_t *noncep2 = vdata + 77;
|
||||
uint32_t *noncep3 = vdata + 79;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
||||
int i;
|
||||
|
||||
if ( opt_benchmark )
|
||||
ptarget[7] = 0x0cff;
|
||||
|
||||
for ( int k = 0; k < 19; k++ )
|
||||
be32enc( &endiandata[k], pdata[k] );
|
||||
|
||||
const uint32_t timestamp = endiandata[17];
|
||||
if ( timestamp != s_ntime )
|
||||
{
|
||||
const int steps = ( timestamp - TT8_FUNC_BASE_TIMESTAMP )
|
||||
% TT8_FUNC_COUNT_PERMUTATIONS;
|
||||
for ( i = 0; i < TT8_FUNC_COUNT; i++ )
|
||||
permutation[i] = i;
|
||||
for ( i = 0; i < steps; i++ )
|
||||
tt8_next_permutation( permutation, permutation + TT8_FUNC_COUNT );
|
||||
s_ntime = timestamp;
|
||||
}
|
||||
|
||||
uint64_t *edata = (uint64_t*)endiandata;
|
||||
mm256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||
|
||||
do
|
||||
{
|
||||
found[0] = found[1] = found[2] = found[3] = false;
|
||||
be32enc( noncep0, n );
|
||||
be32enc( noncep1, n+1 );
|
||||
be32enc( noncep2, n+2 );
|
||||
be32enc( noncep3, n+3 );
|
||||
|
||||
timetravel_4way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
if ( hash[7] <= Htarg && fulltest( hash, ptarget) )
|
||||
{
|
||||
found[0] = true;
|
||||
num_found++;
|
||||
nonces[0] = n;
|
||||
work_set_target_ratio( work, hash );
|
||||
}
|
||||
if ( (hash+8)[7] <= Htarg && fulltest( hash+8, ptarget) )
|
||||
{
|
||||
found[1] = true;
|
||||
num_found++;
|
||||
nonces[1] = n+1;
|
||||
work_set_target_ratio( work, hash+8 );
|
||||
}
|
||||
if ( (hash+16)[7] <= Htarg && fulltest( hash+16, ptarget) )
|
||||
{
|
||||
found[2] = true;
|
||||
num_found++;
|
||||
nonces[2] = n+2;
|
||||
work_set_target_ratio( work, hash+16 );
|
||||
}
|
||||
if ( (hash+24)[7] <= Htarg && fulltest( hash+24, ptarget) )
|
||||
{
|
||||
found[3] = true;
|
||||
num_found++;
|
||||
nonces[3] = n+3;
|
||||
work_set_target_ratio( work, hash+24 );
|
||||
}
|
||||
n += 4;
|
||||
} while ( ( num_found == 0 ) && ( n < max_nonce ) && !(*restart) );
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return num_found;
|
||||
}
|
||||
|
||||
#endif
|
78
algo/x11/timetravel-gate.c
Normal file
78
algo/x11/timetravel-gate.c
Normal file
@@ -0,0 +1,78 @@
|
||||
#include "timetravel-gate.h"
|
||||
|
||||
void tt8_set_target( struct work* work, double job_diff )
|
||||
{
|
||||
work_set_target( work, job_diff / (256.0 * opt_diff_factor) );
|
||||
}
|
||||
|
||||
bool register_timetravel_algo( algo_gate_t* gate )
|
||||
{
|
||||
#ifdef TIMETRAVEL_4WAY
|
||||
init_tt8_4way_ctx();
|
||||
gate->scanhash = (void*)&scanhash_timetravel_4way;
|
||||
gate->hash = (void*)&timetravel_4way_hash;
|
||||
#else
|
||||
init_tt8_ctx();
|
||||
gate->scanhash = (void*)&scanhash_timetravel;
|
||||
gate->hash = (void*)&timetravel_hash;
|
||||
#endif
|
||||
gate->set_target = (void*)&tt8_set_target;
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT | FOUR_WAY_OPT;
|
||||
gate->get_max64 = (void*)&get_max64_0xffffLL;
|
||||
return true;
|
||||
};
|
||||
|
||||
inline void tt_swap( int *a, int *b )
|
||||
{
|
||||
int c = *a;
|
||||
*a = *b;
|
||||
*b = c;
|
||||
}
|
||||
|
||||
inline void reverse( int *pbegin, int *pend )
|
||||
{
|
||||
while ( (pbegin != pend) && (pbegin != --pend) )
|
||||
{
|
||||
tt_swap( pbegin, pend );
|
||||
pbegin++;
|
||||
}
|
||||
}
|
||||
|
||||
void tt8_next_permutation( int *pbegin, int *pend )
|
||||
{
|
||||
if ( pbegin == pend )
|
||||
return;
|
||||
|
||||
int *i = pbegin;
|
||||
++i;
|
||||
if ( i == pend )
|
||||
return;
|
||||
|
||||
i = pend;
|
||||
--i;
|
||||
|
||||
while (1)
|
||||
{
|
||||
int *j = i;
|
||||
--i;
|
||||
|
||||
if ( *i < *j )
|
||||
{
|
||||
int *k = pend;
|
||||
|
||||
while ( !(*i < *--k) ) /* do nothing */ ;
|
||||
|
||||
tt_swap( i, k );
|
||||
reverse(j, pend);
|
||||
return; // true
|
||||
}
|
||||
|
||||
if ( i == pbegin )
|
||||
{
|
||||
reverse(pbegin, pend);
|
||||
return; // false
|
||||
}
|
||||
// else?
|
||||
}
|
||||
}
|
||||
|
40
algo/x11/timetravel-gate.h
Normal file
40
algo/x11/timetravel-gate.h
Normal file
@@ -0,0 +1,40 @@
|
||||
#ifndef TIMETRAVEL_GATE_H__
|
||||
#define TIMETRAVEL_GATE_H__ 1
|
||||
|
||||
#include "algo-gate-api.h"
|
||||
#include <stdint.h>
|
||||
|
||||
#if defined(HASH_4WAY) && defined(__AES__)
|
||||
#define TIMETRAVEL_4WAY
|
||||
#endif
|
||||
|
||||
// Machinecoin Genesis Timestamp
|
||||
#define TT8_FUNC_BASE_TIMESTAMP 1389040865
|
||||
|
||||
#define TT8_FUNC_COUNT 8
|
||||
#define TT8_FUNC_COUNT_PERMUTATIONS 40320
|
||||
|
||||
void tt8_next_permutation( int *pbegin, int *pend );
|
||||
|
||||
bool register_timetravel_algo( algo_gate_t* gate );
|
||||
|
||||
#if defined(TIMETRAVEL_4WAY)
|
||||
|
||||
void timetravel_4way_hash( void *state, const void *input );
|
||||
|
||||
int scanhash_timetravel_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
|
||||
void init_tt8_4way_ctx();
|
||||
|
||||
#endif
|
||||
|
||||
void timetravel_hash( void *state, const void *input );
|
||||
|
||||
int scanhash_timetravel( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
|
||||
void init_tt8_ctx();
|
||||
|
||||
#endif
|
||||
|
@@ -1,11 +1,9 @@
|
||||
#include "algo-gate-api.h"
|
||||
#include "timetravel-gate.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include "avxdefs.h"
|
||||
|
||||
#include "algo/blake/sph_blake.h"
|
||||
#include "algo/bmw/sph_bmw.h"
|
||||
#include "algo/jh/sph_jh.h"
|
||||
@@ -13,75 +11,14 @@
|
||||
#include "algo/skein/sph_skein.h"
|
||||
#include "algo/luffa/sse2/luffa_for_sse2.h"
|
||||
#include "algo/cubehash/sse2/cubehash_sse2.h"
|
||||
|
||||
#ifdef NO_AES_NI
|
||||
#include "algo/groestl/sph_groestl.h"
|
||||
#else
|
||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||
#endif
|
||||
|
||||
// Machinecoin Genesis Timestamp
|
||||
#define HASH_FUNC_BASE_TIMESTAMP 1389040865
|
||||
|
||||
#define HASH_FUNC_COUNT 8
|
||||
#define HASH_FUNC_COUNT_PERMUTATIONS 40320
|
||||
|
||||
static __thread uint32_t s_ntime = UINT32_MAX;
|
||||
static __thread int permutation[HASH_FUNC_COUNT] = { 0 };
|
||||
|
||||
inline void tt_swap( int *a, int *b )
|
||||
{
|
||||
int c = *a;
|
||||
*a = *b;
|
||||
*b = c;
|
||||
}
|
||||
|
||||
inline void reverse( int *pbegin, int *pend )
|
||||
{
|
||||
while ( (pbegin != pend) && (pbegin != --pend) )
|
||||
{
|
||||
tt_swap( pbegin, pend );
|
||||
pbegin++;
|
||||
}
|
||||
}
|
||||
|
||||
static void next_permutation( int *pbegin, int *pend )
|
||||
{
|
||||
if ( pbegin == pend )
|
||||
return;
|
||||
|
||||
int *i = pbegin;
|
||||
++i;
|
||||
if ( i == pend )
|
||||
return;
|
||||
|
||||
i = pend;
|
||||
--i;
|
||||
|
||||
while (1)
|
||||
{
|
||||
int *j = i;
|
||||
--i;
|
||||
|
||||
if ( *i < *j )
|
||||
{
|
||||
int *k = pend;
|
||||
|
||||
while ( !(*i < *--k) ) /* do nothing */ ;
|
||||
|
||||
tt_swap( i, k );
|
||||
reverse(j, pend);
|
||||
return; // true
|
||||
}
|
||||
|
||||
if ( i == pbegin )
|
||||
{
|
||||
reverse(pbegin, pend);
|
||||
return; // false
|
||||
}
|
||||
// else?
|
||||
}
|
||||
}
|
||||
static __thread int permutation[TT8_FUNC_COUNT] = { 0 };
|
||||
|
||||
typedef struct {
|
||||
sph_blake512_context blake;
|
||||
@@ -101,7 +38,7 @@ typedef struct {
|
||||
tt_ctx_holder tt_ctx __attribute__ ((aligned (64)));
|
||||
__thread tt_ctx_holder tt_mid __attribute__ ((aligned (64)));
|
||||
|
||||
void init_tt_ctx()
|
||||
void init_tt8_ctx()
|
||||
{
|
||||
sph_blake512_init( &tt_ctx.blake );
|
||||
sph_bmw512_init( &tt_ctx.bmw );
|
||||
@@ -119,7 +56,7 @@ void init_tt_ctx()
|
||||
|
||||
void timetravel_hash(void *output, const void *input)
|
||||
{
|
||||
uint32_t hash[ 16 * HASH_FUNC_COUNT ] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[ 16 * TT8_FUNC_COUNT ] __attribute__ ((aligned (64)));
|
||||
uint32_t *hashA, *hashB;
|
||||
tt_ctx_holder ctx __attribute__ ((aligned (64)));
|
||||
uint32_t dataLen = 64;
|
||||
@@ -130,7 +67,7 @@ void timetravel_hash(void *output, const void *input)
|
||||
|
||||
memcpy( &ctx, &tt_ctx, sizeof(tt_ctx) );
|
||||
|
||||
for ( i = 0; i < HASH_FUNC_COUNT; i++ )
|
||||
for ( i = 0; i < TT8_FUNC_COUNT; i++ )
|
||||
{
|
||||
if (i == 0)
|
||||
{
|
||||
@@ -270,7 +207,7 @@ void timetravel_hash(void *output, const void *input)
|
||||
}
|
||||
}
|
||||
|
||||
memcpy(output, &hash[16 * (HASH_FUNC_COUNT - 1)], 32);
|
||||
memcpy(output, &hash[16 * (TT8_FUNC_COUNT - 1)], 32);
|
||||
}
|
||||
|
||||
int scanhash_timetravel( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
@@ -296,12 +233,12 @@ int scanhash_timetravel( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
const uint32_t timestamp = endiandata[17];
|
||||
if ( timestamp != s_ntime )
|
||||
{
|
||||
const int steps = ( timestamp - HASH_FUNC_BASE_TIMESTAMP )
|
||||
% HASH_FUNC_COUNT_PERMUTATIONS;
|
||||
for ( i = 0; i < HASH_FUNC_COUNT; i++ )
|
||||
const int steps = ( timestamp - TT8_FUNC_BASE_TIMESTAMP )
|
||||
% TT8_FUNC_COUNT_PERMUTATIONS;
|
||||
for ( i = 0; i < TT8_FUNC_COUNT; i++ )
|
||||
permutation[i] = i;
|
||||
for ( i = 0; i < steps; i++ )
|
||||
next_permutation( permutation, permutation + HASH_FUNC_COUNT );
|
||||
tt8_next_permutation( permutation, permutation + TT8_FUNC_COUNT );
|
||||
s_ntime = timestamp;
|
||||
|
||||
// do midstate precalc for first function
|
||||
@@ -359,6 +296,7 @@ int scanhash_timetravel( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
work_set_target_ratio( work, hash );
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce;
|
||||
work_set_target_ratio( work, hash );
|
||||
return 1;
|
||||
}
|
||||
nonce++;
|
||||
@@ -370,19 +308,4 @@ int scanhash_timetravel( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
return 0;
|
||||
}
|
||||
|
||||
void timetravel_set_target( struct work* work, double job_diff )
|
||||
{
|
||||
work_set_target( work, job_diff / (256.0 * opt_diff_factor) );
|
||||
}
|
||||
|
||||
bool register_timetravel_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT;
|
||||
init_tt_ctx();
|
||||
gate->scanhash = (void*)&scanhash_timetravel;
|
||||
gate->hash = (void*)&timetravel_hash;
|
||||
gate->set_target = (void*)&timetravel_set_target;
|
||||
gate->get_max64 = (void*)&get_max64_0xffffLL;
|
||||
return true;
|
||||
};
|
||||
|
316
algo/x11/timetravel10-4way.c
Normal file
316
algo/x11/timetravel10-4way.c
Normal file
@@ -0,0 +1,316 @@
|
||||
#include "timetravel10-gate.h"
|
||||
|
||||
#if defined(__AVX2__) && defined(__AES__)
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include "algo/blake/blake-hash-4way.h"
|
||||
#include "algo/bmw/bmw-hash-4way.h"
|
||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||
#include "algo/skein/skein-hash-4way.h"
|
||||
#include "algo/jh/jh-hash-4way.h"
|
||||
#include "algo/keccak/keccak-hash-4way.h"
|
||||
#include "algo/luffa/sse2/luffa_for_sse2.h"
|
||||
#include "algo/cubehash/sse2/cubehash_sse2.h"
|
||||
#include "algo/shavite/sph_shavite.h"
|
||||
#include "algo/simd/sse2/nist.h"
|
||||
|
||||
static __thread uint32_t s_ntime = UINT32_MAX;
|
||||
static __thread int permutation[TT10_FUNC_COUNT] = { 0 };
|
||||
|
||||
typedef struct {
|
||||
blake512_4way_context blake;
|
||||
bmw512_4way_context bmw;
|
||||
hashState_groestl groestl;
|
||||
skein512_4way_context skein;
|
||||
jh512_4way_context jh;
|
||||
keccak512_4way_context keccak;
|
||||
hashState_luffa luffa;
|
||||
cubehashParam cube;
|
||||
sph_shavite512_context shavite;
|
||||
hashState_sd simd;
|
||||
} tt10_4way_ctx_holder;
|
||||
|
||||
tt10_4way_ctx_holder tt10_4way_ctx __attribute__ ((aligned (64)));
|
||||
|
||||
void init_tt10_4way_ctx()
|
||||
{
|
||||
blake512_4way_init( &tt10_4way_ctx.blake );
|
||||
bmw512_4way_init( &tt10_4way_ctx.bmw );
|
||||
init_groestl( &tt10_4way_ctx.groestl, 64 );
|
||||
skein512_4way_init( &tt10_4way_ctx.skein );
|
||||
jh512_4way_init( &tt10_4way_ctx.jh );
|
||||
keccak512_4way_init( &tt10_4way_ctx.keccak );
|
||||
init_luffa( &tt10_4way_ctx.luffa, 512 );
|
||||
cubehashInit( &tt10_4way_ctx.cube, 512, 16, 32 );
|
||||
sph_shavite512_init( &tt10_4way_ctx.shavite );
|
||||
init_sd( &tt10_4way_ctx.simd, 512 );
|
||||
};
|
||||
|
||||
void timetravel10_4way_hash(void *output, const void *input)
|
||||
{
|
||||
uint64_t hash0[8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash1[8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash2[8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash3[8] __attribute__ ((aligned (64)));
|
||||
uint64_t vhashX[8*4] __attribute__ ((aligned (64)));
|
||||
uint64_t vhashY[8*4] __attribute__ ((aligned (64)));
|
||||
uint64_t *vhashA, *vhashB;
|
||||
tt10_4way_ctx_holder ctx __attribute__ ((aligned (64)));
|
||||
uint32_t dataLen = 64;
|
||||
int i;
|
||||
|
||||
memcpy( &ctx, &tt10_4way_ctx, sizeof(tt10_4way_ctx) );
|
||||
|
||||
for ( i = 0; i < TT10_FUNC_COUNT; i++ )
|
||||
{
|
||||
if (i == 0)
|
||||
{
|
||||
dataLen = 80;
|
||||
vhashA = (uint64_t*)input;
|
||||
vhashB = vhashX;
|
||||
}
|
||||
else
|
||||
{
|
||||
dataLen = 64;
|
||||
if ( i % 2 == 0 )
|
||||
{
|
||||
vhashA = vhashY;
|
||||
vhashB = vhashX;
|
||||
}
|
||||
else
|
||||
{
|
||||
vhashA = vhashX;
|
||||
vhashB = vhashY;
|
||||
}
|
||||
}
|
||||
|
||||
switch ( permutation[i] )
|
||||
{
|
||||
case 0:
|
||||
blake512_4way( &ctx.blake, vhashA, dataLen );
|
||||
blake512_4way_close( &ctx.blake, vhashB );
|
||||
if ( i == 9 )
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3,
|
||||
vhashB, dataLen<<3 );
|
||||
break;
|
||||
case 1:
|
||||
bmw512_4way( &ctx.bmw, vhashA, dataLen );
|
||||
bmw512_4way_close( &ctx.bmw, vhashB );
|
||||
if ( i == 9 )
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3,
|
||||
vhashB, dataLen<<3 );
|
||||
break;
|
||||
case 2:
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3,
|
||||
vhashA, dataLen<<3 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash0,
|
||||
(char*)hash0, dataLen<<3 );
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash1,
|
||||
(char*)hash1, dataLen<<3 );
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash2,
|
||||
(char*)hash2, dataLen<<3 );
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash3,
|
||||
(char*)hash3, dataLen<<3 );
|
||||
if ( i != 9 )
|
||||
mm256_interleave_4x64( vhashB,
|
||||
hash0, hash1, hash2, hash3, dataLen<<3 );
|
||||
break;
|
||||
case 3:
|
||||
skein512_4way( &ctx.skein, vhashA, dataLen );
|
||||
skein512_4way_close( &ctx.skein, vhashB );
|
||||
if ( i == 9 )
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3,
|
||||
vhashB, dataLen<<3 );
|
||||
break;
|
||||
case 4:
|
||||
jh512_4way( &ctx.jh, vhashA, dataLen );
|
||||
jh512_4way_close( &ctx.jh, vhashB );
|
||||
if ( i == 9 )
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3,
|
||||
vhashB, dataLen<<3 );
|
||||
break;
|
||||
case 5:
|
||||
keccak512_4way( &ctx.keccak, vhashA, dataLen );
|
||||
keccak512_4way_close( &ctx.keccak, vhashB );
|
||||
if ( i == 9 )
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3,
|
||||
vhashB, dataLen<<3 );
|
||||
break;
|
||||
case 6:
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3,
|
||||
vhashA, dataLen<<3 );
|
||||
update_and_final_luffa( &ctx.luffa, (BitSequence*)hash0,
|
||||
(const BitSequence *)hash0, dataLen );
|
||||
memcpy( &ctx.luffa, &tt10_4way_ctx.luffa, sizeof(hashState_luffa) );
|
||||
update_and_final_luffa( &ctx.luffa, (BitSequence*)hash1,
|
||||
(const BitSequence*)hash1, dataLen );
|
||||
memcpy( &ctx.luffa, &tt10_4way_ctx.luffa, sizeof(hashState_luffa) );
|
||||
update_and_final_luffa( &ctx.luffa, (BitSequence*)hash2,
|
||||
(const BitSequence*)hash2, dataLen );
|
||||
memcpy( &ctx.luffa, &tt10_4way_ctx.luffa, sizeof(hashState_luffa) );
|
||||
update_and_final_luffa( &ctx.luffa, (BitSequence*)hash3,
|
||||
(const BitSequence*)hash3, dataLen );
|
||||
if ( i != 9 )
|
||||
mm256_interleave_4x64( vhashB,
|
||||
hash0, hash1, hash2, hash3, dataLen<<3 );
|
||||
break;
|
||||
case 7:
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3,
|
||||
vhashA, dataLen<<3 );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash0,
|
||||
(const byte*)hash0, dataLen );
|
||||
memcpy( &ctx.cube, &tt10_4way_ctx.cube, sizeof(cubehashParam) );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash1,
|
||||
(const byte*)hash1, dataLen );
|
||||
memcpy( &ctx.cube, &tt10_4way_ctx.cube, sizeof(cubehashParam) );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash2,
|
||||
(const byte*)hash2, dataLen );
|
||||
memcpy( &ctx.cube, &tt10_4way_ctx.cube, sizeof(cubehashParam) );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash3,
|
||||
(const byte*)hash3, dataLen );
|
||||
if ( i != 9 )
|
||||
mm256_interleave_4x64( vhashB,
|
||||
hash0, hash1, hash2, hash3, dataLen<<3 );
|
||||
break;
|
||||
case 8:
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3,
|
||||
vhashA, dataLen<<3 );
|
||||
sph_shavite512( &ctx.shavite, hash0, dataLen );
|
||||
sph_shavite512_close( &ctx.shavite, hash0 );
|
||||
memcpy( &ctx.shavite, &tt10_4way_ctx.shavite, sizeof ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, hash1, dataLen );
|
||||
sph_shavite512_close( &ctx.shavite, hash1 );
|
||||
memcpy( &ctx.shavite, &tt10_4way_ctx.shavite, sizeof ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, hash2, dataLen );
|
||||
sph_shavite512_close( &ctx.shavite, hash2 );
|
||||
memcpy( &ctx.shavite, &tt10_4way_ctx.shavite, sizeof ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, hash3, dataLen );
|
||||
sph_shavite512_close( &ctx.shavite, hash3 );
|
||||
if ( i != 9 )
|
||||
mm256_interleave_4x64( vhashB,
|
||||
hash0, hash1, hash2, hash3, dataLen<<3 );
|
||||
break;
|
||||
case 9:
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3,
|
||||
vhashA, dataLen<<3 );
|
||||
update_final_sd( &ctx.simd, (BitSequence *)hash0,
|
||||
(const BitSequence *)hash0, dataLen<<3 );
|
||||
memcpy( &ctx.simd, &tt10_4way_ctx.simd, sizeof ctx.simd );
|
||||
update_final_sd( &ctx.simd, (BitSequence *)hash1,
|
||||
(const BitSequence *)hash1, dataLen<<3 );
|
||||
memcpy( &ctx.simd, &tt10_4way_ctx.simd, sizeof ctx.simd );
|
||||
update_final_sd( &ctx.simd, (BitSequence *)hash2,
|
||||
(const BitSequence *)hash2, dataLen<<3 );
|
||||
memcpy( &ctx.simd, &tt10_4way_ctx.simd, sizeof ctx.simd );
|
||||
update_final_sd( &ctx.simd, (BitSequence *)hash3,
|
||||
(const BitSequence *)hash3, dataLen<<3 );
|
||||
if ( i != 9 )
|
||||
mm256_interleave_4x64( vhashB,
|
||||
hash0, hash1, hash2, hash3, dataLen<<3 );
|
||||
break;
|
||||
default:
|
||||
applog(LOG_ERR,"SWERR: timetravel invalid permutation");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
memcpy( output, hash0, 32 );
|
||||
memcpy( output+32, hash1, 32 );
|
||||
memcpy( output+64, hash2, 32 );
|
||||
memcpy( output+96, hash3, 32 );
|
||||
}
|
||||
|
||||
int scanhash_timetravel10_4way( int thr_id, struct work *work,
|
||||
uint32_t max_nonce, uint64_t *hashes_done )
|
||||
{
|
||||
uint32_t hash[4*8] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
||||
uint32_t endiandata[20] __attribute__((aligned(64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[19];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t *nonces = work->nonces;
|
||||
bool *found = work->nfound;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep0 = vdata + 73; // 9*8 + 1
|
||||
uint32_t *noncep1 = vdata + 75;
|
||||
uint32_t *noncep2 = vdata + 77;
|
||||
uint32_t *noncep3 = vdata + 79;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
||||
int i;
|
||||
|
||||
if ( opt_benchmark )
|
||||
ptarget[7] = 0x0cff;
|
||||
|
||||
for ( int k = 0; k < 19; k++ )
|
||||
be32enc( &endiandata[k], pdata[k] );
|
||||
|
||||
const uint32_t timestamp = endiandata[17];
|
||||
if ( timestamp != s_ntime )
|
||||
{
|
||||
const int steps = ( timestamp - TT10_FUNC_BASE_TIMESTAMP )
|
||||
% TT10_FUNC_COUNT_PERMUTATIONS;
|
||||
for ( i = 0; i < TT10_FUNC_COUNT; i++ )
|
||||
permutation[i] = i;
|
||||
for ( i = 0; i < steps; i++ )
|
||||
tt10_next_permutation( permutation, permutation + TT10_FUNC_COUNT );
|
||||
s_ntime = timestamp;
|
||||
}
|
||||
|
||||
uint64_t *edata = (uint64_t*)endiandata;
|
||||
mm256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||
|
||||
do
|
||||
{
|
||||
found[0] = found[1] = found[2] = found[3] = false;
|
||||
be32enc( noncep0, n );
|
||||
be32enc( noncep1, n+1 );
|
||||
be32enc( noncep2, n+2 );
|
||||
be32enc( noncep3, n+3 );
|
||||
|
||||
timetravel10_4way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
if ( hash[7] <= Htarg && fulltest( hash, ptarget) )
|
||||
{
|
||||
found[0] = true;
|
||||
num_found++;
|
||||
nonces[0] = n;
|
||||
work_set_target_ratio( work, hash );
|
||||
}
|
||||
if ( (hash+8)[7] <= Htarg && fulltest( hash+8, ptarget) )
|
||||
{
|
||||
found[1] = true;
|
||||
num_found++;
|
||||
nonces[1] = n+1;
|
||||
work_set_target_ratio( work, hash+8 );
|
||||
}
|
||||
if ( (hash+16)[7] <= Htarg && fulltest( hash+16, ptarget) )
|
||||
{
|
||||
found[2] = true;
|
||||
num_found++;
|
||||
nonces[2] = n+2;
|
||||
work_set_target_ratio( work, hash+16 );
|
||||
}
|
||||
if ( (hash+24)[7] <= Htarg && fulltest( hash+24, ptarget) )
|
||||
{
|
||||
found[3] = true;
|
||||
num_found++;
|
||||
nonces[3] = n+3;
|
||||
work_set_target_ratio( work, hash+24 );
|
||||
}
|
||||
n += 4;
|
||||
} while ( ( num_found == 0 ) && ( n < max_nonce ) && !(*restart) );
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return num_found;
|
||||
}
|
||||
|
||||
#endif
|
78
algo/x11/timetravel10-gate.c
Normal file
78
algo/x11/timetravel10-gate.c
Normal file
@@ -0,0 +1,78 @@
|
||||
#include "timetravel10-gate.h"
|
||||
|
||||
void tt10_set_target( struct work* work, double job_diff )
|
||||
{
|
||||
work_set_target( work, job_diff / (256.0 * opt_diff_factor) );
|
||||
}
|
||||
|
||||
bool register_timetravel10_algo( algo_gate_t* gate )
|
||||
{
|
||||
#ifdef TIMETRAVEL10_4WAY
|
||||
init_tt10_4way_ctx();
|
||||
gate->scanhash = (void*)&scanhash_timetravel10_4way;
|
||||
gate->hash = (void*)&timetravel10_4way_hash;
|
||||
#else
|
||||
init_tt10_ctx();
|
||||
gate->scanhash = (void*)&scanhash_timetravel10;
|
||||
gate->hash = (void*)&timetravel10_hash;
|
||||
#endif
|
||||
gate->set_target = (void*)&tt10_set_target;
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT | FOUR_WAY_OPT;
|
||||
gate->get_max64 = (void*)&get_max64_0xffffLL;
|
||||
return true;
|
||||
};
|
||||
|
||||
inline void tt10_swap( int *a, int *b )
|
||||
{
|
||||
int c = *a;
|
||||
*a = *b;
|
||||
*b = c;
|
||||
}
|
||||
|
||||
inline void reverse( int *pbegin, int *pend )
|
||||
{
|
||||
while ( (pbegin != pend) && (pbegin != --pend) )
|
||||
{
|
||||
tt10_swap( pbegin, pend );
|
||||
pbegin++;
|
||||
}
|
||||
}
|
||||
|
||||
void tt10_next_permutation( int *pbegin, int *pend )
|
||||
{
|
||||
if ( pbegin == pend )
|
||||
return;
|
||||
|
||||
int *i = pbegin;
|
||||
++i;
|
||||
if ( i == pend )
|
||||
return;
|
||||
|
||||
i = pend;
|
||||
--i;
|
||||
|
||||
while (1)
|
||||
{
|
||||
int *j = i;
|
||||
--i;
|
||||
|
||||
if ( *i < *j )
|
||||
{
|
||||
int *k = pend;
|
||||
|
||||
while ( !(*i < *--k) ) /* do nothing */ ;
|
||||
|
||||
tt10_swap( i, k );
|
||||
reverse(j, pend);
|
||||
return; // true
|
||||
}
|
||||
|
||||
if ( i == pbegin )
|
||||
{
|
||||
reverse(pbegin, pend);
|
||||
return; // false
|
||||
}
|
||||
// else?
|
||||
}
|
||||
}
|
||||
|
39
algo/x11/timetravel10-gate.h
Normal file
39
algo/x11/timetravel10-gate.h
Normal file
@@ -0,0 +1,39 @@
|
||||
#ifndef TIMETRAVEL10_GATE_H__
|
||||
#define TIMETRAVEL10_GATE_H__ 1
|
||||
|
||||
#include "algo-gate-api.h"
|
||||
#include <stdint.h>
|
||||
|
||||
#if defined(HASH_4WAY) && defined(__AES__)
|
||||
#define TIMETRAVEL10_4WAY
|
||||
#endif
|
||||
|
||||
// BitCore Genesis Timestamp
|
||||
#define TT10_FUNC_BASE_TIMESTAMP 1492973331U
|
||||
#define TT10_FUNC_COUNT 10
|
||||
#define TT10_FUNC_COUNT_PERMUTATIONS 40320
|
||||
|
||||
void tt10_next_permutation( int *pbegin, int *pend );
|
||||
|
||||
bool register_timetravel10_algo( algo_gate_t* gate );
|
||||
|
||||
#if defined(TIMETRAVEL10_4WAY)
|
||||
|
||||
void timetravel10_4way_hash( void *state, const void *input );
|
||||
|
||||
int scanhash_timetravel10_4way( int thr_id, struct work *work,
|
||||
uint32_t max_nonce, uint64_t *hashes_done );
|
||||
|
||||
void init_tt10_4way_ctx();
|
||||
|
||||
#endif
|
||||
|
||||
void timetravel10_hash( void *state, const void *input );
|
||||
|
||||
int scanhash_timetravel10( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
|
||||
void init_tt10_ctx();
|
||||
|
||||
#endif
|
||||
|
@@ -1,11 +1,8 @@
|
||||
#include "algo-gate-api.h"
|
||||
|
||||
#include "timetravel10-gate.h"
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include "avxdefs.h"
|
||||
|
||||
#include "algo/blake/sph_blake.h"
|
||||
#include "algo/bmw/sph_bmw.h"
|
||||
#include "algo/jh/sph_jh.h"
|
||||
@@ -22,68 +19,8 @@
|
||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||
#endif
|
||||
|
||||
// BitCore Genesis Timestamp
|
||||
#define HASH_FUNC_BASE_TIMESTAMP 1492973331U
|
||||
|
||||
#define HASH_FUNC_COUNT 10
|
||||
#define HASH_FUNC_COUNT_PERMUTATIONS 40320
|
||||
|
||||
static __thread uint32_t s_ntime = UINT32_MAX;
|
||||
static __thread int permutation[HASH_FUNC_COUNT] = { 0 };
|
||||
|
||||
inline void tt10_swap( int *a, int *b )
|
||||
{
|
||||
int c = *a;
|
||||
*a = *b;
|
||||
*b = c;
|
||||
}
|
||||
|
||||
inline void reverse( int *pbegin, int *pend )
|
||||
{
|
||||
while ( (pbegin != pend) && (pbegin != --pend) )
|
||||
{
|
||||
tt10_swap( pbegin, pend );
|
||||
pbegin++;
|
||||
}
|
||||
}
|
||||
|
||||
static void next_permutation( int *pbegin, int *pend )
|
||||
{
|
||||
if ( pbegin == pend )
|
||||
return;
|
||||
|
||||
int *i = pbegin;
|
||||
++i;
|
||||
if ( i == pend )
|
||||
return;
|
||||
|
||||
i = pend;
|
||||
--i;
|
||||
|
||||
while (1)
|
||||
{
|
||||
int *j = i;
|
||||
--i;
|
||||
|
||||
if ( *i < *j )
|
||||
{
|
||||
int *k = pend;
|
||||
|
||||
while ( !(*i < *--k) ) /* do nothing */ ;
|
||||
|
||||
tt10_swap( i, k );
|
||||
reverse(j, pend);
|
||||
return; // true
|
||||
}
|
||||
|
||||
if ( i == pbegin )
|
||||
{
|
||||
reverse(pbegin, pend);
|
||||
return; // false
|
||||
}
|
||||
// else?
|
||||
}
|
||||
}
|
||||
static __thread int permutation[TT10_FUNC_COUNT] = { 0 };
|
||||
|
||||
typedef struct {
|
||||
sph_blake512_context blake;
|
||||
@@ -125,7 +62,7 @@ void init_tt10_ctx()
|
||||
|
||||
void timetravel10_hash(void *output, const void *input)
|
||||
{
|
||||
uint32_t hash[ 16 * HASH_FUNC_COUNT ] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[ 16 * TT10_FUNC_COUNT ] __attribute__ ((aligned (64)));
|
||||
uint32_t *hashA, *hashB;
|
||||
tt10_ctx_holder ctx __attribute__ ((aligned (64)));
|
||||
uint32_t dataLen = 64;
|
||||
@@ -136,7 +73,7 @@ void timetravel10_hash(void *output, const void *input)
|
||||
|
||||
memcpy( &ctx, &tt10_ctx, sizeof(tt10_ctx) );
|
||||
|
||||
for ( i = 0; i < HASH_FUNC_COUNT; i++ )
|
||||
for ( i = 0; i < TT10_FUNC_COUNT; i++ )
|
||||
{
|
||||
if (i == 0)
|
||||
{
|
||||
@@ -302,7 +239,7 @@ void timetravel10_hash(void *output, const void *input)
|
||||
}
|
||||
}
|
||||
|
||||
memcpy(output, &hash[16 * (HASH_FUNC_COUNT - 1)], 32);
|
||||
memcpy(output, &hash[16 * (TT10_FUNC_COUNT - 1)], 32);
|
||||
}
|
||||
|
||||
int scanhash_timetravel10( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
@@ -328,12 +265,12 @@ int scanhash_timetravel10( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
const uint32_t timestamp = endiandata[17];
|
||||
if ( timestamp != s_ntime )
|
||||
{
|
||||
const int steps = ( timestamp - HASH_FUNC_BASE_TIMESTAMP )
|
||||
% HASH_FUNC_COUNT_PERMUTATIONS;
|
||||
for ( i = 0; i < HASH_FUNC_COUNT; i++ )
|
||||
const int steps = ( timestamp - TT10_FUNC_BASE_TIMESTAMP )
|
||||
% TT10_FUNC_COUNT_PERMUTATIONS;
|
||||
for ( i = 0; i < TT10_FUNC_COUNT; i++ )
|
||||
permutation[i] = i;
|
||||
for ( i = 0; i < steps; i++ )
|
||||
next_permutation( permutation, permutation + HASH_FUNC_COUNT );
|
||||
tt10_next_permutation( permutation, permutation + TT10_FUNC_COUNT );
|
||||
s_ntime = timestamp;
|
||||
|
||||
// do midstate precalc for first function
|
||||
@@ -398,6 +335,7 @@ int scanhash_timetravel10( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
{
|
||||
work_set_target_ratio( work, hash );
|
||||
pdata[19] = nonce;
|
||||
work_set_target_ratio( work, hash );
|
||||
*hashes_done = pdata[19] - first_nonce;
|
||||
return 1;
|
||||
}
|
||||
@@ -409,20 +347,3 @@ int scanhash_timetravel10( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
*hashes_done = pdata[19] - first_nonce + 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void timetravel10_set_target( struct work* work, double job_diff )
|
||||
{
|
||||
work_set_target( work, job_diff / (256.0 * opt_diff_factor) );
|
||||
}
|
||||
|
||||
bool register_timetravel10_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT;
|
||||
init_tt10_ctx();
|
||||
gate->scanhash = (void*)&scanhash_timetravel10;
|
||||
gate->hash = (void*)&timetravel10_hash;
|
||||
gate->set_target = (void*)&timetravel10_set_target;
|
||||
gate->get_max64 = (void*)&get_max64_0xffffLL;
|
||||
return true;
|
||||
};
|
||||
|
@@ -179,6 +179,7 @@ int scanhash_x11( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
if ( fulltest( hash64, ptarget ) )
|
||||
{
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
work_set_target_ratio( work, hash64 );
|
||||
return true;
|
||||
}
|
||||
}
|
||||
@@ -189,14 +190,3 @@ int scanhash_x11( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
pdata[19] = n;
|
||||
return 0;
|
||||
}
|
||||
/*
|
||||
bool register_x11_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT;
|
||||
init_x11_ctx();
|
||||
gate->scanhash = (void*)&scanhash_x11;
|
||||
gate->hash = (void*)&x11_hash;
|
||||
gate->get_max64 = (void*)&get_max64_0x3ffff;
|
||||
return true;
|
||||
};
|
||||
*/
|
||||
|
340
algo/x11/x11evo-4way.c
Normal file
340
algo/x11/x11evo-4way.c
Normal file
@@ -0,0 +1,340 @@
|
||||
#include "cpuminer-config.h"
|
||||
#include "x11evo-gate.h"
|
||||
|
||||
#if defined(__AVX2__) && defined(__AES__)
|
||||
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
#include <compat/portable_endian.h>
|
||||
#include "algo/blake/blake-hash-4way.h"
|
||||
#include "algo/bmw/bmw-hash-4way.h"
|
||||
#include "algo/skein/skein-hash-4way.h"
|
||||
#include "algo/jh/jh-hash-4way.h"
|
||||
#include "algo/keccak/keccak-hash-4way.h"
|
||||
#include "algo/luffa/sph_luffa.h"
|
||||
#include "algo/cubehash/sph_cubehash.h"
|
||||
#include "algo/shavite/sph_shavite.h"
|
||||
#include "algo/simd/sph_simd.h"
|
||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||
#include "algo/echo/aes_ni/hash_api.h"
|
||||
#include "algo/luffa/sse2/luffa_for_sse2.h"
|
||||
#include "algo/cubehash/sse2/cubehash_sse2.h"
|
||||
#include "algo/simd/sse2/nist.h"
|
||||
|
||||
typedef struct {
|
||||
blake512_4way_context blake;
|
||||
bmw512_4way_context bmw;
|
||||
hashState_groestl groestl;
|
||||
skein512_4way_context skein;
|
||||
jh512_4way_context jh;
|
||||
keccak512_4way_context keccak;
|
||||
hashState_luffa luffa;
|
||||
cubehashParam cube;
|
||||
sph_shavite512_context shavite;
|
||||
hashState_sd simd;
|
||||
hashState_echo echo;
|
||||
} x11evo_4way_ctx_holder;
|
||||
|
||||
static x11evo_4way_ctx_holder x11evo_4way_ctx __attribute__ ((aligned (64)));
|
||||
|
||||
void init_x11evo_4way_ctx()
|
||||
{
|
||||
blake512_4way_init( &x11evo_4way_ctx.blake );
|
||||
bmw512_4way_init( &x11evo_4way_ctx.bmw );
|
||||
init_groestl( &x11evo_4way_ctx.groestl, 64 );
|
||||
skein512_4way_init( &x11evo_4way_ctx.skein );
|
||||
jh512_4way_init( &x11evo_4way_ctx.jh );
|
||||
keccak512_4way_init( &x11evo_4way_ctx.keccak );
|
||||
init_luffa( &x11evo_4way_ctx.luffa, 512 );
|
||||
cubehashInit( &x11evo_4way_ctx.cube, 512, 16, 32 );
|
||||
sph_shavite512_init( &x11evo_4way_ctx.shavite );
|
||||
init_sd( &x11evo_4way_ctx.simd, 512 );
|
||||
init_echo( &x11evo_4way_ctx.echo, 512 );
|
||||
}
|
||||
|
||||
static char hashOrder[X11EVO_FUNC_COUNT + 1] = { 0 };
|
||||
static __thread uint32_t s_ntime = UINT32_MAX;
|
||||
|
||||
void x11evo_4way_hash( void *state, const void *input )
|
||||
{
|
||||
uint32_t hash0[16] __attribute__ ((aligned (64)));
|
||||
uint32_t hash1[16] __attribute__ ((aligned (64)));
|
||||
uint32_t hash2[16] __attribute__ ((aligned (64)));
|
||||
uint32_t hash3[16] __attribute__ ((aligned (64)));
|
||||
uint32_t vhash[16*4] __attribute__ ((aligned (64)));
|
||||
x11evo_4way_ctx_holder ctx __attribute__ ((aligned (64)));
|
||||
memcpy( &ctx, &x11evo_4way_ctx, sizeof(x11evo_4way_ctx) );
|
||||
|
||||
if ( s_seq == -1 )
|
||||
{
|
||||
uint32_t *data = (uint32_t*) input;
|
||||
const uint32_t ntime = data[17];
|
||||
evo_twisted_code( ntime, hashOrder );
|
||||
}
|
||||
|
||||
int i;
|
||||
int len = strlen( hashOrder );
|
||||
for ( i = 0; i < len; i++ )
|
||||
{
|
||||
char elem = hashOrder[i];
|
||||
uint8_t idx;
|
||||
if ( elem >= 'A' )
|
||||
idx = elem - 'A' + 10;
|
||||
else
|
||||
idx = elem - '0';
|
||||
|
||||
// int size = 64;
|
||||
|
||||
switch ( idx )
|
||||
{
|
||||
case 0:
|
||||
blake512_4way( &ctx.blake, input, 80 );
|
||||
blake512_4way_close( &ctx.blake, vhash );
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3,
|
||||
vhash, 64<<3 );
|
||||
break;
|
||||
case 1:
|
||||
bmw512_4way( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
if ( i >= len-1 )
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3,
|
||||
vhash, 64<<3 );
|
||||
break;
|
||||
case 2:
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3,
|
||||
vhash, 64<<3 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash0,
|
||||
(char*)hash0, 512 );
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash1,
|
||||
(char*)hash1, 512 );
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash2,
|
||||
(char*)hash2, 512 );
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash3,
|
||||
(char*)hash3, 512 );
|
||||
if ( i < len-1 )
|
||||
mm256_interleave_4x64( vhash,
|
||||
hash0, hash1, hash2, hash3, 64<<3 );
|
||||
break;
|
||||
case 3:
|
||||
skein512_4way( &ctx.skein, vhash, 64 );
|
||||
skein512_4way_close( &ctx.skein, vhash );
|
||||
if ( i >= len-1 )
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3,
|
||||
vhash, 64<<3 );
|
||||
break;
|
||||
case 4:
|
||||
jh512_4way( &ctx.jh, vhash, 64 );
|
||||
jh512_4way_close( &ctx.jh, vhash );
|
||||
if ( i >= len-1 )
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3,
|
||||
vhash, 64<<3 );
|
||||
break;
|
||||
case 5:
|
||||
keccak512_4way( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_close( &ctx.keccak, vhash );
|
||||
if ( i >= len-1 )
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3,
|
||||
vhash, 64<<3 );
|
||||
break;
|
||||
case 6:
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3,
|
||||
vhash, 64<<3 );
|
||||
update_and_final_luffa( &ctx.luffa, (BitSequence*)hash0,
|
||||
(const BitSequence*)hash0, 64 );
|
||||
memcpy( &ctx.luffa, &x11evo_4way_ctx.luffa,
|
||||
sizeof(hashState_luffa) );
|
||||
update_and_final_luffa( &ctx.luffa, (BitSequence*)hash1,
|
||||
(const BitSequence*)hash1, 64 );
|
||||
memcpy( &ctx.luffa, &x11evo_4way_ctx.luffa,
|
||||
sizeof(hashState_luffa) );
|
||||
update_and_final_luffa( &ctx.luffa, (BitSequence*)hash2,
|
||||
(const BitSequence*)hash2, 64 );
|
||||
memcpy( &ctx.luffa, &x11evo_4way_ctx.luffa,
|
||||
sizeof(hashState_luffa) );
|
||||
update_and_final_luffa( &ctx.luffa, (BitSequence*)hash3,
|
||||
(const BitSequence*)hash3, 64 );
|
||||
if ( i < len-1 )
|
||||
mm256_interleave_4x64( vhash,
|
||||
hash0, hash1, hash2, hash3, 64<<3 );
|
||||
break;
|
||||
case 7:
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3,
|
||||
vhash, 64<<3 );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash0,
|
||||
(const byte*) hash0, 64 );
|
||||
memcpy( &ctx.cube, &x11evo_4way_ctx.cube, sizeof(cubehashParam) );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash1,
|
||||
(const byte*) hash1, 64 );
|
||||
memcpy( &ctx.cube, &x11evo_4way_ctx.cube, sizeof(cubehashParam) );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash2,
|
||||
(const byte*) hash2, 64 );
|
||||
memcpy( &ctx.cube, &x11evo_4way_ctx.cube, sizeof(cubehashParam) );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash3,
|
||||
(const byte*) hash3, 64 );
|
||||
if ( i < len-1 )
|
||||
mm256_interleave_4x64( vhash,
|
||||
hash0, hash1, hash2, hash3, 64<<3 );
|
||||
break;
|
||||
case 8:
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3,
|
||||
vhash, 64<<3 );
|
||||
sph_shavite512( &ctx.shavite, hash0, 64 );
|
||||
sph_shavite512_close( &ctx.shavite, hash0 );
|
||||
memcpy( &ctx.shavite, &x11evo_4way_ctx.shavite,
|
||||
sizeof(sph_shavite512_context) );
|
||||
sph_shavite512( &ctx.shavite, hash1, 64 );
|
||||
sph_shavite512_close( &ctx.shavite, hash1 );
|
||||
memcpy( &ctx.shavite, &x11evo_4way_ctx.shavite,
|
||||
sizeof(sph_shavite512_context) );
|
||||
sph_shavite512( &ctx.shavite, hash2, 64 );
|
||||
sph_shavite512_close( &ctx.shavite, hash2 );
|
||||
memcpy( &ctx.shavite, &x11evo_4way_ctx.shavite,
|
||||
sizeof(sph_shavite512_context) );
|
||||
sph_shavite512( &ctx.shavite, hash3, 64 );
|
||||
sph_shavite512_close( &ctx.shavite, hash3 );
|
||||
if ( i < len-1 )
|
||||
mm256_interleave_4x64( vhash,
|
||||
hash0, hash1, hash2, hash3, 64<<3 );
|
||||
break;
|
||||
case 9:
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3,
|
||||
vhash, 64<<3 );
|
||||
update_final_sd( &ctx.simd, (BitSequence *)hash0,
|
||||
(const BitSequence *)hash0, 512 );
|
||||
memcpy( &ctx.simd, &x11evo_4way_ctx.simd, sizeof(hashState_sd) );
|
||||
update_final_sd( &ctx.simd, (BitSequence *)hash1,
|
||||
(const BitSequence *)hash1, 512 );
|
||||
memcpy( &ctx.simd, &x11evo_4way_ctx.simd, sizeof(hashState_sd) );
|
||||
update_final_sd( &ctx.simd, (BitSequence *)hash2,
|
||||
(const BitSequence *)hash2, 512 );
|
||||
memcpy( &ctx.simd, &x11evo_4way_ctx.simd, sizeof(hashState_sd) );
|
||||
update_final_sd( &ctx.simd, (BitSequence *)hash3,
|
||||
(const BitSequence *)hash3, 512 );
|
||||
if ( i < len-1 )
|
||||
mm256_interleave_4x64( vhash,
|
||||
hash0, hash1, hash2, hash3, 64<<3 );
|
||||
break;
|
||||
case 10:
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3,
|
||||
vhash, 64<<3 );
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash0,
|
||||
(const BitSequence *) hash0, 512 );
|
||||
memcpy( &ctx.echo, &x11evo_4way_ctx.echo, sizeof(hashState_echo) );
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash1,
|
||||
(const BitSequence *) hash1, 512 );
|
||||
memcpy( &ctx.echo, &x11evo_4way_ctx.echo, sizeof(hashState_echo) );
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash2,
|
||||
(const BitSequence *) hash2, 512 );
|
||||
memcpy( &ctx.echo, &x11evo_4way_ctx.echo, sizeof(hashState_echo) );
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash3,
|
||||
(const BitSequence *) hash3, 512 );
|
||||
if ( i < len-1 )
|
||||
mm256_interleave_4x64( vhash,
|
||||
hash0, hash1, hash2, hash3, 64<<3 );
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
memcpy( state, hash0, 32 );
|
||||
memcpy( state+32, hash1, 32 );
|
||||
memcpy( state+64, hash2, 32 );
|
||||
memcpy( state+96, hash3, 32 );
|
||||
}
|
||||
|
||||
//static const uint32_t diff1targ = 0x0000ffff;
|
||||
|
||||
int scanhash_x11evo_4way( int thr_id, struct work* work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
{
|
||||
uint32_t hash[4*8] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
||||
uint32_t endiandata[20] __attribute__((aligned(64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[19];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t *nonces = work->nonces;
|
||||
bool *found = work->nfound;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep0 = vdata + 73; // 9*8 + 1
|
||||
uint32_t *noncep1 = vdata + 75;
|
||||
uint32_t *noncep2 = vdata + 77;
|
||||
uint32_t *noncep3 = vdata + 79;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
|
||||
swab32_array( endiandata, pdata, 20 );
|
||||
|
||||
int ntime = endiandata[17];
|
||||
if ( ntime != s_ntime || s_seq == -1 )
|
||||
{
|
||||
evo_twisted_code( ntime, hashOrder );
|
||||
s_ntime = ntime;
|
||||
}
|
||||
|
||||
uint32_t hmask = 0xFFFFFFFF;
|
||||
if ( Htarg > 0 )
|
||||
{
|
||||
if ( Htarg <= 0xF )
|
||||
hmask = 0xFFFFFFF0;
|
||||
else if ( Htarg <= 0xFF )
|
||||
hmask = 0xFFFFFF00;
|
||||
else if ( Htarg <= 0xFFF )
|
||||
hmask = 0xFFFF000;
|
||||
else if ( Htarg <= 0xFFFF )
|
||||
hmask = 0xFFFF000;
|
||||
}
|
||||
|
||||
uint64_t *edata = (uint64_t*)endiandata;
|
||||
mm256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||
|
||||
do
|
||||
{
|
||||
found[0] = found[1] = found[2] = found[3] = false;
|
||||
be32enc( noncep0, n );
|
||||
be32enc( noncep1, n+1 );
|
||||
be32enc( noncep2, n+2 );
|
||||
be32enc( noncep3, n+3 );
|
||||
|
||||
x11evo_4way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
if ( ( hash[7] & hmask ) == 0 && fulltest( hash, ptarget ) )
|
||||
{
|
||||
found[0] = true;
|
||||
num_found++;
|
||||
nonces[0] = n;
|
||||
work_set_target_ratio( work, hash );
|
||||
}
|
||||
if ( ( (hash+8)[7] & hmask ) == 0 && fulltest( hash+8, ptarget ) )
|
||||
{
|
||||
found[1] = true;
|
||||
num_found++;
|
||||
nonces[1] = n+1;
|
||||
work_set_target_ratio( work, hash+8 );
|
||||
}
|
||||
if ( ( (hash+16)[7] & hmask ) == 0 && fulltest( hash+16, ptarget ) )
|
||||
{
|
||||
found[2] = true;
|
||||
num_found++;
|
||||
nonces[2] = n+2;
|
||||
work_set_target_ratio( work, hash+16 );
|
||||
}
|
||||
if ( ( (hash+24)[7] & hmask ) == 0 && fulltest( hash+24, ptarget ) )
|
||||
{
|
||||
found[3] = true;
|
||||
num_found++;
|
||||
nonces[3] = n+3;
|
||||
work_set_target_ratio( work, hash+24 );
|
||||
}
|
||||
n += 4;
|
||||
} while ( ( num_found == 0 ) && ( n < max_nonce )
|
||||
&& !work_restart[thr_id].restart );
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return num_found;
|
||||
}
|
||||
|
||||
#endif
|
95
algo/x11/x11evo-gate.c
Normal file
95
algo/x11/x11evo-gate.c
Normal file
@@ -0,0 +1,95 @@
|
||||
#include "x11evo-gate.h"
|
||||
|
||||
int s_seq = -1;
|
||||
|
||||
static inline int getCurrentAlgoSeq( uint32_t current_time )
|
||||
{
|
||||
// change once per day
|
||||
return (int) (current_time - X11EVO_INITIAL_DATE) / (60 * 60 * 24);
|
||||
}
|
||||
|
||||
// swap_vars doesn't work here
|
||||
void evo_swap( uint8_t *a, uint8_t *b )
|
||||
{
|
||||
uint8_t __tmp = *a;
|
||||
*a = *b;
|
||||
*b = __tmp;
|
||||
}
|
||||
|
||||
void initPerm( uint8_t n[], uint8_t count )
|
||||
{
|
||||
int i;
|
||||
for ( i = 0; i<count; i++ )
|
||||
n[i] = i;
|
||||
}
|
||||
|
||||
int nextPerm( uint8_t n[], uint32_t count )
|
||||
{
|
||||
uint32_t tail = 0, i = 0, j = 0;
|
||||
|
||||
if (unlikely( count <= 1 ))
|
||||
return 0;
|
||||
|
||||
for ( i = count - 1; i>0 && n[i - 1] >= n[i]; i-- );
|
||||
tail = i;
|
||||
|
||||
if ( tail > 0 )
|
||||
for ( j = count - 1; j>tail && n[j] <= n[tail - 1]; j-- );
|
||||
evo_swap( &n[tail - 1], &n[j] );
|
||||
|
||||
for ( i = tail, j = count - 1; i<j; i++, j-- )
|
||||
evo_swap( &n[i], &n[j] );
|
||||
|
||||
return ( tail != 0 );
|
||||
}
|
||||
|
||||
void getAlgoString( char *str, uint32_t count )
|
||||
{
|
||||
uint8_t algoList[X11EVO_FUNC_COUNT];
|
||||
char *sptr;
|
||||
int j;
|
||||
int k;
|
||||
initPerm( algoList, X11EVO_FUNC_COUNT );
|
||||
|
||||
for ( k = 0; k < count; k++ )
|
||||
nextPerm( algoList, X11EVO_FUNC_COUNT );
|
||||
|
||||
sptr = str;
|
||||
for ( j = 0; j < X11EVO_FUNC_COUNT; j++ )
|
||||
{
|
||||
if ( algoList[j] >= 10 )
|
||||
sprintf( sptr, "%c", 'A' + (algoList[j] - 10) );
|
||||
else
|
||||
sprintf( sptr, "%u", algoList[j] );
|
||||
sptr++;
|
||||
}
|
||||
*sptr = 0;
|
||||
|
||||
//applog(LOG_DEBUG, "nextPerm %s", str);
|
||||
}
|
||||
|
||||
void evo_twisted_code( uint32_t ntime, char *permstr )
|
||||
{
|
||||
int seq = getCurrentAlgoSeq( ntime );
|
||||
if ( s_seq != seq )
|
||||
{
|
||||
getAlgoString( permstr, seq );
|
||||
s_seq = seq;
|
||||
}
|
||||
}
|
||||
|
||||
bool register_x11evo_algo( algo_gate_t* gate )
|
||||
{
|
||||
#if defined (X11EVO_4WAY)
|
||||
init_x11evo_4way_ctx();
|
||||
gate->scanhash = (void*)&scanhash_x11evo_4way;
|
||||
gate->hash = (void*)&x11evo_4way_hash;
|
||||
#else
|
||||
init_x11evo_ctx();
|
||||
gate->scanhash = (void*)&scanhash_x11evo;
|
||||
gate->hash = (void*)&x11evo_hash;
|
||||
#endif
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT | FOUR_WAY_OPT;
|
||||
return true;
|
||||
};
|
||||
|
39
algo/x11/x11evo-gate.h
Normal file
39
algo/x11/x11evo-gate.h
Normal file
@@ -0,0 +1,39 @@
|
||||
#ifndef X11EVO_GATE_H__
|
||||
#define X11EVO_GATE_H__ 1
|
||||
|
||||
#include "algo-gate-api.h"
|
||||
#include <stdint.h>
|
||||
|
||||
#if defined(HASH_4WAY) && defined(__AES__)
|
||||
#define X11EVO_4WAY
|
||||
#endif
|
||||
|
||||
#define X11EVO_INITIAL_DATE 1462060800
|
||||
#define X11EVO_FUNC_COUNT 11
|
||||
|
||||
extern int s_seq;
|
||||
|
||||
bool register_x11evo_algo( algo_gate_t* gate );
|
||||
|
||||
#if defined(X11EVO_4WAY)
|
||||
|
||||
void x11evo_4way_hash( void *state, const void *input );
|
||||
|
||||
int scanhash_x11evo_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
|
||||
void init_x11evo_4way_ctx();
|
||||
|
||||
#endif
|
||||
|
||||
void x11evo_hash( void *state, const void *input );
|
||||
|
||||
int scanhash_x11evo( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
|
||||
void init_x11evo_ctx();
|
||||
|
||||
void evo_twisted_code( uint32_t ntime, char *permstr );
|
||||
|
||||
#endif
|
||||
|
@@ -1,5 +1,5 @@
|
||||
#include "cpuminer-config.h"
|
||||
#include "algo-gate-api.h"
|
||||
#include "x11evo-gate.h"
|
||||
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
@@ -26,9 +26,6 @@
|
||||
#include "algo/cubehash/sse2/cubehash_sse2.h"
|
||||
#include "algo/simd/sse2/nist.h"
|
||||
|
||||
#define INITIAL_DATE 1462060800
|
||||
#define HASH_FUNC_COUNT 11
|
||||
|
||||
typedef struct {
|
||||
#ifdef NO_AES_NI
|
||||
sph_groestl512_context groestl;
|
||||
@@ -70,94 +67,10 @@ void init_x11evo_ctx()
|
||||
sph_shavite512_init( &x11evo_ctx.shavite );
|
||||
}
|
||||
|
||||
/*
|
||||
uint32_t getCurrentAlgoSeq(uint32_t current_time, uint32_t base_time)
|
||||
{
|
||||
return (current_time - base_time) / (60 * 60 * 24);
|
||||
}
|
||||
*/
|
||||
|
||||
static inline int getCurrentAlgoSeq( uint32_t current_time )
|
||||
{
|
||||
// change once per day
|
||||
return (int) (current_time - INITIAL_DATE) / (60 * 60 * 24);
|
||||
}
|
||||
|
||||
// swap_vars doesn't work here
|
||||
void evo_swap( uint8_t *a, uint8_t *b )
|
||||
{
|
||||
uint8_t __tmp = *a;
|
||||
*a = *b;
|
||||
*b = __tmp;
|
||||
}
|
||||
|
||||
void initPerm( uint8_t n[], uint8_t count )
|
||||
{
|
||||
int i;
|
||||
for ( i = 0; i<count; i++ )
|
||||
n[i] = i;
|
||||
}
|
||||
|
||||
int nextPerm( uint8_t n[], uint32_t count )
|
||||
{
|
||||
uint32_t tail = 0, i = 0, j = 0;
|
||||
|
||||
if (unlikely( count <= 1 ))
|
||||
return 0;
|
||||
|
||||
for ( i = count - 1; i>0 && n[i - 1] >= n[i]; i-- );
|
||||
tail = i;
|
||||
|
||||
if ( tail > 0 )
|
||||
for ( j = count - 1; j>tail && n[j] <= n[tail - 1]; j-- );
|
||||
evo_swap( &n[tail - 1], &n[j] );
|
||||
|
||||
for ( i = tail, j = count - 1; i<j; i++, j-- )
|
||||
evo_swap( &n[i], &n[j] );
|
||||
|
||||
return ( tail != 0 );
|
||||
}
|
||||
|
||||
void getAlgoString( char *str, uint32_t count )
|
||||
{
|
||||
uint8_t algoList[HASH_FUNC_COUNT];
|
||||
char *sptr;
|
||||
int j;
|
||||
int k;
|
||||
initPerm( algoList, HASH_FUNC_COUNT );
|
||||
|
||||
for ( k = 0; k < count; k++ )
|
||||
nextPerm( algoList, HASH_FUNC_COUNT );
|
||||
|
||||
sptr = str;
|
||||
for ( j = 0; j < HASH_FUNC_COUNT; j++ )
|
||||
{
|
||||
if ( algoList[j] >= 10 )
|
||||
sprintf( sptr, "%c", 'A' + (algoList[j] - 10) );
|
||||
else
|
||||
sprintf( sptr, "%u", algoList[j] );
|
||||
sptr++;
|
||||
}
|
||||
*sptr = 0;
|
||||
|
||||
//applog(LOG_DEBUG, "nextPerm %s", str);
|
||||
}
|
||||
|
||||
static char hashOrder[HASH_FUNC_COUNT + 1] = { 0 };
|
||||
static char hashOrder[X11EVO_FUNC_COUNT + 1] = { 0 };
|
||||
static __thread uint32_t s_ntime = UINT32_MAX;
|
||||
static int s_seq = -1;
|
||||
|
||||
static void evo_twisted_code(uint32_t ntime, char *permstr)
|
||||
{
|
||||
int seq = getCurrentAlgoSeq(ntime);
|
||||
if (s_seq != seq)
|
||||
{
|
||||
getAlgoString(permstr, seq);
|
||||
s_seq = seq;
|
||||
}
|
||||
}
|
||||
|
||||
static inline void x11evo_hash( void *state, const void *input )
|
||||
void x11evo_hash( void *state, const void *input )
|
||||
{
|
||||
uint32_t hash[16] __attribute__ ((aligned (64)));
|
||||
x11evo_ctx_holder ctx __attribute__ ((aligned (64)));
|
||||
@@ -242,10 +155,10 @@ static inline void x11evo_hash( void *state, const void *input )
|
||||
memcpy( state, hash, 32 );
|
||||
}
|
||||
|
||||
static const uint32_t diff1targ = 0x0000ffff;
|
||||
//static const uint32_t diff1targ = 0x0000ffff;
|
||||
|
||||
int scanhash_x11evo( int thr_id, struct work* work, uint32_t max_nonce,
|
||||
unsigned long *hashes_done )
|
||||
uint64_t *hashes_done )
|
||||
{
|
||||
uint32_t endiandata[20] __attribute__((aligned(64)));
|
||||
uint32_t hash64[8] __attribute__((aligned(64)));
|
||||
@@ -274,19 +187,20 @@ int scanhash_x11evo( int thr_id, struct work* work, uint32_t max_nonce,
|
||||
else if ( Htarg <= 0xFFF )
|
||||
hmask = 0xFFFF000;
|
||||
else if ( Htarg <= 0xFFFF )
|
||||
hmask = 0xFFFF000;
|
||||
hmask = 0xFFFF000;
|
||||
}
|
||||
|
||||
do
|
||||
{
|
||||
pdata[19] = ++n;
|
||||
be32enc( &endiandata[19], n );
|
||||
x11evo_hash( hash64, &endiandata );
|
||||
x11evo_hash( hash64, endiandata );
|
||||
if ( ( hash64[7] & hmask ) == 0 )
|
||||
{
|
||||
if ( fulltest( hash64, ptarget ) )
|
||||
{
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
work_set_target_ratio( work, hash64 );
|
||||
return true;
|
||||
}
|
||||
}
|
||||
@@ -296,13 +210,3 @@ int scanhash_x11evo( int thr_id, struct work* work, uint32_t max_nonce,
|
||||
pdata[19] = n;
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool register_x11evo_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT;
|
||||
gate->scanhash = (void*)&scanhash_x11evo;
|
||||
gate->hash = (void*)&x11evo_hash;
|
||||
init_x11evo_ctx();
|
||||
return true;
|
||||
};
|
||||
|
||||
|
@@ -161,6 +161,7 @@ int scanhash_x11gost( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
if (hash[7] <= Htarg && fulltest(hash, ptarget)) {
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce;
|
||||
work_set_target_ratio( work, hash );
|
||||
return 1;
|
||||
}
|
||||
nonce++;
|
||||
|
@@ -116,6 +116,7 @@ int scanhash_phi1612( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
|
||||
if (hash[7] <= Htarg && fulltest(hash, ptarget)) {
|
||||
pdata[19] = nonce;
|
||||
work_set_target_ratio( work, hash );
|
||||
*hashes_done = pdata[19] - first_nonce;
|
||||
return 1;
|
||||
}
|
||||
|
@@ -70,6 +70,7 @@ int scanhash_skunk( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
{
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce;
|
||||
work_set_target_ratio( work, hash );
|
||||
return 1;
|
||||
}
|
||||
nonce++;
|
||||
|
@@ -234,6 +234,7 @@ int scanhash_x13(int thr_id, struct work *work, uint32_t max_nonce,
|
||||
if (!(hash64[7] & mask)) {
|
||||
printf("[%d]",thr_id);
|
||||
if (fulltest(hash64, ptarget)) {
|
||||
work_set_target_ratio( work, hash );
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return true;
|
||||
}
|
||||
|
@@ -17,7 +17,7 @@
|
||||
#include "algo/shavite/sph_shavite.h"
|
||||
#include "algo/simd/sse2/nist.h"
|
||||
#include "algo/echo/aes_ni/hash_api.h"
|
||||
#include "algo/sm3/sph_sm3.h"
|
||||
#include "algo/sm3/sm3-hash-4way.h"
|
||||
#include "algo/hamsi/sph_hamsi.h"
|
||||
#include "algo/fugue/sph_fugue.h"
|
||||
|
||||
@@ -33,7 +33,7 @@ typedef struct {
|
||||
sph_shavite512_context shavite;
|
||||
hashState_sd simd;
|
||||
hashState_echo echo;
|
||||
sm3_ctx_t sm3;
|
||||
sm3_4way_ctx_t sm3;
|
||||
sph_hamsi512_context hamsi;
|
||||
sph_fugue512_context fugue;
|
||||
} x13sm3_4way_ctx_holder;
|
||||
@@ -54,7 +54,7 @@ void init_x13sm3_4way_ctx()
|
||||
sph_shavite512_init( &x13sm3_4way_ctx.shavite );
|
||||
init_sd( &x13sm3_4way_ctx.simd, 512 );
|
||||
init_echo( &x13sm3_4way_ctx.echo, 512 );
|
||||
sm3_init( &x13sm3_4way_ctx.sm3 );
|
||||
sm3_4way_init( &x13sm3_4way_ctx.sm3 );
|
||||
sph_hamsi512_init( &x13sm3_4way_ctx.hamsi );
|
||||
sph_fugue512_init( &x13sm3_4way_ctx.fugue );
|
||||
};
|
||||
@@ -85,14 +85,11 @@ void x13sm3_4way_hash( void *state, const void *input )
|
||||
|
||||
// Groestl
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||
memcpy( &ctx.groestl, &x13sm3_4way_ctx.groestl,
|
||||
sizeof(hashState_groestl) );
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash1, (char*)hash1, 512 );
|
||||
memcpy( &ctx.groestl, &x13sm3_4way_ctx.groestl,
|
||||
sizeof(hashState_groestl) );
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash2, (char*)hash2, 512 );
|
||||
memcpy( &ctx.groestl, &x13sm3_4way_ctx.groestl,
|
||||
sizeof(hashState_groestl) );
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
||||
|
||||
// Parallel 4way
|
||||
@@ -178,6 +175,8 @@ void x13sm3_4way_hash( void *state, const void *input )
|
||||
(const BitSequence *) hash3, 512 );
|
||||
|
||||
// SM3
|
||||
uint32_t sm3_vhash[32*4] __attribute__ ((aligned (64)));
|
||||
memset( sm3_vhash, 0, sizeof sm3_vhash );
|
||||
uint32_t sm3_hash0[32] __attribute__ ((aligned (32)));
|
||||
memset( sm3_hash0, 0, sizeof sm3_hash0 );
|
||||
uint32_t sm3_hash1[32] __attribute__ ((aligned (32)));
|
||||
@@ -187,17 +186,11 @@ void x13sm3_4way_hash( void *state, const void *input )
|
||||
uint32_t sm3_hash3[32] __attribute__ ((aligned (32)));
|
||||
memset( sm3_hash3, 0, sizeof sm3_hash3 );
|
||||
|
||||
sph_sm3( &ctx.sm3, hash0, 64 );
|
||||
sph_sm3_close( &ctx.sm3, sm3_hash0 );
|
||||
memcpy( &ctx.sm3, &x13sm3_4way_ctx.sm3, sizeof(sm3_ctx_t) );
|
||||
sph_sm3( &ctx.sm3, hash1, 64 );
|
||||
sph_sm3_close( &ctx.sm3, sm3_hash1 );
|
||||
memcpy( &ctx.sm3, &x13sm3_4way_ctx.sm3, sizeof(sm3_ctx_t) );
|
||||
sph_sm3( &ctx.sm3, hash2, 64 );
|
||||
sph_sm3_close( &ctx.sm3, sm3_hash2 );
|
||||
memcpy( &ctx.sm3, &x13sm3_4way_ctx.sm3, sizeof(sm3_ctx_t) );
|
||||
sph_sm3( &ctx.sm3, hash3, 64 );
|
||||
sph_sm3_close( &ctx.sm3, sm3_hash3 );
|
||||
mm_interleave_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
sm3_4way( &ctx.sm3, vhash, 64 );
|
||||
sm3_4way_close( &ctx.sm3, sm3_vhash );
|
||||
mm_deinterleave_4x32( sm3_hash0, sm3_hash1, sm3_hash2, sm3_hash3,
|
||||
sm3_vhash, 1024 );
|
||||
|
||||
// Hamsi
|
||||
sph_hamsi512( &ctx.hamsi, sm3_hash0, 64 );
|
||||
|
@@ -224,6 +224,7 @@ int scanhash_x13sm3( int thr_id, struct work *work,
|
||||
if (!(hash64[7] & mask)) {
|
||||
printf("[%d]",thr_id);
|
||||
if (fulltest(hash64, ptarget)) {
|
||||
work_set_target_ratio( work, hash64 );
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return true;
|
||||
}
|
||||
|
@@ -65,6 +65,7 @@ int scanhash_axiom(int thr_id, struct work *work,
|
||||
if (hash64[7] < Htarg && fulltest(hash64, ptarget)) {
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
pdata[19] = n;
|
||||
work_set_target_ratio( work, hash64 );
|
||||
return true;
|
||||
}
|
||||
n++;
|
||||
|
@@ -4,7 +4,7 @@
|
||||
#include "algo-gate-api.h"
|
||||
#include <stdint.h>
|
||||
|
||||
#if defined(HASH_4WAY) && defined(__AES__)
|
||||
#if defined(__AVX2__) && defined(__AES__)
|
||||
#define POLYTIMOS_4WAY
|
||||
#endif
|
||||
|
||||
|
@@ -233,6 +233,7 @@ int scanhash_x14(int thr_id, struct work *work,
|
||||
if (!(hash64[7] & mask)) {
|
||||
printf("[%d]",thr_id);
|
||||
if (fulltest(hash64, ptarget)) {
|
||||
work_set_target_ratio( work, hash64 );
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return true;
|
||||
}
|
||||
|
@@ -47,7 +47,6 @@ void init_x15_4way_ctx()
|
||||
{
|
||||
blake512_4way_init( &x15_4way_ctx.blake );
|
||||
bmw512_4way_init( &x15_4way_ctx.bmw );
|
||||
sph_bmw512_init( &x15_4way_ctx.bmw );
|
||||
init_groestl( &x15_4way_ctx.groestl, 64 );
|
||||
skein512_4way_init( &x15_4way_ctx.skein );
|
||||
jh512_4way_init( &x15_4way_ctx.jh );
|
||||
|
@@ -245,6 +245,7 @@ int scanhash_x15(int thr_id, struct work *work,
|
||||
if (!(hash64[7] & mask)) {
|
||||
printf("[%d]",thr_id);
|
||||
if (fulltest(hash64, ptarget)) {
|
||||
work_set_target_ratio( work, hash64 );
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return true;
|
||||
}
|
||||
|
@@ -266,6 +266,7 @@ int scanhash_x17(int thr_id, struct work *work,
|
||||
if (!(hash64[7] & mask)) {
|
||||
printf("[%d]",thr_id);
|
||||
if (fulltest(hash64, ptarget)) {
|
||||
work_set_target_ratio( work, hash64 );
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return true;
|
||||
}
|
||||
@@ -281,13 +282,3 @@ int scanhash_x17(int thr_id, struct work *work,
|
||||
pdata[19] = n;
|
||||
return 0;
|
||||
}
|
||||
/*
|
||||
bool register_x17_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT;
|
||||
init_x17_ctx();
|
||||
gate->scanhash = (void*)&scanhash_x17;
|
||||
gate->hash = (void*)&x17hash;
|
||||
return true;
|
||||
};
|
||||
*/
|
||||
|
24
avxdefs.h
24
avxdefs.h
@@ -35,10 +35,18 @@
|
||||
#define mm_one_64 _mm_set1_epi64x( 1ULL )
|
||||
#define mm_one_32 _mm_set1_epi32( 1UL )
|
||||
#define mm_one_16 _mm_set1_epi16( 1U )
|
||||
#define mm_one_8 _mm_set1_epi8( 1U )
|
||||
|
||||
// Constant minus 1
|
||||
#define mm_neg1 _mm_set1_epi64x( 0xFFFFFFFFFFFFFFFFULL )
|
||||
|
||||
// Lane index, useful for byte rotate using shuffle
|
||||
#define mm_lanex_64 _mm_set_epi64( 1ULL, 0ULL );
|
||||
#define mm_lanex_32 _mm_set_epi32( 3UL, 2UL, 1UL, 0UL );
|
||||
#define mm_lanex_16 _mm_set_epi16( 7U, 6U, 5U, 4U, 3U, 2U, 1U, 0U );
|
||||
#define mm_lanex_8 _mm_set_epi8( 15U, 14U, 13U, 12U, 11U, 10U , 9U, 8U, \
|
||||
7U, 6U, 5U, 4U, 3U, 2U, 1U, 0U );
|
||||
|
||||
//
|
||||
// Basic operations without equivalent SIMD intrinsic
|
||||
|
||||
@@ -327,6 +335,16 @@ inline __m128i mm_byteswap_16( __m128i x )
|
||||
// Constant minus 1
|
||||
#define mm256_neg1 _mm256_set1_epi64x( 0xFFFFFFFFFFFFFFFFULL )
|
||||
|
||||
// Lane index, useful for rotate using permutevar
|
||||
#define mm256_lane_64 _mm_set_epi64x( 3ULL, 2ULL, 1ULL, 0ULL );
|
||||
#define mm256_lane_32 _mm_set_epi32( 7UL, 6UL, 5UL, 4UL, 3UL, 2UL, 1UL, 0UL );
|
||||
#define mm256_lane_16 _mm_set_epi16( 15U, 14U, 13U, 12U, 11U, 10U , 9U, 8U, \
|
||||
7U, 6U, 5U, 4U, 3U, 2U, 1U, 0U );
|
||||
#define mm256_lane_8 _mm_set_epi8( 31U, 30U, 29U, 28U, 27U, 26U, 25U, 24U, \
|
||||
23U, 22U, 21U, 20U, 19U, 18U, 17U, 16U, \
|
||||
15U, 14U, 13U, 12U, 11U, 10U , 9U, 8U, \
|
||||
7U, 6U, 5U, 4U, 3U, 2U, 1U, 0U );
|
||||
|
||||
//
|
||||
// Basic operations without SIMD equivalent
|
||||
|
||||
@@ -1109,7 +1127,7 @@ inline void mm256_deinterleave_8x32x( uint32_t *dst0, uint32_t *dst1,
|
||||
}
|
||||
|
||||
// Can't do it in place
|
||||
inline void mm256_reinterleave_4x64x( void *dst, void *src, int bit_len )
|
||||
inline void mm256_reinterleave_4x64( void *dst, void *src, int bit_len )
|
||||
{
|
||||
__m256i* d = (__m256i*)dst;
|
||||
uint32_t *s = (uint32_t*)src;
|
||||
@@ -1146,7 +1164,8 @@ inline void mm256_reinterleave_4x64x( void *dst, void *src, int bit_len )
|
||||
// likely of no use.
|
||||
// convert 4x32 byte (128 bit) vectors to 4x64 (256 bit) vectors for AVX2
|
||||
// bit_len must be multiple of 64
|
||||
inline void mm256_reinterleave_4x64( uint64_t *dst, uint32_t *src,
|
||||
// broken
|
||||
inline void mm256_reinterleave_4x64x( uint64_t *dst, uint32_t *src,
|
||||
int bit_len )
|
||||
{
|
||||
uint32_t *d = (uint32_t*)dst;
|
||||
@@ -1200,6 +1219,7 @@ inline void mm256_reinterleave_4x32( void *dst, void *src, int bit_len )
|
||||
// bit_len == 1024
|
||||
}
|
||||
|
||||
// not used
|
||||
inline void mm_reinterleave_4x32( void *dst, void *src, int bit_len )
|
||||
{
|
||||
uint32_t *d = (uint32_t*)dst;
|
||||
|
20
configure
vendored
20
configure
vendored
@@ -1,6 +1,6 @@
|
||||
#! /bin/sh
|
||||
# Guess values for system-dependent variables and create Makefiles.
|
||||
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.7.9.
|
||||
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.7.10.
|
||||
#
|
||||
#
|
||||
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
|
||||
@@ -577,8 +577,8 @@ MAKEFLAGS=
|
||||
# Identity of this package.
|
||||
PACKAGE_NAME='cpuminer-opt'
|
||||
PACKAGE_TARNAME='cpuminer-opt'
|
||||
PACKAGE_VERSION='3.7.9'
|
||||
PACKAGE_STRING='cpuminer-opt 3.7.9'
|
||||
PACKAGE_VERSION='3.7.10'
|
||||
PACKAGE_STRING='cpuminer-opt 3.7.10'
|
||||
PACKAGE_BUGREPORT=''
|
||||
PACKAGE_URL=''
|
||||
|
||||
@@ -1321,7 +1321,7 @@ if test "$ac_init_help" = "long"; then
|
||||
# Omit some internal or obsolete options to make the list less imposing.
|
||||
# This message is too long to be a string in the A/UX 3.1 sh.
|
||||
cat <<_ACEOF
|
||||
\`configure' configures cpuminer-opt 3.7.9 to adapt to many kinds of systems.
|
||||
\`configure' configures cpuminer-opt 3.7.10 to adapt to many kinds of systems.
|
||||
|
||||
Usage: $0 [OPTION]... [VAR=VALUE]...
|
||||
|
||||
@@ -1392,7 +1392,7 @@ fi
|
||||
|
||||
if test -n "$ac_init_help"; then
|
||||
case $ac_init_help in
|
||||
short | recursive ) echo "Configuration of cpuminer-opt 3.7.9:";;
|
||||
short | recursive ) echo "Configuration of cpuminer-opt 3.7.10:";;
|
||||
esac
|
||||
cat <<\_ACEOF
|
||||
|
||||
@@ -1497,7 +1497,7 @@ fi
|
||||
test -n "$ac_init_help" && exit $ac_status
|
||||
if $ac_init_version; then
|
||||
cat <<\_ACEOF
|
||||
cpuminer-opt configure 3.7.9
|
||||
cpuminer-opt configure 3.7.10
|
||||
generated by GNU Autoconf 2.69
|
||||
|
||||
Copyright (C) 2012 Free Software Foundation, Inc.
|
||||
@@ -2000,7 +2000,7 @@ cat >config.log <<_ACEOF
|
||||
This file contains any messages produced by compilers while
|
||||
running configure, to aid debugging if configure makes a mistake.
|
||||
|
||||
It was created by cpuminer-opt $as_me 3.7.9, which was
|
||||
It was created by cpuminer-opt $as_me 3.7.10, which was
|
||||
generated by GNU Autoconf 2.69. Invocation command line was
|
||||
|
||||
$ $0 $@
|
||||
@@ -2981,7 +2981,7 @@ fi
|
||||
|
||||
# Define the identity of the package.
|
||||
PACKAGE='cpuminer-opt'
|
||||
VERSION='3.7.9'
|
||||
VERSION='3.7.10'
|
||||
|
||||
|
||||
cat >>confdefs.h <<_ACEOF
|
||||
@@ -6677,7 +6677,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
|
||||
# report actual input values of CONFIG_FILES etc. instead of their
|
||||
# values after options handling.
|
||||
ac_log="
|
||||
This file was extended by cpuminer-opt $as_me 3.7.9, which was
|
||||
This file was extended by cpuminer-opt $as_me 3.7.10, which was
|
||||
generated by GNU Autoconf 2.69. Invocation command line was
|
||||
|
||||
CONFIG_FILES = $CONFIG_FILES
|
||||
@@ -6743,7 +6743,7 @@ _ACEOF
|
||||
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
|
||||
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
|
||||
ac_cs_version="\\
|
||||
cpuminer-opt config.status 3.7.9
|
||||
cpuminer-opt config.status 3.7.10
|
||||
configured by $0, generated by GNU Autoconf 2.69,
|
||||
with options \\"\$ac_cs_config\\"
|
||||
|
||||
|
@@ -1,4 +1,4 @@
|
||||
AC_INIT([cpuminer-opt], [3.7.9])
|
||||
AC_INIT([cpuminer-opt], [3.7.10])
|
||||
|
||||
AC_PREREQ([2.59c])
|
||||
AC_CANONICAL_SYSTEM
|
||||
|
41
cpu-miner.c
41
cpu-miner.c
@@ -346,6 +346,7 @@ static bool work_decode( const json_t *val, struct work *work )
|
||||
work->targetdiff = target_to_diff( work->target );
|
||||
// for api stats, on longpoll pools
|
||||
stratum_diff = work->targetdiff;
|
||||
work->sharediff = 0;
|
||||
algo_gate.display_extra_data( work, &net_blocks );
|
||||
return true;
|
||||
}
|
||||
@@ -755,6 +756,7 @@ static int share_result( int result, struct work *work, const char *reason )
|
||||
uint32_t total_submits;
|
||||
float rate;
|
||||
char rate_s[8] = {0};
|
||||
double sharediff = work ? work->sharediff : stratum.sharediff;
|
||||
int i;
|
||||
|
||||
pthread_mutex_lock(&stats_lock);
|
||||
@@ -814,6 +816,8 @@ static int share_result( int result, struct work *work, const char *reason )
|
||||
sprintf(hr, "%.2f", hashrate );
|
||||
}
|
||||
|
||||
if ( sharediff == 0 )
|
||||
{
|
||||
#if ((defined(_WIN64) || defined(__WINDOWS__)))
|
||||
applog( LOG_NOTICE, "%s %lu/%lu (%s%%), %s %sH, %s %sH/s",
|
||||
sres, ( result ? accepted_count : rejected_count ),
|
||||
@@ -824,6 +828,20 @@ static int share_result( int result, struct work *work, const char *reason )
|
||||
total_submits, rate_s, hc, hc_units, hr, hr_units,
|
||||
(uint32_t)cpu_temp(0) );
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
#if ((defined(_WIN64) || defined(__WINDOWS__)))
|
||||
applog( LOG_NOTICE, "%s %lu/%lu (%s%%), diff %.3g, %s %sH/s",
|
||||
sres, ( result ? accepted_count : rejected_count ),
|
||||
total_submits, rate_s, sharediff, hr, hr_units );
|
||||
#else
|
||||
applog( LOG_NOTICE, "%s %lu/%lu (%s%%), diff %.3g, %s %sH/s, %dC",
|
||||
sres, ( result ? accepted_count : rejected_count ),
|
||||
total_submits, rate_s, sharediff, hr, hr_units,
|
||||
(uint32_t)cpu_temp(0) );
|
||||
#endif
|
||||
}
|
||||
|
||||
if (reason)
|
||||
{
|
||||
@@ -1026,6 +1044,7 @@ static bool submit_upstream_work( CURL *curl, struct work *work )
|
||||
}
|
||||
if ( have_stratum )
|
||||
{
|
||||
stratum.sharediff = work->sharediff;
|
||||
algo_gate.build_stratum_request( req, work, &stratum );
|
||||
if ( unlikely( !stratum_send_line( &stratum, req ) ) )
|
||||
{
|
||||
@@ -1569,7 +1588,7 @@ void std_get_new_work( struct work* work, struct work* g_work, int thr_id,
|
||||
uint32_t *end_nonce_ptr, bool clean_job )
|
||||
{
|
||||
uint32_t *nonceptr = algo_gate.get_nonceptr( work->data );
|
||||
|
||||
|
||||
if ( memcmp( work->data, g_work->data, algo_gate.work_cmp_size )
|
||||
&& ( clean_job || ( *nonceptr >= *end_nonce_ptr )
|
||||
|| ( work->job_id != g_work->job_id ) ) )
|
||||
@@ -2984,25 +3003,22 @@ bool check_cpu_capability ()
|
||||
}
|
||||
if ( sw_has_avx2 && !( cpu_has_avx2 && cpu_has_aes ) )
|
||||
{
|
||||
if ( sw_has_4way && algo_has_4way )
|
||||
printf( "A CPU with AES and AVX2 is required to use 4way!\n" );
|
||||
else if ( algo_has_avx2 )
|
||||
printf( "A CPU with AES and AVX2 is required!\n" );
|
||||
printf( "The SW build requires a CPU with AES and AVX2!\n" );
|
||||
return false;
|
||||
}
|
||||
if ( sw_has_avx && !( cpu_has_avx && cpu_has_aes ) )
|
||||
if ( sw_has_avx && !cpu_has_avx )
|
||||
{
|
||||
printf( "A CPU with AES and AVX2 is required!\n" );
|
||||
printf( "The SW build requires a CPU with AVX!\n" );
|
||||
return false;
|
||||
}
|
||||
if ( sw_has_aes && algo_has_aes && !cpu_has_aes )
|
||||
if ( sw_has_aes && !cpu_has_aes )
|
||||
{
|
||||
printf( "A CPU with AES is required!\n" );
|
||||
printf( "The SW build requires a CPU with AES!\n" );
|
||||
return false;
|
||||
}
|
||||
if ( sw_has_sha && algo_has_sha && !cpu_has_sha )
|
||||
if ( sw_has_sha && !cpu_has_sha )
|
||||
{
|
||||
printf( "A CPU with SHA is required!\n" );
|
||||
printf( "The SW build requires a CPU with SHA!\n" );
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -3187,6 +3203,9 @@ int main(int argc, char *argv[])
|
||||
}
|
||||
#endif
|
||||
|
||||
if ( num_cpus != opt_n_threads )
|
||||
applog( LOG_INFO,"%u CPU cores available, %u miner threads selected.",
|
||||
num_cpus, opt_n_threads );
|
||||
if ( opt_affinity != -1 )
|
||||
{
|
||||
if ( num_cpus > 64 )
|
||||
|
Reference in New Issue
Block a user