This commit is contained in:
Jay D Dee
2019-05-30 16:59:49 -04:00
parent eb3f57bfc7
commit 77c5ae80ab
82 changed files with 6906 additions and 3706 deletions

View File

@@ -55,11 +55,11 @@ void allium_4way_hash( void *state, const void *input )
LYRA2RE( hash3, 32, hash3, 32, hash3, 32, 1, 8, 8 );
cubehashUpdateDigest( &ctx.cube, (byte*)hash0, (const byte*)hash0, 32 );
cubehashReinit( &ctx.cube );
cubehashInit( &ctx.cube, 256, 16, 32 );
cubehashUpdateDigest( &ctx.cube, (byte*)hash1, (const byte*)hash1, 32 );
cubehashReinit( &ctx.cube );
cubehashInit( &ctx.cube, 256, 16, 32 );
cubehashUpdateDigest( &ctx.cube, (byte*)hash2, (const byte*)hash2, 32 );
cubehashReinit( &ctx.cube );
cubehashInit( &ctx.cube, 256, 16, 32 );
cubehashUpdateDigest( &ctx.cube, (byte*)hash3, (const byte*)hash3, 32 );
LYRA2RE( hash0, 32, hash0, 32, hash0, 32, 1, 8, 8 );

View File

@@ -27,7 +27,7 @@ bool register_lyra2rev3_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_lyra2rev3;
gate->hash = (void*)&lyra2rev3_hash;
#endif
gate->optimizations = SSE2_OPT | AES_OPT | SSE42_OPT | AVX2_OPT;
gate->optimizations = SSE2_OPT | SSE42_OPT | AVX2_OPT;
gate->miner_thread_init = (void*)&lyra2rev3_thread_init;
gate->set_target = (void*)&alt_set_target;
return true;

View File

@@ -17,14 +17,14 @@ bool register_lyra2rev3_algo( algo_gate_t* gate );
void lyra2rev3_4way_hash( void *state, const void *input );
int scanhash_lyra2rev3_4way( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done );
uint64_t *hashes_done, struct thr_info *mythr );
bool init_lyra2rev3_4way_ctx();
#else
void lyra2rev3_hash( void *state, const void *input );
int scanhash_lyra2rev3( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done );
uint64_t *hashes_done, struct thr_info *mythr );
bool init_lyra2rev3_ctx();
#endif

View File

@@ -7,8 +7,7 @@
#include "lyra2.h"
#include "algo-gate-api.h"
#include "avxdefs.h"
#ifndef NO_AES_NI
#if defined(__AES__)
#include "algo/groestl/aes_ni/hash-groestl256.h"
#endif
@@ -18,10 +17,10 @@ typedef struct {
sph_blake256_context blake;
sph_keccak256_context keccak;
sph_skein256_context skein;
#ifdef NO_AES_NI
sph_groestl256_context groestl;
#else
#if defined(__AES__)
hashState_groestl256 groestl;
#else
sph_groestl256_context groestl;
#endif
} lyra2re_ctx_holder;
@@ -33,10 +32,10 @@ void init_lyra2re_ctx()
sph_blake256_init(&lyra2re_ctx.blake);
sph_keccak256_init(&lyra2re_ctx.keccak);
sph_skein256_init(&lyra2re_ctx.skein);
#ifdef NO_AES_NI
sph_groestl256_init(&lyra2re_ctx.groestl);
#else
#if defined(__AES__)
init_groestl256( &lyra2re_ctx.groestl, 32 );
#else
sph_groestl256_init(&lyra2re_ctx.groestl);
#endif
}
@@ -72,11 +71,11 @@ void lyra2re_hash(void *state, const void *input)
sph_skein256(&ctx.skein, hashA, 32);
sph_skein256_close(&ctx.skein, hashB);
#ifdef NO_AES_NI
#if defined(__AES__)
update_and_final_groestl256( &ctx.groestl, hashA, hashB, 256 );
#else
sph_groestl256( &ctx.groestl, hashB, 32 );
sph_groestl256_close( &ctx.groestl, hashA );
#else
update_and_final_groestl256( &ctx.groestl, hashA, hashB, 256 );
#endif
memcpy(state, hashA, 32);

View File

@@ -48,11 +48,11 @@ void lyra2rev2_4way_hash( void *state, const void *input )
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash64, 256 );
cubehashUpdateDigest( &ctx.cube, (byte*) hash0, (const byte*) hash0, 32 );
cubehashReinit( &ctx.cube );
cubehashInit( &ctx.cube, 256, 16, 32 );
cubehashUpdateDigest( &ctx.cube, (byte*) hash1, (const byte*) hash1, 32 );
cubehashReinit( &ctx.cube );
cubehashInit( &ctx.cube, 256, 16, 32 );
cubehashUpdateDigest( &ctx.cube, (byte*) hash2, (const byte*) hash2, 32 );
cubehashReinit( &ctx.cube );
cubehashInit( &ctx.cube, 256, 16, 32 );
cubehashUpdateDigest( &ctx.cube, (byte*) hash3, (const byte*) hash3, 32 );
LYRA2REV2( l2v2_wholeMatrix, hash0, 32, hash0, 32, hash0, 32, 1, 4, 4 );
@@ -65,13 +65,13 @@ void lyra2rev2_4way_hash( void *state, const void *input )
skein256_4way_close( &ctx.skein, vhash64 );
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash64, 256 );
cubehashReinit( &ctx.cube );
cubehashInit( &ctx.cube, 256, 16, 32 );
cubehashUpdateDigest( &ctx.cube, (byte*) hash0, (const byte*) hash0, 32 );
cubehashReinit( &ctx.cube );
cubehashInit( &ctx.cube, 256, 16, 32 );
cubehashUpdateDigest( &ctx.cube, (byte*) hash1, (const byte*) hash1, 32 );
cubehashReinit( &ctx.cube );
cubehashInit( &ctx.cube, 256, 16, 32 );
cubehashUpdateDigest( &ctx.cube, (byte*) hash2, (const byte*) hash2, 32 );
cubehashReinit( &ctx.cube );
cubehashInit( &ctx.cube, 256, 16, 32 );
cubehashUpdateDigest( &ctx.cube, (byte*) hash3, (const byte*) hash3, 32 );
mm128_interleave_4x32( vhash, hash0, hash1, hash2, hash3, 256 );

View File

@@ -43,11 +43,11 @@ void lyra2rev3_4way_hash( void *state, const void *input )
LYRA2REV3( l2v3_wholeMatrix, hash3, 32, hash3, 32, hash3, 32, 1, 4, 4 );
cubehashUpdateDigest( &ctx.cube, (byte*) hash0, (const byte*) hash0, 32 );
cubehashReinit( &ctx.cube );
cubehashInit( &ctx.cube, 256, 16, 32 );
cubehashUpdateDigest( &ctx.cube, (byte*) hash1, (const byte*) hash1, 32 );
cubehashReinit( &ctx.cube );
cubehashInit( &ctx.cube, 256, 16, 32 );
cubehashUpdateDigest( &ctx.cube, (byte*) hash2, (const byte*) hash2, 32 );
cubehashReinit( &ctx.cube );
cubehashInit( &ctx.cube, 256, 16, 32 );
cubehashUpdateDigest( &ctx.cube, (byte*) hash3, (const byte*) hash3, 32 );
LYRA2REV3( l2v3_wholeMatrix, hash0, 32, hash0, 32, hash0, 32, 1, 4, 4 );
@@ -57,54 +57,67 @@ void lyra2rev3_4way_hash( void *state, const void *input )
mm128_interleave_4x32( vhash, hash0, hash1, hash2, hash3, 256 );
bmw256_4way( &ctx.bmw, vhash, 32 );
bmw256_4way_close( &ctx.bmw, vhash );
bmw256_4way_close( &ctx.bmw, state );
mm128_deinterleave_4x32( state, state+32, state+64, state+96, vhash, 256 );
}
int scanhash_lyra2rev3_4way( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done )
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t hash[8*4] __attribute__ ((aligned (64)));
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
uint32_t edata[20] __attribute__ ((aligned (64)));
uint32_t *hash7 = &(hash[7<<2]);
uint32_t lane_hash[8];
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
uint32_t n = first_nonce;
const uint32_t Htarg = ptarget[7];
uint32_t *nonces = work->nonces;
int num_found = 0;
uint32_t *noncep = vdata + 76; // 19*4
__m128i *noncev = (__m128i*)vdata + 19; // aligned
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
if ( opt_benchmark )
( (uint32_t*)ptarget )[7] = 0x0000ff;
swab32_array( edata, pdata, 20 );
// Need big endian data
casti_m128i( edata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
casti_m128i( edata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
casti_m128i( edata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
casti_m128i( edata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
casti_m128i( edata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
mm128_interleave_4x32( vdata, edata, edata, edata, edata, 640 );
do {
be32enc( noncep, n );
be32enc( noncep+1, n+1 );
be32enc( noncep+2, n+2 );
be32enc( noncep+3, n+3 );
do
{
*noncev = mm128_bswap_32( _mm_set_epi32( n+3, n+2, n+1, n ) );
lyra2rev3_4way_hash( hash, vdata );
pdata[19] = n;
for ( int i = 0; i < 4; i++ )
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
for ( int lane = 0; lane < 4; lane++ ) if ( hash7[lane] <= Htarg )
{
pdata[19] = n+i;
nonces[ num_found++ ] = n+i;
work_set_target_ratio( work, hash+(i<<3) );
mm128_extract_lane_4x32( lane_hash, hash, lane, 256 );
if ( fulltest( lane_hash, ptarget ) )
{
pdata[19] = n + lane;
work_set_target_ratio( work, lane_hash );
if ( submit_work( mythr, work ) )
applog( LOG_NOTICE, "Share %d submitted by thread %d, lane %d.",
accepted_share_count + rejected_share_count + 1,
thr_id, lane );
else
applog( LOG_WARNING, "Failed to submit share." );
}
}
n += 4;
} while ( (num_found == 0) && (n < max_nonce-4)
&& !work_restart[thr_id].restart);
} while ( (n < max_nonce-4) && !work_restart[thr_id].restart);
*hashes_done = n - first_nonce + 1;
return num_found;
return 0;
}
#endif

View File

@@ -8,7 +8,6 @@
typedef struct {
cubehashParam cube;
// cubehashParam cube2;
sph_blake256_context blake;
sph_bmw256_context bmw;
@@ -20,7 +19,6 @@ static __thread sph_blake256_context l2v3_blake_mid;
bool init_lyra2rev3_ctx()
{
cubehashInit( &lyra2v3_ctx.cube, 256, 16, 32 );
// cubehashInit( &lyra2v3_ctx.cube2, 256, 16, 32 );
sph_blake256_init( &lyra2v3_ctx.blake );
sph_bmw256_init( &lyra2v3_ctx.bmw );
return true;
@@ -59,44 +57,51 @@ void lyra2rev3_hash( void *state, const void *input )
memcpy( state, hash, 32 );
}
int scanhash_lyra2rev3(int thr_id, struct work *work,
uint32_t max_nonce, uint64_t *hashes_done)
int scanhash_lyra2rev3( int thr_id, struct work *work,
uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
uint32_t endiandata[20] __attribute__ ((aligned (64)));
uint32_t hash[8] __attribute__((aligned(64)));
const uint32_t first_nonce = pdata[19];
uint32_t nonce = first_nonce;
const uint32_t Htarg = ptarget[7];
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
uint32_t endiandata[20] __attribute__ ((aligned (64)));
uint32_t hash[8] __attribute__((aligned(64)));
const uint32_t first_nonce = pdata[19];
uint32_t nonce = first_nonce;
const uint32_t Htarg = ptarget[7];
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
if (opt_benchmark)
((uint32_t*)ptarget)[7] = 0x0000ff;
if (opt_benchmark)
((uint32_t*)ptarget)[7] = 0x0000ff;
swab32_array( endiandata, pdata, 20 );
// need big endian data
casti_m128i( endiandata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
casti_m128i( endiandata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
casti_m128i( endiandata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
casti_m128i( endiandata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
casti_m128i( endiandata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
l2v3_blake256_midstate( endiandata );
l2v3_blake256_midstate( endiandata );
do {
be32enc(&endiandata[19], nonce);
lyra2rev3_hash(hash, endiandata);
do
{
be32enc(&endiandata[19], nonce);
lyra2rev3_hash(hash, endiandata);
if (hash[7] <= Htarg )
{
if( fulltest(hash, ptarget) )
{
pdata[19] = nonce;
work_set_target_ratio( work, hash );
*hashes_done = pdata[19] - first_nonce;
return 1;
}
}
nonce++;
if (hash[7] <= Htarg )
{
if( fulltest(hash, ptarget) )
{
pdata[19] = nonce;
work_set_target_ratio( work, hash );
*hashes_done = pdata[19] - first_nonce;
return 1;
}
}
nonce++;
} while (nonce < max_nonce && !work_restart[thr_id].restart);
} while (nonce < max_nonce && !work_restart[thr_id].restart);
pdata[19] = nonce;
*hashes_done = pdata[19] - first_nonce + 1;
return 0;
pdata[19] = nonce;
*hashes_done = pdata[19] - first_nonce + 1;
return 0;
}

View File

@@ -91,7 +91,7 @@ static inline uint64_t rotr64( const uint64_t w, const unsigned c ){
LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \
LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \
#elif defined(__SSE4_2__)
#elif defined(__SSE2__)
// process 2 columns in parallel
// returns void, all args updated
@@ -108,14 +108,14 @@ static inline uint64_t rotr64( const uint64_t w, const unsigned c ){
#define LYRA_ROUND_AVX(s0,s1,s2,s3,s4,s5,s6,s7) \
G_2X64( s0, s2, s4, s6 ); \
G_2X64( s1, s3, s5, s7 ); \
mm128_ror256_1x64( s2, s3 ); \
mm128_swap256_128( s4, s5 ); \
mm128_rol256_1x64( s6, s7 ); \
mm128_rol1x64_256( s2, s3 ); \
mm128_swap128_256( s4, s5 ); \
mm128_rol1x64_256( s6, s7 ); \
G_2X64( s0, s2, s4, s6 ); \
G_2X64( s1, s3, s5, s7 ); \
mm128_rol256_1x64( s2, s3 ); \
mm128_swap256_128( s4, s5 ); \
mm128_ror256_1x64( s6, s7 );
mm128_rol1x64_256( s2, s3 ); \
mm128_swap128_256( s4, s5 ); \
mm128_ror1x64_256( s6, s7 );
#define LYRA_12_ROUNDS_AVX(s0,s1,s2,s3,s4,s5,s6,s7) \
LYRA_ROUND_AVX(s0,s1,s2,s3,s4,s5,s6,s7) \