mirror of
https://github.com/JayDDee/cpuminer-opt.git
synced 2025-09-17 23:44:27 +00:00
v23.5
This commit is contained in:
@@ -1,16 +1,27 @@
|
||||
#include "lyra2-gate.h"
|
||||
#include <memory.h>
|
||||
#include <mm_malloc.h>
|
||||
#include "lyra2.h"
|
||||
#include "algo/blake/blake256-hash.h"
|
||||
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
#define LYRA2Z_16WAY 1
|
||||
#elif defined(__AVX2__)
|
||||
#define LYRA2Z_8WAY 1
|
||||
#elif defined(__SSE2__)
|
||||
#define LYRA2Z_4WAY 1
|
||||
//#else
|
||||
// NEON 1 way SIMD
|
||||
#endif
|
||||
|
||||
#define LYRA2Z_MATRIX_SIZE BLOCK_LEN_INT64 * 8 * 8 * 8
|
||||
|
||||
#if defined(LYRA2Z_16WAY)
|
||||
|
||||
__thread uint64_t* lyra2z_16way_matrix;
|
||||
|
||||
bool lyra2z_16way_thread_init()
|
||||
{
|
||||
return ( lyra2z_16way_matrix = _mm_malloc( 2*LYRA2Z_MATRIX_SIZE, 64 ) );
|
||||
return ( lyra2z_16way_matrix = mm_malloc( 2*LYRA2Z_MATRIX_SIZE, 64 ) );
|
||||
}
|
||||
|
||||
static void lyra2z_16way_hash( void *state, const void *midstate_vars,
|
||||
@@ -153,7 +164,7 @@ __thread uint64_t* lyra2z_8way_matrix;
|
||||
|
||||
bool lyra2z_8way_thread_init()
|
||||
{
|
||||
return ( lyra2z_8way_matrix = _mm_malloc( LYRA2Z_MATRIX_SIZE, 64 ) );
|
||||
return ( lyra2z_8way_matrix = mm_malloc( LYRA2Z_MATRIX_SIZE, 64 ) );
|
||||
}
|
||||
|
||||
static void lyra2z_8way_hash( void *state, const void *midstate_vars,
|
||||
@@ -259,12 +270,13 @@ int scanhash_lyra2z_8way( struct work *work, uint32_t max_nonce,
|
||||
|
||||
#elif defined(LYRA2Z_4WAY)
|
||||
|
||||
|
||||
// SSE2 or NEON
|
||||
|
||||
__thread uint64_t* lyra2z_4way_matrix;
|
||||
|
||||
bool lyra2z_4way_thread_init()
|
||||
{
|
||||
return ( lyra2z_4way_matrix = _mm_malloc( LYRA2Z_MATRIX_SIZE, 64 ) );
|
||||
return ( lyra2z_4way_matrix = mm_malloc( LYRA2Z_MATRIX_SIZE, 64 ) );
|
||||
}
|
||||
|
||||
static __thread blake256_4way_context l2z_4way_blake_mid;
|
||||
@@ -275,59 +287,90 @@ void lyra2z_4way_midstate( const void* input )
|
||||
blake256_4way_update( &l2z_4way_blake_mid, input, 64 );
|
||||
}
|
||||
|
||||
void lyra2z_4way_hash( void *state, const void *input )
|
||||
void lyra2z_4way_hash( void *hash, const void *midstate_vars,
|
||||
const void *midhash, const void *block )
|
||||
{
|
||||
uint32_t hash0[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash1[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash2[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash3[8] __attribute__ ((aligned (64)));
|
||||
uint32_t vhash[8*4] __attribute__ ((aligned (64)));
|
||||
blake256_4way_context ctx_blake __attribute__ ((aligned (64)));
|
||||
// blake256_4way_context ctx_blake __attribute__ ((aligned (64)));
|
||||
|
||||
blake256_4way_final_rounds_le( vhash, midstate_vars, midhash, block, 14 );
|
||||
|
||||
/*
|
||||
memcpy( &ctx_blake, &l2z_4way_blake_mid, sizeof l2z_4way_blake_mid );
|
||||
blake256_4way_update( &ctx_blake, input + (64*4), 16 );
|
||||
blake256_4way_close( &ctx_blake, vhash );
|
||||
*/
|
||||
|
||||
dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 256 );
|
||||
|
||||
LYRA2Z( lyra2z_4way_matrix, state , 32, hash0, 32, hash0, 32, 8, 8, 8 );
|
||||
LYRA2Z( lyra2z_4way_matrix, state+32, 32, hash1, 32, hash1, 32, 8, 8, 8 );
|
||||
LYRA2Z( lyra2z_4way_matrix, state+64, 32, hash2, 32, hash2, 32, 8, 8, 8 );
|
||||
LYRA2Z( lyra2z_4way_matrix, state+96, 32, hash3, 32, hash3, 32, 8, 8, 8 );
|
||||
LYRA2Z( lyra2z_4way_matrix, hash , 32, hash0, 32, hash0, 32, 8, 8, 8 );
|
||||
LYRA2Z( lyra2z_4way_matrix, hash+32, 32, hash1, 32, hash1, 32, 8, 8, 8 );
|
||||
LYRA2Z( lyra2z_4way_matrix, hash+64, 32, hash2, 32, hash2, 32, 8, 8, 8 );
|
||||
LYRA2Z( lyra2z_4way_matrix, hash+96, 32, hash3, 32, hash3, 32, 8, 8, 8 );
|
||||
}
|
||||
|
||||
int scanhash_lyra2z_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint64_t hash[4*4] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t midstate_vars[16*4] __attribute__ ((aligned (64)));
|
||||
v128_t block0_hash[8] __attribute__ ((aligned (64)));
|
||||
v128_t block_buf[16] __attribute__ ((aligned (64)));
|
||||
uint32_t phash[8] __attribute__ ((aligned (32))) =
|
||||
{
|
||||
0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A,
|
||||
0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19
|
||||
};
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint64_t *ptarget = (uint64_t*)work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 4;
|
||||
uint32_t n = first_nonce;
|
||||
__m128i *noncev = (__m128i*)vdata + 19; // aligned
|
||||
const int thr_id = mythr->id;
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
const v128u32_t four = v128_32(4);
|
||||
|
||||
if ( bench ) ptarget[7] = 0x0000ff;
|
||||
// Prehash first block
|
||||
blake256_transform_le( phash, pdata, 512, 0, 14 );
|
||||
|
||||
v128_bswap32_intrlv80_4x32( vdata, pdata );
|
||||
*noncev = _mm_set_epi32( n+3, n+2, n+1, n );
|
||||
lyra2z_4way_midstate( vdata );
|
||||
block0_hash[0] = v128_32( phash[0] );
|
||||
block0_hash[1] = v128_32( phash[1] );
|
||||
block0_hash[2] = v128_32( phash[2] );
|
||||
block0_hash[3] = v128_32( phash[3] );
|
||||
block0_hash[4] = v128_32( phash[4] );
|
||||
block0_hash[5] = v128_32( phash[5] );
|
||||
block0_hash[6] = v128_32( phash[6] );
|
||||
block0_hash[7] = v128_32( phash[7] );
|
||||
|
||||
// Build vectored second block, interleave last 16 bytes of data using
|
||||
// unique nonces.
|
||||
block_buf[ 0] = v128_32( pdata[16] );
|
||||
block_buf[ 1] = v128_32( pdata[17] );
|
||||
block_buf[ 2] = v128_32( pdata[18] );
|
||||
block_buf[ 3] = v128_set32( n+3, n+2, n+1, n );
|
||||
block_buf[ 4] = v128_32( 0x80000000 );
|
||||
block_buf[13] = v128_32( 1 );
|
||||
block_buf[15] = v128_32( 640 );
|
||||
|
||||
// Partialy prehash second block without touching nonces
|
||||
blake256_4way_round0_prehash_le( midstate_vars, block0_hash, block_buf );
|
||||
|
||||
do {
|
||||
lyra2z_4way_hash( hash, vdata );
|
||||
lyra2z_4way_hash( hash, midstate_vars, block0_hash, block_buf );
|
||||
for ( int lane = 0; lane < 4; lane++ )
|
||||
{
|
||||
const uint64_t *lane_hash = hash + (lane<<2);
|
||||
if ( unlikely( valid_hash( lane_hash, ptarget ) && !bench ) )
|
||||
{
|
||||
pdata[19] = bswap_32( n + lane );
|
||||
pdata[19] = n + lane;
|
||||
submit_solution( work, lane_hash, mythr );
|
||||
}
|
||||
}
|
||||
*noncev = _mm_add_epi32( *noncev, _mm_set1_epi32( 4 ) );
|
||||
block_buf[ 3] = v128_add32( block_buf[ 3], four );
|
||||
n += 4;
|
||||
} while ( likely( (n < last_nonce) && !work_restart[thr_id].restart ) );
|
||||
|
||||
@@ -336,5 +379,97 @@ int scanhash_lyra2z_4way( struct work *work, uint32_t max_nonce,
|
||||
return 0;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
// not used
|
||||
|
||||
__thread uint64_t* lyra2z_matrix;
|
||||
|
||||
bool lyra2z_thread_init()
|
||||
{
|
||||
const int i = BLOCK_LEN_INT64 * 8 * 8 * 8;
|
||||
lyra2z_matrix = mm_malloc( i, 64 );
|
||||
return lyra2z_matrix;
|
||||
}
|
||||
|
||||
static __thread blake256_context lyra2z_blake_mid;
|
||||
|
||||
void lyra2z_midstate( const void* input )
|
||||
{
|
||||
blake256_init( &lyra2z_blake_mid );
|
||||
blake256_update( &lyra2z_blake_mid, input, 64 );
|
||||
}
|
||||
|
||||
void lyra2z_hash( void *state, const void *input )
|
||||
{
|
||||
uint32_t _ALIGN(32) hash[16];
|
||||
blake256_context ctx_blake __attribute__ ((aligned (64)));
|
||||
|
||||
memcpy( &ctx_blake, &lyra2z_blake_mid, sizeof (blake256_context) );
|
||||
blake256_update( &ctx_blake, input + 64, 16 );
|
||||
blake256_close( &ctx_blake, hash );
|
||||
|
||||
LYRA2Z( lyra2z_matrix, hash, 32, hash, 32, hash, 32, 8, 8, 8 );
|
||||
|
||||
memcpy( state, hash, 32 );
|
||||
}
|
||||
|
||||
int scanhash_lyra2z( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t _ALIGN(64) hash[8];
|
||||
uint32_t _ALIGN(64) endiandata[20];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t nonce = first_nonce;
|
||||
int thr_id = mythr->id;
|
||||
|
||||
if (opt_benchmark) ptarget[7] = 0x0000ff;
|
||||
|
||||
// for ( int i = 0; i < 20; i++ ) endiandata[i] = bswap_32( pdata[i] );
|
||||
v128_bswap32_80( endiandata, pdata );
|
||||
|
||||
lyra2z_midstate( endiandata );
|
||||
|
||||
do {
|
||||
endiandata[19] = bswap_32( nonce );
|
||||
lyra2z_hash( hash, endiandata );
|
||||
if ( valid_hash( hash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = nonce;
|
||||
submit_solution( work, hash, mythr );
|
||||
}
|
||||
nonce++;
|
||||
} while ( nonce < max_nonce && !work_restart[thr_id].restart );
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce + 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
bool register_lyra2z_algo( algo_gate_t* gate )
|
||||
{
|
||||
#if defined(LYRA2Z_16WAY)
|
||||
gate->miner_thread_init = (void*)&lyra2z_16way_thread_init;
|
||||
gate->scanhash = (void*)&scanhash_lyra2z_16way;
|
||||
// gate->hash = (void*)&lyra2z_16way_hash;
|
||||
#elif defined(LYRA2Z_8WAY)
|
||||
gate->miner_thread_init = (void*)&lyra2z_8way_thread_init;
|
||||
gate->scanhash = (void*)&scanhash_lyra2z_8way;
|
||||
// gate->hash = (void*)&lyra2z_8way_hash;
|
||||
#elif defined(LYRA2Z_4WAY)
|
||||
gate->miner_thread_init = (void*)&lyra2z_4way_thread_init;
|
||||
gate->scanhash = (void*)&scanhash_lyra2z_4way;
|
||||
gate->hash = (void*)&lyra2z_4way_hash;
|
||||
#else
|
||||
gate->miner_thread_init = (void*)&lyra2z_thread_init;
|
||||
gate->scanhash = (void*)&scanhash_lyra2z;
|
||||
gate->hash = (void*)&lyra2z_hash;
|
||||
#endif
|
||||
gate->optimizations = SSE2_OPT | AVX2_OPT | AVX512_OPT | NEON_OPT;
|
||||
opt_target_factor = 256.0;
|
||||
return true;
|
||||
};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user