mirror of
https://github.com/JayDDee/cpuminer-opt.git
synced 2025-09-17 23:44:27 +00:00
v3.10.6
This commit is contained in:
@@ -1,15 +1,206 @@
|
||||
#include "lyra2-gate.h"
|
||||
#include <memory.h>
|
||||
#include <mm_malloc.h>
|
||||
|
||||
#if defined (ALLIUM_4WAY)
|
||||
|
||||
#include "algo/blake/blake-hash-4way.h"
|
||||
#include "algo/keccak/keccak-hash-4way.h"
|
||||
#include "algo/skein/skein-hash-4way.h"
|
||||
#include "algo/cubehash/cubehash_sse2.h"
|
||||
#include "algo/cubehash/cube-hash-2way.h"
|
||||
#include "algo/groestl/aes_ni/hash-groestl256.h"
|
||||
|
||||
#if defined (ALLIUM_8WAY)
|
||||
|
||||
typedef struct {
|
||||
blake256_8way_context blake;
|
||||
keccak256_8way_context keccak;
|
||||
cube_4way_context cube;
|
||||
skein256_8way_context skein;
|
||||
hashState_groestl256 groestl;
|
||||
} allium_8way_ctx_holder;
|
||||
|
||||
static __thread allium_8way_ctx_holder allium_8way_ctx;
|
||||
|
||||
bool init_allium_8way_ctx()
|
||||
{
|
||||
keccak256_8way_init( &allium_8way_ctx.keccak );
|
||||
cube_4way_init( &allium_8way_ctx.cube, 256, 16, 32 );
|
||||
skein256_8way_init( &allium_8way_ctx.skein );
|
||||
init_groestl256( &allium_8way_ctx.groestl, 32 );
|
||||
return true;
|
||||
}
|
||||
|
||||
void allium_8way_hash( void *state, const void *input )
|
||||
{
|
||||
uint32_t vhash[8*8] __attribute__ ((aligned (128)));
|
||||
uint32_t vhashA[8*8] __attribute__ ((aligned (64)));
|
||||
uint32_t vhashB[8*8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash0[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash1[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash2[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash3[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash4[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash5[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash6[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash7[8] __attribute__ ((aligned (64)));
|
||||
allium_8way_ctx_holder ctx __attribute__ ((aligned (64)));
|
||||
|
||||
memcpy( &ctx, &allium_8way_ctx, sizeof(allium_8way_ctx) );
|
||||
blake256_8way_update( &ctx.blake, input + (64<<3), 16 );
|
||||
blake256_8way_close( &ctx.blake, vhash );
|
||||
|
||||
rintrlv_8x32_8x64( vhashA, vhash, 256 );
|
||||
keccak256_8way_update( &ctx.keccak, vhashA, 32 );
|
||||
keccak256_8way_close( &ctx.keccak, vhash );
|
||||
|
||||
dintrlv_8x64( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
|
||||
vhash, 256 );
|
||||
|
||||
|
||||
intrlv_2x256( vhash, hash0, hash1, 256 );
|
||||
LYRA2RE_2WAY( vhash, 32, vhash, 32, 1, 8, 8 );
|
||||
dintrlv_2x256( hash0, hash1, vhash, 256 );
|
||||
intrlv_2x256( vhash, hash2, hash3, 256 );
|
||||
LYRA2RE_2WAY( vhash, 32, vhash, 32, 1, 8, 8 );
|
||||
dintrlv_2x256( hash2, hash3, vhash, 256 );
|
||||
intrlv_2x256( vhash, hash4, hash5, 256 );
|
||||
LYRA2RE_2WAY( vhash, 32, vhash, 32, 1, 8, 8 );
|
||||
dintrlv_2x256( hash4, hash5, vhash, 256 );
|
||||
intrlv_2x256( vhash, hash6, hash7, 256 );
|
||||
LYRA2RE_2WAY( vhash, 32, vhash, 32, 1, 8, 8 );
|
||||
dintrlv_2x256( hash6, hash7, vhash, 256 );
|
||||
|
||||
/*
|
||||
LYRA2RE( hash0, 32, hash0, 32, hash0, 32, 1, 8, 8 );
|
||||
LYRA2RE( hash1, 32, hash1, 32, hash1, 32, 1, 8, 8 );
|
||||
LYRA2RE( hash2, 32, hash2, 32, hash2, 32, 1, 8, 8 );
|
||||
LYRA2RE( hash3, 32, hash3, 32, hash3, 32, 1, 8, 8 );
|
||||
LYRA2RE( hash4, 32, hash4, 32, hash4, 32, 1, 8, 8 );
|
||||
LYRA2RE( hash5, 32, hash5, 32, hash5, 32, 1, 8, 8 );
|
||||
LYRA2RE( hash6, 32, hash6, 32, hash6, 32, 1, 8, 8 );
|
||||
LYRA2RE( hash7, 32, hash7, 32, hash7, 32, 1, 8, 8 );
|
||||
*/
|
||||
|
||||
|
||||
|
||||
intrlv_4x128( vhashA, hash0, hash1, hash2, hash3, 256 );
|
||||
intrlv_4x128( vhashB, hash4, hash5, hash6, hash7, 256 );
|
||||
|
||||
cube_4way_update_close( &ctx.cube, vhashA, vhashA, 32 );
|
||||
cube_4way_init( &ctx.cube, 256, 16, 32 );
|
||||
cube_4way_update_close( &ctx.cube, vhashB, vhashB, 32 );
|
||||
|
||||
dintrlv_4x128( hash0, hash1, hash2, hash3, vhashA, 256 );
|
||||
dintrlv_4x128( hash4, hash5, hash6, hash7, vhashB, 256 );
|
||||
|
||||
intrlv_2x256( vhash, hash0, hash1, 256 );
|
||||
LYRA2RE_2WAY( vhash, 32, vhash, 32, 1, 8, 8 );
|
||||
dintrlv_2x256( hash0, hash1, vhash, 256 );
|
||||
intrlv_2x256( vhash, hash2, hash3, 256 );
|
||||
LYRA2RE_2WAY( vhash, 32, vhash, 32, 1, 8, 8 );
|
||||
dintrlv_2x256( hash2, hash3, vhash, 256 );
|
||||
intrlv_2x256( vhash, hash4, hash5, 256 );
|
||||
LYRA2RE_2WAY( vhash, 32, vhash, 32, 1, 8, 8 );
|
||||
dintrlv_2x256( hash4, hash5, vhash, 256 );
|
||||
intrlv_2x256( vhash, hash6, hash7, 256 );
|
||||
LYRA2RE_2WAY( vhash, 32, vhash, 32, 1, 8, 8 );
|
||||
dintrlv_2x256( hash6, hash7, vhash, 256 );
|
||||
|
||||
|
||||
/*
|
||||
LYRA2RE( hash0, 32, hash0, 32, hash0, 32, 1, 8, 8 );
|
||||
LYRA2RE( hash1, 32, hash1, 32, hash1, 32, 1, 8, 8 );
|
||||
LYRA2RE( hash2, 32, hash2, 32, hash2, 32, 1, 8, 8 );
|
||||
LYRA2RE( hash3, 32, hash3, 32, hash3, 32, 1, 8, 8 );
|
||||
LYRA2RE( hash4, 32, hash4, 32, hash4, 32, 1, 8, 8 );
|
||||
LYRA2RE( hash5, 32, hash5, 32, hash5, 32, 1, 8, 8 );
|
||||
LYRA2RE( hash6, 32, hash6, 32, hash6, 32, 1, 8, 8 );
|
||||
LYRA2RE( hash7, 32, hash7, 32, hash7, 32, 1, 8, 8 );
|
||||
*/
|
||||
|
||||
|
||||
|
||||
intrlv_8x64( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7, 256 );
|
||||
|
||||
skein256_8way_update( &ctx.skein, vhash, 32 );
|
||||
skein256_8way_close( &ctx.skein, vhash );
|
||||
|
||||
dintrlv_8x64( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
|
||||
vhash, 256 );
|
||||
|
||||
update_and_final_groestl256( &ctx.groestl, state, hash0, 256 );
|
||||
memcpy( &ctx.groestl, &allium_8way_ctx.groestl,
|
||||
sizeof(hashState_groestl256) );
|
||||
update_and_final_groestl256( &ctx.groestl, state+32, hash1, 256 );
|
||||
memcpy( &ctx.groestl, &allium_8way_ctx.groestl,
|
||||
sizeof(hashState_groestl256) );
|
||||
update_and_final_groestl256( &ctx.groestl, state+64, hash2, 256 );
|
||||
memcpy( &ctx.groestl, &allium_8way_ctx.groestl,
|
||||
sizeof(hashState_groestl256) );
|
||||
update_and_final_groestl256( &ctx.groestl, state+96, hash3, 256 );
|
||||
memcpy( &ctx.groestl, &allium_8way_ctx.groestl,
|
||||
sizeof(hashState_groestl256) );
|
||||
update_and_final_groestl256( &ctx.groestl, state+128, hash4, 256 );
|
||||
memcpy( &ctx.groestl, &allium_8way_ctx.groestl,
|
||||
sizeof(hashState_groestl256) );
|
||||
update_and_final_groestl256( &ctx.groestl, state+160, hash5, 256 );
|
||||
memcpy( &ctx.groestl, &allium_8way_ctx.groestl,
|
||||
sizeof(hashState_groestl256) );
|
||||
update_and_final_groestl256( &ctx.groestl, state+192, hash6, 256 );
|
||||
memcpy( &ctx.groestl, &allium_8way_ctx.groestl,
|
||||
sizeof(hashState_groestl256) );
|
||||
update_and_final_groestl256( &ctx.groestl, state+224, hash7, 256 );
|
||||
memcpy( &ctx.groestl, &allium_8way_ctx.groestl,
|
||||
sizeof(hashState_groestl256) );
|
||||
}
|
||||
|
||||
int scanhash_allium_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[8*8] __attribute__ ((aligned (128)));
|
||||
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
const uint32_t last_nonce = max_nonce - 8;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
__m256i *noncev = (__m256i*)vdata + 19; // aligned
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
if ( opt_benchmark )
|
||||
( (uint32_t*)ptarget )[7] = 0x0000ff;
|
||||
|
||||
mm256_bswap32_intrlv80_8x32( vdata, pdata );
|
||||
blake256_8way_init( &allium_8way_ctx.blake );
|
||||
blake256_8way_update( &allium_8way_ctx.blake, vdata, 64 );
|
||||
|
||||
do {
|
||||
*noncev = mm256_bswap_32( _mm256_set_epi32( n+7, n+6, n+5, n+4,
|
||||
n+3, n+2, n+1, n ) );
|
||||
|
||||
allium_8way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
for ( int lane = 0; lane < 8; lane++ ) if ( (hash+(lane<<3))[7] <= Htarg )
|
||||
{
|
||||
if ( fulltest( hash+(lane<<3), ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
submit_lane_solution( work, hash+(lane<<3), mythr, lane );
|
||||
}
|
||||
}
|
||||
n += 8;
|
||||
} while ( (n < last_nonce) && !work_restart[thr_id].restart);
|
||||
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
#elif defined (ALLIUM_4WAY)
|
||||
|
||||
|
||||
typedef struct {
|
||||
blake256_4way_context blake;
|
||||
keccak256_4way_context keccak;
|
||||
|
||||
@@ -129,7 +129,11 @@ bool register_lyra2rev2_algo( algo_gate_t* gate )
|
||||
|
||||
bool register_lyra2z_algo( algo_gate_t* gate )
|
||||
{
|
||||
#if defined(LYRA2Z_8WAY)
|
||||
#if defined(LYRA2Z_16WAY)
|
||||
gate->miner_thread_init = (void*)&lyra2z_16way_thread_init;
|
||||
gate->scanhash = (void*)&scanhash_lyra2z_16way;
|
||||
gate->hash = (void*)&lyra2z_16way_hash;
|
||||
#elif defined(LYRA2Z_8WAY)
|
||||
gate->miner_thread_init = (void*)&lyra2z_8way_thread_init;
|
||||
gate->scanhash = (void*)&scanhash_lyra2z_8way;
|
||||
gate->hash = (void*)&lyra2z_8way_hash;
|
||||
@@ -142,7 +146,7 @@ bool register_lyra2z_algo( algo_gate_t* gate )
|
||||
gate->scanhash = (void*)&scanhash_lyra2z;
|
||||
gate->hash = (void*)&lyra2z_hash;
|
||||
#endif
|
||||
gate->optimizations = SSE42_OPT | AVX2_OPT;
|
||||
gate->optimizations = SSE42_OPT | AVX2_OPT | AVX512_OPT;
|
||||
opt_target_factor = 256.0;
|
||||
return true;
|
||||
};
|
||||
@@ -170,7 +174,11 @@ bool register_lyra2h_algo( algo_gate_t* gate )
|
||||
|
||||
bool register_allium_algo( algo_gate_t* gate )
|
||||
{
|
||||
#if defined (ALLIUM_4WAY)
|
||||
#if defined (ALLIUM_8WAY)
|
||||
gate->miner_thread_init = (void*)&init_allium_8way_ctx;
|
||||
gate->scanhash = (void*)&scanhash_allium_8way;
|
||||
gate->hash = (void*)&allium_8way_hash;
|
||||
#elif defined (ALLIUM_4WAY)
|
||||
gate->miner_thread_init = (void*)&init_allium_4way_ctx;
|
||||
gate->scanhash = (void*)&scanhash_allium_4way;
|
||||
gate->hash = (void*)&allium_4way_hash;
|
||||
@@ -179,7 +187,7 @@ bool register_allium_algo( algo_gate_t* gate )
|
||||
gate->scanhash = (void*)&scanhash_allium;
|
||||
gate->hash = (void*)&allium_hash;
|
||||
#endif
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | SSE42_OPT | AVX2_OPT;
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | SSE42_OPT | AVX2_OPT | AVX512_OPT;
|
||||
opt_target_factor = 256.0;
|
||||
return true;
|
||||
};
|
||||
|
||||
@@ -85,17 +85,25 @@ bool init_lyra2rev2_ctx();
|
||||
|
||||
/////////////////////////
|
||||
|
||||
#if defined(__SSE2__)
|
||||
#define LYRA2Z_4WAY
|
||||
#endif
|
||||
#if defined(__AVX2__)
|
||||
#define LYRA2Z_8WAY
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
#define LYRA2Z_16WAY 1
|
||||
#elif defined(__AVX2__)
|
||||
#define LYRA2Z_8WAY 1
|
||||
#elif defined(__SSE2__)
|
||||
#define LYRA2Z_4WAY 1
|
||||
#endif
|
||||
|
||||
|
||||
#define LYRA2Z_MATRIX_SIZE BLOCK_LEN_INT64 * 8 * 8 * 8
|
||||
|
||||
#if defined(LYRA2Z_8WAY)
|
||||
#if defined(LYRA2Z_16WAY)
|
||||
|
||||
void lyra2z_16way_hash( void *state, const void *input );
|
||||
int scanhash_lyra2z_16way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
bool lyra2z_16way_thread_init();
|
||||
|
||||
#elif defined(LYRA2Z_8WAY)
|
||||
|
||||
void lyra2z_8way_hash( void *state, const void *input );
|
||||
int scanhash_lyra2z_8way( struct work *work, uint32_t max_nonce,
|
||||
@@ -144,13 +152,22 @@ bool lyra2h_thread_init();
|
||||
|
||||
//////////////////////////////////
|
||||
|
||||
#if defined(__AVX2__) && defined(__AES__)
|
||||
#define ALLIUM_4WAY
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
#define ALLIUM_8WAY 1
|
||||
#elif defined(__AVX2__) && defined(__AES__)
|
||||
#define ALLIUM_4WAY 1
|
||||
#endif
|
||||
|
||||
bool register_allium_algo( algo_gate_t* gate );
|
||||
|
||||
#if defined(ALLIUM_4WAY)
|
||||
#if defined(ALLIUM_8WAY)
|
||||
|
||||
void allium_8way_hash( void *state, const void *input );
|
||||
int scanhash_allium_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
bool init_allium_8way_ctx();
|
||||
|
||||
#elif defined(ALLIUM_4WAY)
|
||||
|
||||
void allium_4way_hash( void *state, const void *input );
|
||||
int scanhash_allium_4way( struct work *work, uint32_t max_nonce,
|
||||
|
||||
@@ -26,14 +26,17 @@
|
||||
#include "lyra2.h"
|
||||
#include "sponge.h"
|
||||
|
||||
// LYRA2RE 8 cols 8 rows used by lyea2re, allium, phi2, x22i, x25x.
|
||||
// LYRA2RE 8 cols 8 rows used by lyra2re, allium, phi2, x22i, x25x,
|
||||
// dynamic matrix allocation.
|
||||
//
|
||||
// LYRA2REV2 4 cols 4 rows used by lyra2rev2.
|
||||
// LYRA2REV2 4 cols 4 rows used by lyra2rev2 and x21s, static matrix
|
||||
// allocation.
|
||||
//
|
||||
// LYRA2REV3 4 cols 4 rows with an extra twist in calculating
|
||||
// rowa in the wandering phase. Used by lyra2rev3.
|
||||
// rowa in the wandering phase. Used by lyra2rev3. Static matrix
|
||||
// allocation.
|
||||
//
|
||||
// LYRA2Z various cols & rows and supports 80 input. Used by lyra2z,
|
||||
// LYRA2Z various cols & rows and supports 80 byte input. Used by lyra2z,
|
||||
// lyra2z330, lyra2h,
|
||||
|
||||
|
||||
@@ -60,7 +63,7 @@
|
||||
*/
|
||||
|
||||
// For lyra2rev3.
|
||||
// convert a simple offset to an index into interleaved data.
|
||||
// convert a simple offset to an index into 2x4 u64 interleaved data.
|
||||
// good for state and 4 row matrix.
|
||||
// index = ( int( off / 4 ) * 2 ) + ( off mod 4 )
|
||||
|
||||
@@ -202,12 +205,8 @@ int LYRA2REV3_2WAY( uint64_t* wholeMatrix, void *K, uint64_t kLen,
|
||||
|
||||
// hard coded for 32 byte input as well as matrix size.
|
||||
// Other required versions include 80 byte input and different block
|
||||
// sizez
|
||||
// sizes.
|
||||
|
||||
//int LYRA2REV3_2WAY( uint64_t* wholeMatrix, void *K, uint64_t kLen,
|
||||
// const void *pwd, const uint64_t pwdlen, const void *salt,
|
||||
// const uint64_t saltlen, const uint64_t timeCost, const uint64_t nRows,
|
||||
// const uint64_t nCols )
|
||||
{
|
||||
//====================== Basic variables ============================//
|
||||
uint64_t _ALIGN(256) state[32];
|
||||
@@ -335,159 +334,111 @@ int LYRA2REV3_2WAY( uint64_t* wholeMatrix, void *K, uint64_t kLen,
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif // AVX512
|
||||
|
||||
#if 0
|
||||
|
||||
//////////////////////////////////////////////////
|
||||
int LYRA2Z( uint64_t* wholeMatrix, void *K, uint64_t kLen, const void *pwd,
|
||||
const uint64_t pwdlen, const void *salt, const uint64_t saltlen,
|
||||
const uint64_t timeCost, const uint64_t nRows,
|
||||
const uint64_t nCols )
|
||||
|
||||
int LYRA2Z_2WAY( uint64_t* wholeMatrix, void *K, uint64_t kLen,
|
||||
const void *pwd, const uint64_t pwdlen, const uint64_t timeCost,
|
||||
const uint64_t nRows, const uint64_t nCols )
|
||||
{
|
||||
//========================== Basic variables ============================//
|
||||
uint64_t _ALIGN(256) state[16];
|
||||
int64_t row = 2; //index of row to be processed
|
||||
int64_t prev = 1; //index of prev (last row ever computed/modified)
|
||||
int64_t rowa = 0; //index of row* (a previous row, deterministically picked during Setup and randomly picked while Wandering)
|
||||
int64_t tau; //Time Loop iterator
|
||||
int64_t step = 1; //Visitation step (used during Setup and Wandering phases)
|
||||
int64_t window = 2; //Visitation window (used to define which rows can be revisited during Setup)
|
||||
int64_t gap = 1; //Modifier to the step, assuming the values 1 or -1
|
||||
// int64_t i; //auxiliary iteration counter
|
||||
uint64_t _ALIGN(256) state[32];
|
||||
int64_t row = 2;
|
||||
int64_t prev = 1;
|
||||
int64_t rowa0 = 0;
|
||||
int64_t rowa1 = 0;
|
||||
int64_t tau;
|
||||
int64_t step = 1;
|
||||
int64_t window = 2;
|
||||
int64_t gap = 1;
|
||||
//=======================================================================/
|
||||
|
||||
//======= Initializing the Memory Matrix and pointers to it =============//
|
||||
//Tries to allocate enough space for the whole memory matrix
|
||||
|
||||
const int64_t ROW_LEN_INT64 = BLOCK_LEN_INT64 * nCols;
|
||||
// const int64_t ROW_LEN_BYTES = ROW_LEN_INT64 * 8;
|
||||
|
||||
// memset( wholeMatrix, 0, ROW_LEN_BYTES * nRows );
|
||||
|
||||
//==== Getting the password + salt + basil padded with 10*1 ============//
|
||||
//OBS.:The memory matrix will temporarily hold the password: not for saving memory,
|
||||
//but this ensures that the password copied locally will be overwritten as soon as possible
|
||||
|
||||
//First, we clean enough blocks for the password, salt, basil and padding
|
||||
uint64_t nBlocksInput = ( ( saltlen + pwdlen + 6 *
|
||||
uint64_t nBlocksInput = ( ( pwdlen + pwdlen + 6 *
|
||||
sizeof (uint64_t) ) / BLOCK_LEN_BLAKE2_SAFE_BYTES ) + 1;
|
||||
byte *ptrByte = (byte*) wholeMatrix;
|
||||
memset( ptrByte, 0, nBlocksInput * BLOCK_LEN_BLAKE2_SAFE_BYTES );
|
||||
|
||||
//Prepends the password
|
||||
memcpy(ptrByte, pwd, pwdlen);
|
||||
ptrByte += pwdlen;
|
||||
uint64_t *ptr = wholeMatrix;
|
||||
uint64_t *pw = (uint64_t*)pwd;
|
||||
|
||||
//Concatenates the salt
|
||||
memcpy(ptrByte, salt, saltlen);
|
||||
ptrByte += saltlen;
|
||||
//Concatenates the basil: every integer passed as parameter, in the order they are provided by the interface
|
||||
memcpy(ptrByte, &kLen, sizeof (uint64_t));
|
||||
ptrByte += sizeof (uint64_t);
|
||||
memcpy(ptrByte, &pwdlen, sizeof (uint64_t));
|
||||
ptrByte += sizeof (uint64_t);
|
||||
memcpy(ptrByte, &saltlen, sizeof (uint64_t));
|
||||
ptrByte += sizeof (uint64_t);
|
||||
memcpy(ptrByte, &timeCost, sizeof (uint64_t));
|
||||
ptrByte += sizeof (uint64_t);
|
||||
memcpy(ptrByte, &nRows, sizeof (uint64_t));
|
||||
ptrByte += sizeof (uint64_t);
|
||||
memcpy(ptrByte, &nCols, sizeof (uint64_t));
|
||||
ptrByte += sizeof (uint64_t);
|
||||
memcpy( ptr, pw, 2*pwdlen ); // password
|
||||
ptr += pwdlen>>2;
|
||||
memcpy( ptr, pw, 2*pwdlen ); // password lane 1
|
||||
ptr += pwdlen>>2;
|
||||
|
||||
//Now comes the padding
|
||||
*ptrByte = 0x80; //first byte of padding: right after the password
|
||||
ptrByte = (byte*) wholeMatrix; //resets the pointer to the start of the memory matrix
|
||||
ptrByte += nBlocksInput * BLOCK_LEN_BLAKE2_SAFE_BYTES - 1; //sets the pointer to the correct position: end of incomplete block
|
||||
*ptrByte ^= 0x01; //last byte of padding: at the end of the last incomplete block
|
||||
// now build the rest interleaving on the fly.
|
||||
ptr[0] = ptr[ 4] = kLen;
|
||||
ptr[1] = ptr[ 5] = pwdlen;
|
||||
ptr[2] = ptr[ 6] = pwdlen; // saltlen
|
||||
ptr[3] = ptr[ 7] = timeCost;
|
||||
ptr[8] = ptr[12] = nRows;
|
||||
ptr[9] = ptr[13] = nCols;
|
||||
ptr[10] = ptr[14] = 0x80;
|
||||
ptr[11] = ptr[15] = 0x0100000000000000;
|
||||
|
||||
//=================== Initializing the Sponge State ====================//
|
||||
//Sponge state: 16 uint64_t, BLOCK_LEN_INT64 words of them for the bitrate (b) and the remainder for the capacity (c)
|
||||
// uint64_t *state = _mm_malloc(16 * sizeof(uint64_t), 32);
|
||||
// if (state == NULL) {
|
||||
// return -1;
|
||||
// }
|
||||
// initState( state );
|
||||
uint64_t *ptrWord = wholeMatrix;
|
||||
|
||||
//============================== Setup Phase =============================//
|
||||
//Absorbing salt, password and basil: this is the only place in which the block length is hard-coded to 512 bits
|
||||
uint64_t *ptrWord = wholeMatrix;
|
||||
absorbBlockBlake2Safe_2way( state, ptrWord, nBlocksInput,
|
||||
BLOCK_LEN_BLAKE2_SAFE_INT64 );
|
||||
|
||||
absorbBlockBlake2Safe( state, ptrWord, nBlocksInput,
|
||||
BLOCK_LEN_BLAKE2_SAFE_INT64 );
|
||||
/*
|
||||
for ( i = 0; i < nBlocksInput; i++ )
|
||||
{
|
||||
absorbBlockBlake2Safe( state, ptrWord ); //absorbs each block of pad(pwd || salt || basil)
|
||||
ptrWord += BLOCK_LEN_BLAKE2_SAFE_INT64; //goes to next block of pad(pwd || salt || basil)
|
||||
}
|
||||
*/
|
||||
//Initializes M[0] and M[1]
|
||||
reducedSqueezeRow0(state, &wholeMatrix[0], nCols); //The locally copied password is most likely overwritten here
|
||||
reducedDuplexRow1(state, &wholeMatrix[0], &wholeMatrix[ROW_LEN_INT64], nCols);
|
||||
//Initializes M[0] and M[1]
|
||||
reducedSqueezeRow0_2way( state, &wholeMatrix[0], nCols );
|
||||
|
||||
do {
|
||||
//M[row] = rand; //M[row*] = M[row*] XOR rotW(rand)
|
||||
reducedDuplexRowSetup(state, &wholeMatrix[prev*ROW_LEN_INT64], &wholeMatrix[rowa*ROW_LEN_INT64], &wholeMatrix[row*ROW_LEN_INT64], nCols);
|
||||
reducedDuplexRow1_2way( state, &wholeMatrix[0],
|
||||
&wholeMatrix[ 2 * ROW_LEN_INT64 ], nCols );
|
||||
|
||||
//updates the value of row* (deterministically picked during Setup))
|
||||
rowa = (rowa + step) & (window - 1);
|
||||
//update prev: it now points to the last row ever computed
|
||||
prev = row;
|
||||
//updates row: goes to the next row to be computed
|
||||
row++;
|
||||
do
|
||||
{
|
||||
//M[row] = rand; //M[row*] = M[row*] XOR rotW(rand)
|
||||
|
||||
//Checks if all rows in the window where visited.
|
||||
if (rowa == 0) {
|
||||
step = window + gap; //changes the step: approximately doubles its value
|
||||
window *= 2; //doubles the size of the re-visitation window
|
||||
gap = -gap; //inverts the modifier to the step
|
||||
}
|
||||
reducedDuplexRowSetup_2way( state, &wholeMatrix[ 2* prev * ROW_LEN_INT64],
|
||||
&wholeMatrix[ 2* rowa0 * ROW_LEN_INT64],
|
||||
&wholeMatrix[ 2* row*ROW_LEN_INT64],
|
||||
nCols );
|
||||
|
||||
} while (row < nRows);
|
||||
rowa0 = (rowa0 + step) & (window - 1);
|
||||
prev = row;
|
||||
row++;
|
||||
|
||||
//======================== Wandering Phase =============================//
|
||||
row = 0; //Resets the visitation to the first row of the memory matrix
|
||||
for ( tau = 1; tau <= timeCost; tau++ )
|
||||
{
|
||||
//Step is approximately half the number of all rows of the memory matrix for an odd tau; otherwise, it is -1
|
||||
if ( rowa0 == 0 )
|
||||
{
|
||||
step = window + gap;
|
||||
window *= 2;
|
||||
gap = -gap;
|
||||
}
|
||||
} while ( row < nRows );
|
||||
|
||||
row = 0;
|
||||
for ( tau = 1; tau <= timeCost; tau++ )
|
||||
{
|
||||
step = (tau % 2 == 0) ? -1 : nRows / 2 - 1;
|
||||
do {
|
||||
//Selects a pseudorandom index row*
|
||||
//----------------------------------------------------------------------
|
||||
//rowa = ((unsigned int)state[0]) & (nRows-1); //(USE THIS IF nRows IS A POWER OF 2)
|
||||
rowa = ((uint64_t) (state[0])) % nRows; //(USE THIS FOR THE "GENERIC" CASE)
|
||||
//-----------------------------------------------------------------
|
||||
do
|
||||
{
|
||||
rowa0 = state[ 0 ] % nRows;
|
||||
rowa1 = state[ 4 ] % nRows;
|
||||
|
||||
//Performs a reduced-round duplexing operation over M[row*] XOR M[prev], updating both M[row*] and M[row]
|
||||
reducedDuplexRow(state, &wholeMatrix[prev*ROW_LEN_INT64], &wholeMatrix[rowa*ROW_LEN_INT64], &wholeMatrix[row*ROW_LEN_INT64], nCols);
|
||||
reducedDuplexRow_2way( state, &wholeMatrix[ 2* prev * ROW_LEN_INT64 ],
|
||||
&wholeMatrix[ 2* rowa0 * ROW_LEN_INT64 ],
|
||||
&wholeMatrix[ 2* rowa1 * ROW_LEN_INT64 ],
|
||||
&wholeMatrix[ 2* row *ROW_LEN_INT64 ],
|
||||
nCols );
|
||||
|
||||
//update prev: it now points to the last row ever computed
|
||||
prev = row;
|
||||
|
||||
//updates row: goes to the next row to be computed
|
||||
//---------------------------------------------------------------
|
||||
//row = (row + step) & (nRows-1); //(USE THIS IF nRows IS A POWER OF 2)
|
||||
row = (row + step) % nRows; //(USE THIS FOR THE "GENERIC" CASE)
|
||||
//--------------------------------------------------------------------
|
||||
row = (row + step) % nRows;
|
||||
|
||||
} while (row != 0);
|
||||
}
|
||||
}
|
||||
|
||||
//========================= Wrap-up Phase ===============================//
|
||||
//Absorbs the last block of the memory matrix
|
||||
absorbBlock(state, &wholeMatrix[rowa*ROW_LEN_INT64]);
|
||||
absorbBlock_2way( state, &wholeMatrix[ 2 * rowa0 *ROW_LEN_INT64 ],
|
||||
&wholeMatrix[ 2 * rowa1 *ROW_LEN_INT64 ] );
|
||||
|
||||
//Squeezes the key
|
||||
squeeze( state, K, kLen );
|
||||
//Squeezes the key
|
||||
squeeze_2way( state, K, (unsigned int) kLen );
|
||||
|
||||
return 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
////////////////////////////////////////////////////
|
||||
|
||||
// Lyra2RE doesn't like the new wholeMatrix implementation
|
||||
int LYRA2RE_2WAY( void *K, uint64_t kLen, const void *pwd,
|
||||
@@ -495,7 +446,7 @@ int LYRA2RE_2WAY( void *K, uint64_t kLen, const void *pwd,
|
||||
const uint64_t nRows, const uint64_t nCols )
|
||||
{
|
||||
//====================== Basic variables ============================//
|
||||
uint64_t _ALIGN(256) state[16];
|
||||
uint64_t _ALIGN(256) state[32];
|
||||
int64_t row = 2; //index of row to be processed
|
||||
int64_t prev = 1; //index of prev (last row ever computed/modified)
|
||||
int64_t rowa0 = 0;
|
||||
@@ -517,25 +468,15 @@ int LYRA2RE_2WAY( void *K, uint64_t kLen, const void *pwd,
|
||||
: BLOCK_LEN_BLAKE2_SAFE_BYTES;
|
||||
|
||||
i = (int64_t)ROW_LEN_BYTES * nRows;
|
||||
uint64_t *wholeMatrix = _mm_malloc( i, 64 );
|
||||
uint64_t *wholeMatrix = _mm_malloc( 2*i, 64 );
|
||||
if (wholeMatrix == NULL)
|
||||
return -1;
|
||||
|
||||
#if defined(__AVX2__)
|
||||
memset_zero_256( (__m256i*)wholeMatrix, i>>5 );
|
||||
#elif defined(__SSE2__)
|
||||
memset_zero_128( (__m128i*)wholeMatrix, i>>4 );
|
||||
#else
|
||||
memset( wholeMatrix, 0, i );
|
||||
#endif
|
||||
memset_zero_512( (__m512i*)wholeMatrix, i>>5 );
|
||||
|
||||
uint64_t *ptrWord = wholeMatrix;
|
||||
uint64_t *pw = (uint64_t*)pwd;
|
||||
|
||||
//=== Getting the password + salt + basil padded with 10*1 ==========//
|
||||
//OBS.:The memory matrix will temporarily hold the password: not for saving memory,
|
||||
//but this ensures that the password copied locally will be overwritten as soon as possible
|
||||
|
||||
//First, we clean enough blocks for the password, salt, basil and padding
|
||||
int64_t nBlocksInput = ( ( pwdlen + pwdlen + 6 * sizeof(uint64_t) )
|
||||
/ BLOCK_LEN_BLAKE2_SAFE_BYTES ) + 1;
|
||||
@@ -558,66 +499,8 @@ int LYRA2RE_2WAY( void *K, uint64_t kLen, const void *pwd,
|
||||
ptr[10] = ptr[14] = 0x80;
|
||||
ptr[11] = ptr[15] = 0x0100000000000000;
|
||||
|
||||
|
||||
/*
|
||||
byte *ptrByte = (byte*) wholeMatrix;
|
||||
|
||||
//Prepends the password
|
||||
memcpy(ptrByte, pwd, pwdlen);
|
||||
ptrByte += pwdlen;
|
||||
|
||||
//Concatenates the salt
|
||||
memcpy(ptrByte, salt, saltlen);
|
||||
ptrByte += saltlen;
|
||||
|
||||
// memset( ptrByte, 0, nBlocksInput * BLOCK_LEN_BLAKE2_SAFE_BYTES
|
||||
// - (saltlen + pwdlen) );
|
||||
|
||||
//Concatenates the basil: every integer passed as parameter, in the order they are provided by the interface
|
||||
memcpy(ptrByte, &kLen, sizeof(int64_t));
|
||||
ptrByte += sizeof(uint64_t);
|
||||
v64 = pwdlen;
|
||||
memcpy(ptrByte, &v64, sizeof(int64_t));
|
||||
ptrByte += sizeof(uint64_t);
|
||||
v64 = saltlen;
|
||||
memcpy(ptrByte, &v64, sizeof(int64_t));
|
||||
ptrByte += sizeof(uint64_t);
|
||||
v64 = timeCost;
|
||||
memcpy(ptrByte, &v64, sizeof(int64_t));
|
||||
ptrByte += sizeof(uint64_t);
|
||||
v64 = nRows;
|
||||
memcpy(ptrByte, &v64, sizeof(int64_t));
|
||||
ptrByte += sizeof(uint64_t);
|
||||
v64 = nCols;
|
||||
memcpy(ptrByte, &v64, sizeof(int64_t));
|
||||
ptrByte += sizeof(uint64_t);
|
||||
|
||||
//Now comes the padding
|
||||
*ptrByte = 0x80; //first byte of padding: right after the password
|
||||
ptrByte = (byte*) wholeMatrix; //resets the pointer to the start of the memory matrix
|
||||
ptrByte += nBlocksInput * BLOCK_LEN_BLAKE2_SAFE_BYTES - 1; //sets the pointer to the correct position: end of incomplete block
|
||||
*ptrByte ^= 0x01; //last byte of padding: at the end of the last incomplete block
|
||||
|
||||
//================= Initializing the Sponge State ====================//
|
||||
//Sponge state: 16 uint64_t, BLOCK_LEN_INT64 words of them for the bitrate (b) and the remainder for the capacity (c)
|
||||
|
||||
// initState( state );
|
||||
|
||||
//========================= Setup Phase =============================//
|
||||
//Absorbing salt, password and basil: this is the only place in which the block length is hard-coded to 512 bits
|
||||
|
||||
ptrWord = wholeMatrix;
|
||||
|
||||
*/
|
||||
|
||||
absorbBlockBlake2Safe_2way( state, ptrWord, nBlocksInput, BLOCK_LEN );
|
||||
/*
|
||||
for (i = 0; i < nBlocksInput; i++)
|
||||
{
|
||||
absorbBlockBlake2Safe( state, ptrWord ); //absorbs each block of pad(pwd || salt || basil)
|
||||
ptrWord += BLOCK_LEN; //goes to next block of pad(pwd || salt || basil)
|
||||
}
|
||||
*/
|
||||
|
||||
//Initializes M[0] and M[1]
|
||||
reducedSqueezeRow0_2way( state, &wholeMatrix[0], nCols ); //The locally copied password is most likely overwritten here
|
||||
|
||||
|
||||
@@ -62,6 +62,8 @@ int LYRA2(void *K, int64_t kLen, const void *pwd, int32_t pwdlen, const void *sa
|
||||
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
|
||||
int LYRA2RE_2WAY( void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen,
|
||||
uint64_t timeCost, uint64_t nRows, uint64_t nCols );
|
||||
|
||||
int LYRA2REV2_2WAY( uint64_t*, void *K, uint64_t kLen, const void *pwd,
|
||||
uint64_t pwdlen, uint64_t timeCost, uint64_t nRows, uint64_t nCols );
|
||||
@@ -69,6 +71,9 @@ int LYRA2REV2_2WAY( uint64_t*, void *K, uint64_t kLen, const void *pwd,
|
||||
int LYRA2REV3_2WAY( uint64_t*, void *K, uint64_t kLen, const void *pwd,
|
||||
uint64_t pwdlen, uint64_t timeCost, uint64_t nRows, uint64_t nCols );
|
||||
|
||||
int LYRA2Z_2WAY( uint64_t*, void *K, uint64_t kLen, const void *pwd,
|
||||
uint64_t pwdlen, uint64_t timeCost, uint64_t nRows, uint64_t nCols );
|
||||
|
||||
#endif
|
||||
|
||||
#endif /* LYRA2_H_ */
|
||||
|
||||
@@ -1,13 +1,240 @@
|
||||
#include "lyra2-gate.h"
|
||||
|
||||
#ifdef LYRA2Z_4WAY
|
||||
|
||||
#include <memory.h>
|
||||
#include <mm_malloc.h>
|
||||
#include "lyra2.h"
|
||||
#include "algo/blake/sph_blake.h"
|
||||
#include "algo/blake/blake-hash-4way.h"
|
||||
|
||||
#if defined(LYRA2Z_16WAY)
|
||||
|
||||
__thread uint64_t* lyra2z_16way_matrix;
|
||||
|
||||
bool lyra2z_16way_thread_init()
|
||||
{
|
||||
return ( lyra2z_16way_matrix = _mm_malloc( 2*LYRA2Z_MATRIX_SIZE, 64 ) );
|
||||
}
|
||||
|
||||
static __thread blake256_16way_context l2z_16way_blake_mid;
|
||||
|
||||
void lyra2z_16way_midstate( const void* input )
|
||||
{
|
||||
blake256_16way_init( &l2z_16way_blake_mid );
|
||||
blake256_16way_update( &l2z_16way_blake_mid, input, 64 );
|
||||
}
|
||||
|
||||
void lyra2z_16way_hash( void *state, const void *input )
|
||||
{
|
||||
uint32_t vhash[8*16] __attribute__ ((aligned (128)));
|
||||
uint32_t hash0[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash1[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash2[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash3[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash4[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash5[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash6[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash7[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash8[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash9[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash10[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash11[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash12[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash13[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash14[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash15[8] __attribute__ ((aligned (64)));
|
||||
blake256_16way_context ctx_blake __attribute__ ((aligned (64)));
|
||||
|
||||
memcpy( &ctx_blake, &l2z_16way_blake_mid, sizeof l2z_16way_blake_mid );
|
||||
blake256_16way_update( &ctx_blake, input + (64*16), 16 );
|
||||
blake256_16way_close( &ctx_blake, vhash );
|
||||
|
||||
dintrlv_16x32( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
|
||||
hash8, hash9, hash10, hash11 ,hash12, hash13, hash14, hash15,
|
||||
vhash, 256 );
|
||||
|
||||
intrlv_2x256( vhash, hash0, hash1, 256 );
|
||||
LYRA2Z_2WAY( lyra2z_16way_matrix, vhash, 32, vhash, 32, 8, 8, 8 );
|
||||
dintrlv_2x256( hash0, hash1, vhash, 256 );
|
||||
intrlv_2x256( vhash, hash2, hash3, 256 );
|
||||
LYRA2Z_2WAY( lyra2z_16way_matrix, vhash, 32, vhash, 32, 8, 8, 8 );
|
||||
dintrlv_2x256( hash2, hash3, vhash, 256 );
|
||||
intrlv_2x256( vhash, hash4, hash5, 256 );
|
||||
LYRA2Z_2WAY( lyra2z_16way_matrix, vhash, 32, vhash, 32, 8, 8, 8 );
|
||||
dintrlv_2x256( hash4, hash5, vhash, 256 );
|
||||
intrlv_2x256( vhash, hash6, hash7, 256 );
|
||||
LYRA2Z_2WAY( lyra2z_16way_matrix, vhash, 32, vhash, 32, 8, 8, 8 );
|
||||
dintrlv_2x256( hash6, hash7, vhash, 256 );
|
||||
intrlv_2x256( vhash, hash8, hash9, 256 );
|
||||
LYRA2Z_2WAY( lyra2z_16way_matrix, vhash, 32, vhash, 32, 8, 8, 8 );
|
||||
dintrlv_2x256( hash8, hash9, vhash, 256 );
|
||||
intrlv_2x256( vhash, hash10, hash11, 256 );
|
||||
LYRA2Z_2WAY( lyra2z_16way_matrix, vhash, 32, vhash, 32, 8, 8, 8 );
|
||||
dintrlv_2x256( hash10, hash11, vhash, 256 );
|
||||
intrlv_2x256( vhash, hash12, hash13, 256 );
|
||||
LYRA2Z_2WAY( lyra2z_16way_matrix, vhash, 32, vhash, 32, 8, 8, 8 );
|
||||
dintrlv_2x256( hash12, hash13, vhash, 256 );
|
||||
intrlv_2x256( vhash, hash14, hash15, 256 );
|
||||
LYRA2Z_2WAY( lyra2z_16way_matrix, vhash, 32, vhash, 32, 8, 8, 8 );
|
||||
dintrlv_2x256( hash14, hash15, vhash, 256 );
|
||||
|
||||
memcpy( state, hash0, 32 );
|
||||
memcpy( state+ 32, hash1, 32 );
|
||||
memcpy( state+ 64, hash2, 32 );
|
||||
memcpy( state+ 96, hash3, 32 );
|
||||
memcpy( state+128, hash4, 32 );
|
||||
memcpy( state+160, hash5, 32 );
|
||||
memcpy( state+192, hash6, 32 );
|
||||
memcpy( state+224, hash7, 32 );
|
||||
memcpy( state+256, hash8, 32 );
|
||||
memcpy( state+288, hash9, 32 );
|
||||
memcpy( state+320, hash10, 32 );
|
||||
memcpy( state+352, hash11, 32 );
|
||||
memcpy( state+384, hash12, 32 );
|
||||
memcpy( state+416, hash13, 32 );
|
||||
memcpy( state+448, hash14, 32 );
|
||||
memcpy( state+480, hash15, 32 );
|
||||
}
|
||||
|
||||
int scanhash_lyra2z_16way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[8*16] __attribute__ ((aligned (128)));
|
||||
uint32_t vdata[20*16] __attribute__ ((aligned (64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
__m512i *noncev = (__m512i*)vdata + 19; // aligned
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
if ( opt_benchmark )
|
||||
ptarget[7] = 0x0000ff;
|
||||
|
||||
mm512_bswap32_intrlv80_16x32( vdata, pdata );
|
||||
lyra2z_16way_midstate( vdata );
|
||||
|
||||
do {
|
||||
*noncev = mm512_bswap_32( _mm512_set_epi32( n+15, n+14, n+13, n+12,
|
||||
n+11, n+10, n+ 9, n+ 8,
|
||||
n+ 7, n+ 6, n+ 5, n+ 4,
|
||||
n+ 3, n+ 2, n+ 1, n ) );
|
||||
lyra2z_16way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
for ( int i = 0; i < 16; i++ )
|
||||
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget )
|
||||
&& !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n+i;
|
||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
||||
}
|
||||
n += 16;
|
||||
} while ( (n < max_nonce-16) && !work_restart[thr_id].restart);
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#elif defined(LYRA2Z_8WAY)
|
||||
|
||||
__thread uint64_t* lyra2z_8way_matrix;
|
||||
|
||||
bool lyra2z_8way_thread_init()
|
||||
{
|
||||
return ( lyra2z_8way_matrix = _mm_malloc( LYRA2Z_MATRIX_SIZE, 64 ) );
|
||||
}
|
||||
|
||||
static __thread blake256_8way_context l2z_8way_blake_mid;
|
||||
|
||||
void lyra2z_8way_midstate( const void* input )
|
||||
{
|
||||
blake256_8way_init( &l2z_8way_blake_mid );
|
||||
blake256_8way( &l2z_8way_blake_mid, input, 64 );
|
||||
}
|
||||
|
||||
void lyra2z_8way_hash( void *state, const void *input )
|
||||
{
|
||||
uint32_t hash0[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash1[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash2[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash3[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash4[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash5[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash6[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash7[8] __attribute__ ((aligned (64)));
|
||||
uint32_t vhash[8*8] __attribute__ ((aligned (64)));
|
||||
blake256_8way_context ctx_blake __attribute__ ((aligned (64)));
|
||||
|
||||
memcpy( &ctx_blake, &l2z_8way_blake_mid, sizeof l2z_8way_blake_mid );
|
||||
blake256_8way( &ctx_blake, input + (64*8), 16 );
|
||||
blake256_8way_close( &ctx_blake, vhash );
|
||||
|
||||
dintrlv_8x32( hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7, vhash, 256 );
|
||||
|
||||
LYRA2Z( lyra2z_8way_matrix, hash0, 32, hash0, 32, hash0, 32, 8, 8, 8 );
|
||||
LYRA2Z( lyra2z_8way_matrix, hash1, 32, hash1, 32, hash1, 32, 8, 8, 8 );
|
||||
LYRA2Z( lyra2z_8way_matrix, hash2, 32, hash2, 32, hash2, 32, 8, 8, 8 );
|
||||
LYRA2Z( lyra2z_8way_matrix, hash3, 32, hash3, 32, hash3, 32, 8, 8, 8 );
|
||||
LYRA2Z( lyra2z_8way_matrix, hash4, 32, hash4, 32, hash4, 32, 8, 8, 8 );
|
||||
LYRA2Z( lyra2z_8way_matrix, hash5, 32, hash5, 32, hash5, 32, 8, 8, 8 );
|
||||
LYRA2Z( lyra2z_8way_matrix, hash6, 32, hash6, 32, hash6, 32, 8, 8, 8 );
|
||||
LYRA2Z( lyra2z_8way_matrix, hash7, 32, hash7, 32, hash7, 32, 8, 8, 8 );
|
||||
|
||||
|
||||
memcpy( state, hash0, 32 );
|
||||
memcpy( state+ 32, hash1, 32 );
|
||||
memcpy( state+ 64, hash2, 32 );
|
||||
memcpy( state+ 96, hash3, 32 );
|
||||
memcpy( state+128, hash4, 32 );
|
||||
memcpy( state+160, hash5, 32 );
|
||||
memcpy( state+192, hash6, 32 );
|
||||
memcpy( state+224, hash7, 32 );
|
||||
}
|
||||
|
||||
int scanhash_lyra2z_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[8*8] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
__m256i *noncev = (__m256i*)vdata + 19; // aligned
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
if ( opt_benchmark )
|
||||
ptarget[7] = 0x0000ff;
|
||||
|
||||
mm256_bswap32_intrlv80_8x32( vdata, pdata );
|
||||
lyra2z_8way_midstate( vdata );
|
||||
|
||||
do {
|
||||
*noncev = mm256_bswap_32(
|
||||
_mm256_set_epi32( n+7, n+6, n+5, n+4, n+3, n+2, n+1, n ) );
|
||||
lyra2z_8way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
for ( int i = 0; i < 8; i++ )
|
||||
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget )
|
||||
&& !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n+i;
|
||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
||||
}
|
||||
n += 8;
|
||||
} while ( (n < max_nonce-8) && !work_restart[thr_id].restart);
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
#elif defined(LYRA2Z_4WAY)
|
||||
|
||||
|
||||
__thread uint64_t* lyra2z_4way_matrix;
|
||||
|
||||
bool lyra2z_4way_thread_init()
|
||||
@@ -85,100 +312,3 @@ int scanhash_lyra2z_4way( struct work *work, uint32_t max_nonce,
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(LYRA2Z_8WAY)
|
||||
|
||||
__thread uint64_t* lyra2z_8way_matrix;
|
||||
|
||||
bool lyra2z_8way_thread_init()
|
||||
{
|
||||
return ( lyra2z_8way_matrix = _mm_malloc( LYRA2Z_MATRIX_SIZE, 64 ) );
|
||||
}
|
||||
|
||||
static __thread blake256_8way_context l2z_8way_blake_mid;
|
||||
|
||||
void lyra2z_8way_midstate( const void* input )
|
||||
{
|
||||
blake256_8way_init( &l2z_8way_blake_mid );
|
||||
blake256_8way( &l2z_8way_blake_mid, input, 64 );
|
||||
}
|
||||
|
||||
void lyra2z_8way_hash( void *state, const void *input )
|
||||
{
|
||||
uint32_t hash0[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash1[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash2[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash3[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash4[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash5[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash6[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash7[8] __attribute__ ((aligned (64)));
|
||||
uint32_t vhash[8*8] __attribute__ ((aligned (64)));
|
||||
blake256_8way_context ctx_blake __attribute__ ((aligned (64)));
|
||||
|
||||
memcpy( &ctx_blake, &l2z_8way_blake_mid, sizeof l2z_8way_blake_mid );
|
||||
blake256_8way( &ctx_blake, input + (64*8), 16 );
|
||||
blake256_8way_close( &ctx_blake, vhash );
|
||||
|
||||
dintrlv_8x32( hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7, vhash, 256 );
|
||||
|
||||
LYRA2Z( lyra2z_8way_matrix, hash0, 32, hash0, 32, hash0, 32, 8, 8, 8 );
|
||||
LYRA2Z( lyra2z_8way_matrix, hash1, 32, hash1, 32, hash1, 32, 8, 8, 8 );
|
||||
LYRA2Z( lyra2z_8way_matrix, hash2, 32, hash2, 32, hash2, 32, 8, 8, 8 );
|
||||
LYRA2Z( lyra2z_8way_matrix, hash3, 32, hash3, 32, hash3, 32, 8, 8, 8 );
|
||||
LYRA2Z( lyra2z_8way_matrix, hash4, 32, hash4, 32, hash4, 32, 8, 8, 8 );
|
||||
LYRA2Z( lyra2z_8way_matrix, hash5, 32, hash5, 32, hash5, 32, 8, 8, 8 );
|
||||
LYRA2Z( lyra2z_8way_matrix, hash6, 32, hash6, 32, hash6, 32, 8, 8, 8 );
|
||||
LYRA2Z( lyra2z_8way_matrix, hash7, 32, hash7, 32, hash7, 32, 8, 8, 8 );
|
||||
|
||||
memcpy( state, hash0, 32 );
|
||||
memcpy( state+ 32, hash1, 32 );
|
||||
memcpy( state+ 64, hash2, 32 );
|
||||
memcpy( state+ 96, hash3, 32 );
|
||||
memcpy( state+128, hash4, 32 );
|
||||
memcpy( state+160, hash5, 32 );
|
||||
memcpy( state+192, hash6, 32 );
|
||||
memcpy( state+224, hash7, 32 );
|
||||
}
|
||||
|
||||
int scanhash_lyra2z_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[8*8] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
__m256i *noncev = (__m256i*)vdata + 19; // aligned
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
if ( opt_benchmark )
|
||||
ptarget[7] = 0x0000ff;
|
||||
|
||||
mm256_bswap32_intrlv80_8x32( vdata, pdata );
|
||||
lyra2z_8way_midstate( vdata );
|
||||
|
||||
do {
|
||||
*noncev = mm256_bswap_32(
|
||||
_mm256_set_epi32( n+7, n+6, n+5, n+4, n+3, n+2, n+1, n ) );
|
||||
lyra2z_8way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
for ( int i = 0; i < 8; i++ )
|
||||
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget )
|
||||
&& !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n+i;
|
||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
||||
}
|
||||
n += 8;
|
||||
} while ( (n < max_nonce-8) && !work_restart[thr_id].restart);
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user