commit 66191db93c
parent dd99580a4c
Author: Jay D Dee
Date: 2025-06-20 20:31:41 -04:00
86 changed files with 2701 additions and 4322 deletions
86 changed files with 2701 additions and 4322 deletions
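The pattern across every file below: the SIMD hash contexts and helpers lose the ambiguous _Nway suffix in favour of _NxW, where N is the lane count and W the lane width in bits. 32-bit-word algorithms (blake256, bmw256) become _16x32/_8x32/_4x32; 64-bit-word algorithms (keccak256, skein256) become _8x64/_4x64. A minimal sketch of the scheme, using hypothetical stand-in types rather than the miner's real contexts:

/* Hypothetical stand-ins, not the real contexts: the old "_Nway" suffix
   gave only the lane count; the new "_NxW" suffix reads as N lanes of
   W-bit words, so 16x32 and 8x64 both fill one 512-bit vector. */
#include <stdint.h>

typedef struct { uint32_t lane[16]; } hash256_16x32_context; /* 16 x 32-bit */
typedef struct { uint64_t lane[8];  } hash256_8x64_context;  /*  8 x 64-bit */

/* old-style names kept here only for comparison */
typedef hash256_16x32_context hash256_16way_context;
typedef hash256_8x64_context  hash256_8way_context;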

View File

@@ -26,9 +26,9 @@
#if defined (ALLIUM_16WAY)
typedef union {
-keccak256_8way_context keccak;
+keccak256_8x64_context keccak;
cube_4way_2buf_context cube;
-skein256_8way_context skein;
+skein256_8x64_context skein;
#if defined(__VAES__)
groestl256_4way_context groestl;
#else
@@ -60,7 +60,7 @@ static void allium_16way_hash( void *state, const void *midstate_vars,
uint32_t hash15[8] __attribute__ ((aligned (32)));
allium_16way_ctx_holder ctx __attribute__ ((aligned (64)));
-blake256_16way_final_rounds_le( vhash, midstate_vars, midhash, block, 14 );
+blake256_16x32_final_rounds_le( vhash, midstate_vars, midhash, block, 14 );
dintrlv_16x32( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
hash8, hash9, hash10, hash11, hash12, hash13, hash14, hash15,
@@ -70,12 +70,12 @@ static void allium_16way_hash( void *state, const void *midstate_vars,
intrlv_8x64( vhashB, hash8, hash9, hash10, hash11, hash12, hash13, hash14,
hash15, 256 );
-keccak256_8way_init( &ctx.keccak );
-keccak256_8way_update( &ctx.keccak, vhashA, 32 );
-keccak256_8way_close( &ctx.keccak, vhashA);
-keccak256_8way_init( &ctx.keccak );
-keccak256_8way_update( &ctx.keccak, vhashB, 32 );
-keccak256_8way_close( &ctx.keccak, vhashB);
+keccak256_8x64_init( &ctx.keccak );
+keccak256_8x64_update( &ctx.keccak, vhashA, 32 );
+keccak256_8x64_close( &ctx.keccak, vhashA);
+keccak256_8x64_init( &ctx.keccak );
+keccak256_8x64_update( &ctx.keccak, vhashB, 32 );
+keccak256_8x64_close( &ctx.keccak, vhashB);
dintrlv_8x64( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
vhashA, 256 );
@@ -153,12 +153,12 @@ static void allium_16way_hash( void *state, const void *midstate_vars,
intrlv_8x64( vhashB, hash8, hash9, hash10, hash11, hash12, hash13, hash14,
hash15, 256 );
-skein256_8way_init( &ctx.skein );
-skein256_8way_update( &ctx.skein, vhashA, 32 );
-skein256_8way_close( &ctx.skein, vhashA );
-skein256_8way_init( &ctx.skein );
-skein256_8way_update( &ctx.skein, vhashB, 32 );
-skein256_8way_close( &ctx.skein, vhashB );
+skein256_8x64_init( &ctx.skein );
+skein256_8x64_update( &ctx.skein, vhashA, 32 );
+skein256_8x64_close( &ctx.skein, vhashA );
+skein256_8x64_init( &ctx.skein );
+skein256_8x64_update( &ctx.skein, vhashB, 32 );
+skein256_8x64_close( &ctx.skein, vhashB );
dintrlv_8x64( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
vhashA, 256 );
@@ -251,7 +251,7 @@ int scanhash_allium_16way( struct work *work, uint32_t max_nonce,
n+ 7, n+ 6, n+ 5, n+ 4, n+ 3, n+ 2, n+ 1, n );
// Partially prehash second block without touching nonces in block_buf[3].
-blake256_16way_round0_prehash_le( midstate_vars, block0_hash, block_buf );
+blake256_16x32_round0_prehash_le( midstate_vars, block0_hash, block_buf );
do {
allium_16way_hash( hash, midstate_vars, block0_hash, block_buf );
@@ -273,9 +273,9 @@ int scanhash_allium_16way( struct work *work, uint32_t max_nonce,
#elif defined (ALLIUM_8WAY)
typedef union {
-keccak256_4way_context keccak;
+keccak256_4x64_context keccak;
cube_2way_context cube;
-skein256_4way_context skein;
+skein256_4x64_context skein;
#if defined(__VAES__)
groestl256_2way_context groestl;
#else
@@ -298,19 +298,19 @@ static void allium_8way_hash( void *hash, const void *midstate_vars,
uint64_t *hash7 = (uint64_t*)hash+28;
allium_8way_ctx_holder ctx __attribute__ ((aligned (64)));
-blake256_8way_final_rounds_le( vhashA, midstate_vars, midhash, block, 14 );
+blake256_8x32_final_rounds_le( vhashA, midstate_vars, midhash, block, 14 );
dintrlv_8x32( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
vhashA, 256 );
intrlv_4x64( vhashA, hash0, hash1, hash2, hash3, 256 );
intrlv_4x64( vhashB, hash4, hash5, hash6, hash7, 256 );
-keccak256_4way_init( &ctx.keccak );
-keccak256_4way_update( &ctx.keccak, vhashA, 32 );
-keccak256_4way_close( &ctx.keccak, vhashA );
-keccak256_4way_init( &ctx.keccak );
-keccak256_4way_update( &ctx.keccak, vhashB, 32 );
-keccak256_4way_close( &ctx.keccak, vhashB );
+keccak256_4x64_init( &ctx.keccak );
+keccak256_4x64_update( &ctx.keccak, vhashA, 32 );
+keccak256_4x64_close( &ctx.keccak, vhashA );
+keccak256_4x64_init( &ctx.keccak );
+keccak256_4x64_update( &ctx.keccak, vhashB, 32 );
+keccak256_4x64_close( &ctx.keccak, vhashB );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhashA, 256 );
dintrlv_4x64( hash4, hash5, hash6, hash7, vhashB, 256 );
@@ -350,12 +350,12 @@ static void allium_8way_hash( void *hash, const void *midstate_vars,
intrlv_4x64( vhashA, hash0, hash1, hash2, hash3, 256 );
intrlv_4x64( vhashB, hash4, hash5, hash6, hash7, 256 );
-skein256_4way_init( &ctx.skein );
-skein256_4way_update( &ctx.skein, vhashA, 32 );
-skein256_4way_close( &ctx.skein, vhashA );
-skein256_4way_init( &ctx.skein );
-skein256_4way_update( &ctx.skein, vhashB, 32 );
-skein256_4way_close( &ctx.skein, vhashB );
+skein256_4x64_init( &ctx.skein );
+skein256_4x64_update( &ctx.skein, vhashA, 32 );
+skein256_4x64_close( &ctx.skein, vhashA );
+skein256_4x64_init( &ctx.skein );
+skein256_4x64_update( &ctx.skein, vhashB, 32 );
+skein256_4x64_close( &ctx.skein, vhashB );
#if defined(__VAES__)
@@ -433,7 +433,7 @@ int scanhash_allium_8way( struct work *work, uint32_t max_nonce,
n+ 3, n+ 2, n+ 1, n );
// Partially prehash second block without touching nonces
-blake256_8way_round0_prehash_le( midstate_vars, block0_hash, block_buf );
+blake256_8x32_round0_prehash_le( midstate_vars, block0_hash, block_buf );
do {
allium_8way_hash( hash, midstate_vars, block0_hash, block_buf );
@@ -483,7 +483,7 @@ static void allium_4way_hash( void *hash, const void *midstate_vars,
uint64_t *hash3 = (uint64_t*)hash+12;
allium_4way_ctx_holder ctx __attribute__ ((aligned (64)));
-blake256_4way_final_rounds_le( vhashA, midstate_vars, midhash, block, 14 );
+blake256_4x32_final_rounds_le( vhashA, midstate_vars, midhash, block, 14 );
dintrlv_4x32( hash0, hash1, hash2, hash3, vhashA, 256 );
intrlv_2x64( vhashA, hash0, hash1, 256 );
@@ -588,7 +588,7 @@ int scanhash_allium_4way( struct work *work, uint32_t max_nonce,
block_buf[15] = v128_32( 640 );
// Partially prehash second block without touching nonces
-blake256_4way_round0_prehash_le( midstate_vars, block0_hash, block_buf );
+blake256_4x32_round0_prehash_le( midstate_vars, block0_hash, block_buf );
do {
allium_4way_hash( hash, midstate_vars, block0_hash, block_buf );
@@ -616,7 +616,6 @@ int scanhash_allium_4way( struct work *work, uint32_t max_nonce,
//
// 1 way
typedef struct
{
blake256_context blake;
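The intrlv_8x64/dintrlv_16x32 pairs above regroup the same 16 hashes between word widths: blake256 runs on 16 interleaved 32-bit lanes, while keccak256 and skein256 take two batches of 8 interleaved 64-bit lanes (vhashA/vhashB). A plain-C sketch of that regrouping, with generic helper names standing in for the hand-vectorised simd-utils routines:

/* Generic interleave helpers -- a sketch only; the real intrlv_8x64 /
   dintrlv_16x32 are hand-vectorised.  Lane l's word w lives at
   src[ w*nlanes + l ] in the interleaved buffer. */
#include <stdint.h>

static void dintrlv_u32( uint32_t *dst[], const uint32_t *src,
                         int nlanes, int nwords )
{
   for ( int w = 0; w < nwords; w++ )
      for ( int l = 0; l < nlanes; l++ )
         dst[l][w] = src[ w*nlanes + l ];
}

static void intrlv_u64( uint64_t *dst, uint64_t * const src[],
                        int nlanes, int nwords )
{
   for ( int w = 0; w < nwords; w++ )
      for ( int l = 0; l < nlanes; l++ )
         dst[ w*nlanes + l ] = src[l][w];
}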

View File

@@ -14,12 +14,12 @@ bool lyra2h_4way_thread_init()
return ( lyra2h_4way_matrix = mm_malloc( LYRA2H_MATRIX_SIZE, 64 ) );
}
-static __thread blake256_4way_context l2h_4way_blake_mid;
+static __thread blake256_4x32_context l2h_4way_blake_mid;
void lyra2h_4way_midstate( const void* input )
{
-blake256_4way_init( &l2h_4way_blake_mid );
-blake256_4way_update( &l2h_4way_blake_mid, input, 64 );
+blake256_4x32_init( &l2h_4way_blake_mid );
+blake256_4x32_update( &l2h_4way_blake_mid, input, 64 );
}
void lyra2h_4way_hash( void *state, const void *input )
@@ -29,11 +29,11 @@ void lyra2h_4way_hash( void *state, const void *input )
uint32_t hash2[8] __attribute__ ((aligned (64)));
uint32_t hash3[8] __attribute__ ((aligned (64)));
uint32_t vhash[8*4] __attribute__ ((aligned (64)));
-blake256_4way_context ctx_blake __attribute__ ((aligned (64)));
+blake256_4x32_context ctx_blake __attribute__ ((aligned (64)));
memcpy( &ctx_blake, &l2h_4way_blake_mid, sizeof l2h_4way_blake_mid );
-blake256_4way_update( &ctx_blake, input + (64*4), 16 );
-blake256_4way_close( &ctx_blake, vhash );
+blake256_4x32_update( &ctx_blake, input + (64*4), 16 );
+blake256_4x32_close( &ctx_blake, vhash );
dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 256 );
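l2h_4way_blake_mid above caches the blake256 state over the nonce-free first 64 bytes of the 80-byte header; each hash then copies that state and absorbs only the final 16 bytes, which contain the nonce. The same idea in miniature, with a toy hash standing in for blake256_4x32:

/* Toy stand-in for the real context -- illustrates midstate caching only. */
#include <stdint.h>
#include <string.h>

typedef struct { uint32_t acc; } toy_ctx;

static void toy_init( toy_ctx *c ) { c->acc = 0x6a09e667u; }
static void toy_update( toy_ctx *c, const void *p, size_t n )
{  const uint8_t *b = (const uint8_t*)p;  while ( n-- ) c->acc = c->acc*31u + *b++; }
static void toy_close( const toy_ctx *c, void *out ) { memcpy( out, &c->acc, 4 ); }

static toy_ctx blake_mid;                     /* cached midstate           */

void midstate( const void *header )           /* once per work unit        */
{  toy_init( &blake_mid );  toy_update( &blake_mid, header, 64 ); }

void hash_one( void *out, const void *header )   /* per nonce              */
{
   toy_ctx c = blake_mid;     /* struct copy beats re-absorbing 64 bytes   */
   toy_update( &c, (const uint8_t*)header + 64, 16 );
   toy_close( &c, out );
}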

View File

@@ -7,25 +7,24 @@
#include "algo/cubehash/cubehash_sse2.h"
#include "algo/cubehash/cube-hash-2way.h"
#if defined (LYRA2REV2_16WAY)
typedef struct {
-blake256_16way_context blake;
-keccak256_8way_context keccak;
+blake256_16x32_context blake;
+keccak256_8x64_context keccak;
cubehashParam cube;
-skein256_8way_context skein;
-bmw256_16way_context bmw;
+skein256_8x64_context skein;
+bmw256_16x32_context bmw;
} lyra2v2_16way_ctx_holder __attribute__ ((aligned (64)));
static lyra2v2_16way_ctx_holder l2v2_16way_ctx;
bool init_lyra2rev2_16way_ctx()
{
-keccak256_8way_init( &l2v2_16way_ctx.keccak );
+keccak256_8x64_init( &l2v2_16way_ctx.keccak );
cubehashInit( &l2v2_16way_ctx.cube, 256, 16, 32 );
-skein256_8way_init( &l2v2_16way_ctx.skein );
-bmw256_16way_init( &l2v2_16way_ctx.bmw );
+skein256_8x64_init( &l2v2_16way_ctx.skein );
+bmw256_16x32_init( &l2v2_16way_ctx.bmw );
return true;
}
@@ -51,8 +50,8 @@ void lyra2rev2_16way_hash( void *state, const void *input )
lyra2v2_16way_ctx_holder ctx __attribute__ ((aligned (64)));
memcpy( &ctx, &l2v2_16way_ctx, sizeof(l2v2_16way_ctx) );
-blake256_16way_update( &ctx.blake, input + (64<<4), 16 );
-blake256_16way_close( &ctx.blake, vhash );
+blake256_16x32_update( &ctx.blake, input + (64<<4), 16 );
+blake256_16x32_close( &ctx.blake, vhash );
dintrlv_16x32( hash0, hash1, hash2, hash3,
hash4, hash5, hash6, hash7,
@@ -62,17 +61,17 @@ void lyra2rev2_16way_hash( void *state, const void *input )
intrlv_8x64( vhash, hash0, hash1, hash2, hash3,
hash4, hash5, hash6, hash7, 256 );
-keccak256_8way_update( &ctx.keccak, vhash, 32 );
-keccak256_8way_close( &ctx.keccak, vhash );
+keccak256_8x64_update( &ctx.keccak, vhash, 32 );
+keccak256_8x64_close( &ctx.keccak, vhash );
dintrlv_8x64( hash0, hash1, hash2, hash3,
hash4, hash5, hash6, hash7, vhash, 256 );
intrlv_8x64( vhash, hash8, hash9, hash10, hash11,
hash12, hash13, hash14, hash15, 256 );
-keccak256_8way_init( &ctx.keccak );
-keccak256_8way_update( &ctx.keccak, vhash, 32 );
-keccak256_8way_close( &ctx.keccak, vhash );
+keccak256_8x64_init( &ctx.keccak );
+keccak256_8x64_update( &ctx.keccak, vhash, 32 );
+keccak256_8x64_close( &ctx.keccak, vhash );
dintrlv_8x64( hash8, hash9, hash10, hash11,
hash12, hash13, hash14, hash15, vhash, 256 );
@@ -122,21 +121,20 @@ void lyra2rev2_16way_hash( void *state, const void *input )
intrlv_8x64( vhash, hash0, hash1, hash2, hash3,
hash4, hash5, hash6, hash7, 256 );
-skein256_8way_update( &ctx.skein, vhash, 32 );
-skein256_8way_close( &ctx.skein, vhash );
+skein256_8x64_update( &ctx.skein, vhash, 32 );
+skein256_8x64_close( &ctx.skein, vhash );
dintrlv_8x64( hash0, hash1, hash2, hash3,
hash4, hash5, hash6, hash7, vhash, 256 );
intrlv_8x64( vhash, hash8, hash9, hash10, hash11, hash12,
hash13, hash14, hash15, 256 );
-skein256_8way_init( &ctx.skein );
-skein256_8way_update( &ctx.skein, vhash, 32 );
-skein256_8way_close( &ctx.skein, vhash );
+skein256_8x64_init( &ctx.skein );
+skein256_8x64_update( &ctx.skein, vhash, 32 );
+skein256_8x64_close( &ctx.skein, vhash );
dintrlv_8x64( hash8, hash9, hash10, hash11,
hash12, hash13, hash14, hash15, vhash, 256 );
cubehash_full( &ctx.cube, (byte*) hash0, 256, (const byte*) hash0, 32 );
cubehash_full( &ctx.cube, (byte*) hash1, 256, (const byte*) hash1, 32 );
@@ -160,8 +158,8 @@ void lyra2rev2_16way_hash( void *state, const void *input )
hash8, hash9, hash10, hash11,
hash12, hash13, hash14, hash15, 256 );
-bmw256_16way_update( &ctx.bmw, vhash, 32 );
-bmw256_16way_close( &ctx.bmw, state );
+bmw256_16x32_update( &ctx.bmw, vhash, 32 );
+bmw256_16x32_close( &ctx.bmw, state );
}
int scanhash_lyra2rev2_16way( struct work *work, const uint32_t max_nonce,
@@ -186,8 +184,8 @@ int scanhash_lyra2rev2_16way( struct work *work, const uint32_t max_nonce,
mm512_bswap32_intrlv80_16x32( vdata, pdata );
*noncev = _mm512_set_epi32( n+15, n+14, n+13, n+12, n+11, n+10, n+ 9, n+ 8,
n+ 7, n+ 6, n+ 5, n+ 4, n+ 3, n+ 2, n+ 1, n );
-blake256_16way_init( &l2v2_16way_ctx.blake );
-blake256_16way_update( &l2v2_16way_ctx.blake, vdata, 64 );
+blake256_16x32_init( &l2v2_16way_ctx.blake );
+blake256_16x32_update( &l2v2_16way_ctx.blake, vdata, 64 );
do
{
@@ -214,21 +212,21 @@ int scanhash_lyra2rev2_16way( struct work *work, const uint32_t max_nonce,
#elif defined (LYRA2REV2_8WAY)
typedef struct {
-blake256_8way_context blake;
-keccak256_4way_context keccak;
+blake256_8x32_context blake;
+keccak256_4x64_context keccak;
cubehashParam cube;
-skein256_4way_context skein;
-bmw256_8way_context bmw;
+skein256_4x64_context skein;
+bmw256_8x32_context bmw;
} lyra2v2_8way_ctx_holder __attribute__ ((aligned (64)));
static lyra2v2_8way_ctx_holder l2v2_8way_ctx;
bool init_lyra2rev2_8way_ctx()
{
-keccak256_4way_init( &l2v2_8way_ctx.keccak );
+keccak256_4x64_init( &l2v2_8way_ctx.keccak );
cubehashInit( &l2v2_8way_ctx.cube, 256, 16, 32 );
-skein256_4way_init( &l2v2_8way_ctx.skein );
-bmw256_8way_init( &l2v2_8way_ctx.bmw );
+skein256_4x64_init( &l2v2_8way_ctx.skein );
+bmw256_8x32_init( &l2v2_8way_ctx.bmw );
return true;
}
@@ -246,20 +244,20 @@ void lyra2rev2_8way_hash( void *state, const void *input )
lyra2v2_8way_ctx_holder ctx __attribute__ ((aligned (64)));
memcpy( &ctx, &l2v2_8way_ctx, sizeof(l2v2_8way_ctx) );
-blake256_8way_update( &ctx.blake, input + (64<<3), 16 );
-blake256_8way_close( &ctx.blake, vhash );
+blake256_8x32_update( &ctx.blake, input + (64<<3), 16 );
+blake256_8x32_close( &ctx.blake, vhash );
dintrlv_8x32( hash0, hash1, hash2, hash3,
hash4, hash5, hash6, hash7, vhash, 256 );
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 256 );
-keccak256_4way_update( &ctx.keccak, vhash, 32 );
-keccak256_4way_close( &ctx.keccak, vhash );
+keccak256_4x64_update( &ctx.keccak, vhash, 32 );
+keccak256_4x64_close( &ctx.keccak, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 256 );
intrlv_4x64( vhash, hash4, hash5, hash6, hash7, 256 );
-keccak256_4way_init( &ctx.keccak );
-keccak256_4way_update( &ctx.keccak, vhash, 32 );
-keccak256_4way_close( &ctx.keccak, vhash );
+keccak256_4x64_init( &ctx.keccak );
+keccak256_4x64_update( &ctx.keccak, vhash, 32 );
+keccak256_4x64_close( &ctx.keccak, vhash );
dintrlv_4x64( hash4, hash5, hash6, hash7, vhash, 256 );
cubehash_full( &ctx.cube, (byte*) hash0, 256, (const byte*) hash0, 32 );
@@ -282,13 +280,13 @@ void lyra2rev2_8way_hash( void *state, const void *input )
LYRA2REV2( l2v2_wholeMatrix, hash7, 32, hash7, 32, hash7, 32, 1, 4, 4 );
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 256 );
-skein256_4way_update( &ctx.skein, vhash, 32 );
-skein256_4way_close( &ctx.skein, vhash );
+skein256_4x64_update( &ctx.skein, vhash, 32 );
+skein256_4x64_close( &ctx.skein, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 256 );
intrlv_4x64( vhash, hash4, hash5, hash6, hash7, 256 );
-skein256_4way_init( &ctx.skein );
-skein256_4way_update( &ctx.skein, vhash, 32 );
-skein256_4way_close( &ctx.skein, vhash );
+skein256_4x64_init( &ctx.skein );
+skein256_4x64_update( &ctx.skein, vhash, 32 );
+skein256_4x64_close( &ctx.skein, vhash );
dintrlv_4x64( hash4, hash5, hash6, hash7, vhash, 256 );
cubehash_full( &ctx.cube, (byte*) hash0, 256, (const byte*) hash0, 32 );
@@ -303,8 +301,8 @@ void lyra2rev2_8way_hash( void *state, const void *input )
intrlv_8x32( vhash, hash0, hash1, hash2, hash3,
hash4, hash5, hash6, hash7, 256 );
-bmw256_8way_update( &ctx.bmw, vhash, 32 );
-bmw256_8way_close( &ctx.bmw, state );
+bmw256_8x32_update( &ctx.bmw, vhash, 32 );
+bmw256_8x32_close( &ctx.bmw, state );
}
int scanhash_lyra2rev2_8way( struct work *work, const uint32_t max_nonce,
@@ -328,8 +326,8 @@ int scanhash_lyra2rev2_8way( struct work *work, const uint32_t max_nonce,
mm256_bswap32_intrlv80_8x32( vdata, pdata );
*noncev = _mm256_set_epi32( n+7, n+6, n+5, n+4, n+3, n+2, n+1, n );
-blake256_8way_init( &l2v2_8way_ctx.blake );
-blake256_8way_update( &l2v2_8way_ctx.blake, vdata, 64 );
+blake256_8x32_init( &l2v2_8way_ctx.blake );
+blake256_8x32_update( &l2v2_8way_ctx.blake, vdata, 64 );
do
{
@@ -356,21 +354,21 @@ int scanhash_lyra2rev2_8way( struct work *work, const uint32_t max_nonce,
#elif defined (LYRA2REV2_4WAY)
typedef struct {
-blake256_4way_context blake;
-keccak256_4way_context keccak;
+blake256_4x32_context blake;
+keccak256_4x64_context keccak;
cubehashParam cube;
-skein256_4way_context skein;
-bmw256_4way_context bmw;
+skein256_4x64_context skein;
+bmw256_4x32_context bmw;
} lyra2v2_4way_ctx_holder;
static lyra2v2_4way_ctx_holder l2v2_4way_ctx;
bool init_lyra2rev2_4way_ctx()
{
-keccak256_4way_init( &l2v2_4way_ctx.keccak );
+keccak256_4x64_init( &l2v2_4way_ctx.keccak );
cubehashInit( &l2v2_4way_ctx.cube, 256, 16, 32 );
-skein256_4way_init( &l2v2_4way_ctx.skein );
-bmw256_4way_init( &l2v2_4way_ctx.bmw );
+skein256_4x64_init( &l2v2_4way_ctx.skein );
+bmw256_4x32_init( &l2v2_4way_ctx.bmw );
return true;
}
@@ -385,13 +383,13 @@ void lyra2rev2_4way_hash( void *state, const void *input )
lyra2v2_4way_ctx_holder ctx __attribute__ ((aligned (64)));
memcpy( &ctx, &l2v2_4way_ctx, sizeof(l2v2_4way_ctx) );
-blake256_4way_update( &ctx.blake, input + (64<<2), 16 );
-blake256_4way_close( &ctx.blake, vhash );
+blake256_4x32_update( &ctx.blake, input + (64<<2), 16 );
+blake256_4x32_close( &ctx.blake, vhash );
rintrlv_4x32_4x64( vhash64, vhash, 256 );
-keccak256_4way_update( &ctx.keccak, vhash64, 32 );
-keccak256_4way_close( &ctx.keccak, vhash64 );
+keccak256_4x64_update( &ctx.keccak, vhash64, 32 );
+keccak256_4x64_close( &ctx.keccak, vhash64 );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash64, 256 );
@@ -410,8 +408,8 @@ void lyra2rev2_4way_hash( void *state, const void *input )
intrlv_4x64( vhash64, hash0, hash1, hash2, hash3, 256 );
-skein256_4way_update( &ctx.skein, vhash64, 32 );
-skein256_4way_close( &ctx.skein, vhash64 );
+skein256_4x64_update( &ctx.skein, vhash64, 32 );
+skein256_4x64_close( &ctx.skein, vhash64 );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash64, 256 );
@@ -426,8 +424,8 @@ void lyra2rev2_4way_hash( void *state, const void *input )
intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 256 );
-bmw256_4way_update( &ctx.bmw, vhash, 32 );
-bmw256_4way_close( &ctx.bmw, state );
+bmw256_4x32_update( &ctx.bmw, vhash, 32 );
+bmw256_4x32_close( &ctx.bmw, state );
}
int scanhash_lyra2rev2_4way( struct work *work, uint32_t max_nonce,
@@ -451,8 +449,8 @@ int scanhash_lyra2rev2_4way( struct work *work, uint32_t max_nonce,
v128_bswap32_intrlv80_4x32( vdata, pdata );
-blake256_4way_init( &l2v2_4way_ctx.blake );
-blake256_4way_update( &l2v2_4way_ctx.blake, vdata, 64 );
+blake256_4x32_init( &l2v2_4way_ctx.blake );
+blake256_4x32_update( &l2v2_4way_ctx.blake, vdata, 64 );
do
{
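Note how every 16-lane 32-bit stage above (blake256, bmw256) pairs with exactly two 8-lane 64-bit batches (keccak256, skein256): each grouping fills one 512-bit register, which is why the lanes split as hash0-7 and hash8-15. A checkable sketch of the arithmetic:

/* Sketch only: the lane counts are just the 512-bit register budget. */
#include <assert.h>

enum {
   VEC_BITS  = 512,
   LANES_X32 = VEC_BITS / 32,   /* 16 lanes for blake256 / bmw256     */
   LANES_X64 = VEC_BITS / 64    /*  8 lanes for keccak256 / skein256  */
};

int main( void )
{
   assert( LANES_X32 == 2 * LANES_X64 );  /* one 16x32 stage feeds two 8x64 batches */
   return 0;
}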

View File

@@ -9,18 +9,18 @@
#if defined (LYRA2REV3_16WAY)
typedef struct {
-blake256_16way_context blake;
+blake256_16x32_context blake;
cube_4way_context cube;
-bmw256_16way_context bmw;
+bmw256_16x32_context bmw;
} lyra2v3_16way_ctx_holder;
static __thread lyra2v3_16way_ctx_holder l2v3_16way_ctx;
bool init_lyra2rev3_16way_ctx()
{
-blake256_16way_init( &l2v3_16way_ctx.blake );
+blake256_16x32_init( &l2v3_16way_ctx.blake );
cube_4way_init( &l2v3_16way_ctx.cube, 256, 16, 32 );
-bmw256_16way_init( &l2v3_16way_ctx.bmw );
+bmw256_16x32_init( &l2v3_16way_ctx.bmw );
return true;
}
@@ -46,8 +46,8 @@ void lyra2rev3_16way_hash( void *state, const void *input )
lyra2v3_16way_ctx_holder ctx __attribute__ ((aligned (64)));
memcpy( &ctx, &l2v3_16way_ctx, sizeof(l2v3_16way_ctx) );
-blake256_16way_update( &ctx.blake, input + (64*16), 16 );
-blake256_16way_close( &ctx.blake, vhash );
+blake256_16x32_update( &ctx.blake, input + (64*16), 16 );
+blake256_16x32_close( &ctx.blake, vhash );
dintrlv_16x32( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
hash8, hash9, hash10, hash11 ,hash12, hash13, hash14, hash15,
@@ -120,8 +120,8 @@ void lyra2rev3_16way_hash( void *state, const void *input )
hash7, hash8, hash9, hash10, hash11, hash12, hash13, hash14,
hash15, 256 );
-bmw256_16way_update( &ctx.bmw, vhash, 32 );
-bmw256_16way_close( &ctx.bmw, state );
+bmw256_16x32_update( &ctx.bmw, vhash, 32 );
+bmw256_16x32_close( &ctx.bmw, state );
}
@@ -145,8 +145,8 @@ int scanhash_lyra2rev3_16way( struct work *work, const uint32_t max_nonce,
mm512_bswap32_intrlv80_16x32( vdata, pdata );
-blake256_16way_init( &l2v3_16way_ctx.blake );
-blake256_16way_update( &l2v3_16way_ctx.blake, vdata, 64 );
+blake256_16x32_init( &l2v3_16way_ctx.blake );
+blake256_16x32_update( &l2v3_16way_ctx.blake, vdata, 64 );
do
{
@@ -178,18 +178,18 @@ int scanhash_lyra2rev3_16way( struct work *work, const uint32_t max_nonce,
#elif defined (LYRA2REV3_8WAY)
typedef struct {
-blake256_8way_context blake;
+blake256_8x32_context blake;
cubehashParam cube;
-bmw256_8way_context bmw;
+bmw256_8x32_context bmw;
} lyra2v3_8way_ctx_holder;
static __thread lyra2v3_8way_ctx_holder l2v3_8way_ctx;
bool init_lyra2rev3_8way_ctx()
{
-blake256_8way_init( &l2v3_8way_ctx.blake );
+blake256_8x32_init( &l2v3_8way_ctx.blake );
cubehashInit( &l2v3_8way_ctx.cube, 256, 16, 32 );
-bmw256_8way_init( &l2v3_8way_ctx.bmw );
+bmw256_8x32_init( &l2v3_8way_ctx.bmw );
return true;
}
@@ -207,8 +207,8 @@ void lyra2rev3_8way_hash( void *state, const void *input )
lyra2v3_8way_ctx_holder ctx __attribute__ ((aligned (64)));
memcpy( &ctx, &l2v3_8way_ctx, sizeof(l2v3_8way_ctx) );
-blake256_8way_update( &ctx.blake, input + (64*8), 16 );
-blake256_8way_close( &ctx.blake, vhash );
+blake256_8x32_update( &ctx.blake, input + (64*8), 16 );
+blake256_8x32_close( &ctx.blake, vhash );
dintrlv_8x32( hash0, hash1, hash2, hash3,
hash4, hash5, hash6, hash7, vhash, 256 );
@@ -243,8 +243,8 @@ void lyra2rev3_8way_hash( void *state, const void *input )
intrlv_8x32( vhash, hash0, hash1, hash2, hash3,
hash4, hash5, hash6, hash7, 256 );
-bmw256_8way_update( &ctx.bmw, vhash, 32 );
-bmw256_8way_close( &ctx.bmw, state );
+bmw256_8x32_update( &ctx.bmw, vhash, 32 );
+bmw256_8x32_close( &ctx.bmw, state );
}
@@ -269,8 +269,8 @@ int scanhash_lyra2rev3_8way( struct work *work, const uint32_t max_nonce,
mm256_bswap32_intrlv80_8x32( vdata, pdata );
*noncev = _mm256_set_epi32( n+7, n+6, n+5, n+4, n+3, n+2, n+1, n );
-blake256_8way_init( &l2v3_8way_ctx.blake );
-blake256_8way_update( &l2v3_8way_ctx.blake, vdata, 64 );
+blake256_8x32_init( &l2v3_8way_ctx.blake );
+blake256_8x32_update( &l2v3_8way_ctx.blake, vdata, 64 );
do
{
@@ -300,19 +300,18 @@ int scanhash_lyra2rev3_8way( struct work *work, const uint32_t max_nonce,
#if defined (LYRA2REV3_4WAY)
typedef struct {
-blake256_4way_context blake;
+blake256_4x32_context blake;
cubehashParam cube;
-bmw256_4way_context bmw;
+bmw256_4x32_context bmw;
} lyra2v3_4way_ctx_holder;
-//static lyra2v3_4way_ctx_holder l2v3_4way_ctx;
static __thread lyra2v3_4way_ctx_holder l2v3_4way_ctx;
bool init_lyra2rev3_4way_ctx()
{
-blake256_4way_init( &l2v3_4way_ctx.blake );
+blake256_4x32_init( &l2v3_4way_ctx.blake );
cubehashInit( &l2v3_4way_ctx.cube, 256, 16, 32 );
-bmw256_4way_init( &l2v3_4way_ctx.bmw );
+bmw256_4x32_init( &l2v3_4way_ctx.bmw );
return true;
}
@@ -326,8 +325,8 @@ void lyra2rev3_4way_hash( void *state, const void *input )
lyra2v3_4way_ctx_holder ctx __attribute__ ((aligned (64)));
memcpy( &ctx, &l2v3_4way_ctx, sizeof(l2v3_4way_ctx) );
-blake256_4way_update( &ctx.blake, input + (64*4), 16 );
-blake256_4way_close( &ctx.blake, vhash );
+blake256_4x32_update( &ctx.blake, input + (64*4), 16 );
+blake256_4x32_close( &ctx.blake, vhash );
dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 256 );
LYRA2REV3( l2v3_wholeMatrix, hash0, 32, hash0, 32, hash0, 32, 1, 4, 4 );
@@ -349,8 +348,8 @@ void lyra2rev3_4way_hash( void *state, const void *input )
LYRA2REV3( l2v3_wholeMatrix, hash3, 32, hash3, 32, hash3, 32, 1, 4, 4 );
intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 256 );
-bmw256_4way_update( &ctx.bmw, vhash, 32 );
-bmw256_4way_close( &ctx.bmw, state );
+bmw256_4x32_update( &ctx.bmw, vhash, 32 );
+bmw256_4x32_close( &ctx.bmw, state );
}
int scanhash_lyra2rev3_4way( struct work *work, const uint32_t max_nonce,
@@ -374,8 +373,8 @@ int scanhash_lyra2rev3_4way( struct work *work, const uint32_t max_nonce,
v128_bswap32_intrlv80_4x32( vdata, pdata );
*noncev = _mm_set_epi32( n+3, n+2, n+1, n );
-blake256_4way_init( &l2v3_4way_ctx.blake );
-blake256_4way_update( &l2v3_4way_ctx.blake, vdata, 64 );
+blake256_4x32_init( &l2v3_4way_ctx.blake );
+blake256_4x32_update( &l2v3_4way_ctx.blake, vdata, 64 );
do
{
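The l2v3_*_ctx holders above are declared __thread, so each miner thread keeps a private copy of the cached blake midstate and no locking is needed when scanhash re-initialises it. A minimal, self-contained illustration of the storage class (hypothetical names, not miner code):

#include <pthread.h>
#include <stdio.h>

static __thread int ctx;                 /* one private copy per thread */

static void *worker( void *arg )
{
   ctx = (int)(long)arg;                 /* no data race: thread-local  */
   printf( "thread %ld sees ctx=%d\n", (long)arg, ctx );
   return NULL;
}

int main( void )
{
   pthread_t t[2];
   for ( long i = 0; i < 2; i++ ) pthread_create( &t[i], NULL, worker, (void*)i );
   for ( int i = 0; i < 2; i++ )  pthread_join( t[i], NULL );
   return 0;
}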

View File

@@ -45,7 +45,7 @@ static void lyra2z_16way_hash( void *state, const void *midstate_vars,
uint32_t hash14[8] __attribute__ ((aligned (32)));
uint32_t hash15[8] __attribute__ ((aligned (32)));
-blake256_16way_final_rounds_le( vhash, midstate_vars, midhash, block, 14 );
+blake256_16x32_final_rounds_le( vhash, midstate_vars, midhash, block, 14 );
dintrlv_16x32( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
hash8, hash9, hash10, hash11 ,hash12, hash13, hash14, hash15,
@@ -139,7 +139,7 @@ int scanhash_lyra2z_16way( struct work *work, uint32_t max_nonce,
n+ 7, n+ 6, n+ 5, n+ 4, n+ 3, n+ 2, n +1, n );
// Partially prehash second block without touching nonces in block_buf[3].
-blake256_16way_round0_prehash_le( midstate_vars, block0_hash, block_buf );
+blake256_16x32_round0_prehash_le( midstate_vars, block0_hash, block_buf );
do {
lyra2z_16way_hash( hash, midstate_vars, block0_hash, block_buf );
@@ -180,7 +180,7 @@ static void lyra2z_8way_hash( void *state, const void *midstate_vars,
uint32_t hash7[8] __attribute__ ((aligned (32)));
uint32_t vhash[8*8] __attribute__ ((aligned (64)));
-blake256_8way_final_rounds_le( vhash, midstate_vars, midhash, block, 14 );
+blake256_8x32_final_rounds_le( vhash, midstate_vars, midhash, block, 14 );
dintrlv_8x32( hash0, hash1, hash2, hash3,
hash4, hash5, hash6, hash7, vhash, 256 );
@@ -246,7 +246,7 @@ int scanhash_lyra2z_8way( struct work *work, uint32_t max_nonce,
_mm256_set_epi32( n+ 7, n+ 6, n+ 5, n+ 4, n+ 3, n+ 2, n +1, n );
// Partially prehash second block without touching nonces
-blake256_8way_round0_prehash_le( midstate_vars, block0_hash, block_buf );
+blake256_8x32_round0_prehash_le( midstate_vars, block0_hash, block_buf );
do {
lyra2z_8way_hash( hash, midstate_vars, block0_hash, block_buf );
@@ -279,12 +279,12 @@ bool lyra2z_4way_thread_init()
return ( lyra2z_4way_matrix = mm_malloc( LYRA2Z_MATRIX_SIZE, 64 ) );
}
-static __thread blake256_4way_context l2z_4way_blake_mid;
+static __thread blake256_4x32_context l2z_4way_blake_mid;
void lyra2z_4way_midstate( const void* input )
{
-blake256_4way_init( &l2z_4way_blake_mid );
-blake256_4way_update( &l2z_4way_blake_mid, input, 64 );
+blake256_4x32_init( &l2z_4way_blake_mid );
+blake256_4x32_update( &l2z_4way_blake_mid, input, 64 );
}
void lyra2z_4way_hash( void *hash, const void *midstate_vars,
@@ -295,15 +295,8 @@ void lyra2z_4way_hash( void *hash, const void *midstate_vars,
uint32_t hash2[8] __attribute__ ((aligned (64)));
uint32_t hash3[8] __attribute__ ((aligned (64)));
uint32_t vhash[8*4] __attribute__ ((aligned (64)));
-// blake256_4way_context ctx_blake __attribute__ ((aligned (64)));
-blake256_4way_final_rounds_le( vhash, midstate_vars, midhash, block, 14 );
-/*
-memcpy( &ctx_blake, &l2z_4way_blake_mid, sizeof l2z_4way_blake_mid );
-blake256_4way_update( &ctx_blake, input + (64*4), 16 );
-blake256_4way_close( &ctx_blake, vhash );
-*/
+blake256_4x32_final_rounds_le( vhash, midstate_vars, midhash, block, 14 );
dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 256 );
@@ -357,7 +350,7 @@ int scanhash_lyra2z_4way( struct work *work, uint32_t max_nonce,
block_buf[15] = v128_32( 640 );
// Partially prehash second block without touching nonces
-blake256_4way_round0_prehash_le( midstate_vars, block0_hash, block_buf );
+blake256_4x32_round0_prehash_le( midstate_vars, block0_hash, block_buf );
do {
lyra2z_4way_hash( hash, midstate_vars, block0_hash, block_buf );
@@ -454,11 +447,9 @@ bool register_lyra2z_algo( algo_gate_t* gate )
#if defined(LYRA2Z_16WAY)
gate->miner_thread_init = (void*)&lyra2z_16way_thread_init;
gate->scanhash = (void*)&scanhash_lyra2z_16way;
-// gate->hash = (void*)&lyra2z_16way_hash;
#elif defined(LYRA2Z_8WAY)
gate->miner_thread_init = (void*)&lyra2z_8way_thread_init;
gate->scanhash = (void*)&scanhash_lyra2z_8way;
-// gate->hash = (void*)&lyra2z_8way_hash;
#elif defined(LYRA2Z_4WAY)
gate->miner_thread_init = (void*)&lyra2z_4way_thread_init;
gate->scanhash = (void*)&scanhash_lyra2z_4way;
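The round0_prehash/final_rounds split above is nonce hoisting: the second blake block is constant except for the nonce in block_buf[3], so everything independent of it is computed once outside the scan loop and only the nonce-dependent work is redone per iteration. A toy version of the split (hypothetical round function, not blake):

#include <stdint.h>

/* toy round over 4 words; word 3 plays the nonce */
static uint32_t toy_round( const uint32_t w[4] )
{  return w[0]*3u + w[1]*5u + w[2]*7u + w[3]*11u; }

/* prehash: fold the nonce-independent terms once per work unit */
static uint32_t toy_prehash( const uint32_t w[4] )
{  return w[0]*3u + w[1]*5u + w[2]*7u; }

/* per nonce: one multiply-add instead of the whole round;
   identity: toy_round(w) == toy_final( toy_prehash(w), w[3] ) */
static uint32_t toy_final( uint32_t pre, uint32_t nonce )
{  return pre + nonce*11u; }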

View File

@@ -45,7 +45,7 @@ static const uint64_t blake2b_IV[8] =
#if defined(SIMD512)
-#define G2W_4X64(a,b,c,d) \
+#define G2W(a,b,c,d) \
a = _mm512_add_epi64( a, b ); \
d = _mm512_ror_epi64( _mm512_xor_si512( d, a ), 32 ); \
c = _mm512_add_epi64( c, d ); \
@@ -56,27 +56,15 @@ static const uint64_t blake2b_IV[8] =
b = _mm512_ror_epi64( _mm512_xor_si512( b, c ), 63 );
#define LYRA_ROUND_2WAY_AVX512( s0, s1, s2, s3 ) \
-G2W_4X64( s0, s1, s2, s3 ); \
+G2W( s0, s1, s2, s3 ); \
s0 = mm512_shufll256_64( s0 ); \
-s3 = mm512_swap256_128( s3); \
+s3 = mm512_swap256_128( s3 ); \
s2 = mm512_shuflr256_64( s2 ); \
-G2W_4X64( s0, s1, s2, s3 ); \
+G2W( s0, s1, s2, s3 ); \
s0 = mm512_shuflr256_64( s0 ); \
s3 = mm512_swap256_128( s3 ); \
s2 = mm512_shufll256_64( s2 );
-/*
-#define LYRA_ROUND_2WAY_AVX512( s0, s1, s2, s3 ) \
-G2W_4X64( s0, s1, s2, s3 ); \
-s3 = mm512_shufll256_64( s3 ); \
-s1 = mm512_shuflr256_64( s1); \
-s2 = mm512_swap256_128( s2 ); \
-G2W_4X64( s0, s1, s2, s3 ); \
-s3 = mm512_shuflr256_64( s3 ); \
-s1 = mm512_shufll256_64( s1 ); \
-s2 = mm512_swap256_128( s2 );
-*/
#define LYRA_12_ROUNDS_2WAY_AVX512( s0, s1, s2, s3 ) \
LYRA_ROUND_2WAY_AVX512( s0, s1, s2, s3 ) \
LYRA_ROUND_2WAY_AVX512( s0, s1, s2, s3 ) \
@@ -95,7 +83,7 @@ static const uint64_t blake2b_IV[8] =
#if defined(__AVX2__)
-#define G_4X64(a,b,c,d) \
+#define G_AVX2(a,b,c,d) \
a = _mm256_add_epi64( a, b ); \
d = mm256_ror_64( _mm256_xor_si256( d, a ), 32 ); \
c = _mm256_add_epi64( c, d ); \
@@ -107,27 +95,15 @@ static const uint64_t blake2b_IV[8] =
// Pivot about s1 instead of s0 reduces latency.
#define LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \
-G_4X64( s0, s1, s2, s3 ); \
+G_AVX2( s0, s1, s2, s3 ); \
s0 = mm256_shufll_64( s0 ); \
-s3 = mm256_swap_128( s3); \
+s3 = mm256_swap_128( s3 ); \
s2 = mm256_shuflr_64( s2 ); \
-G_4X64( s0, s1, s2, s3 ); \
+G_AVX2( s0, s1, s2, s3 ); \
s0 = mm256_shuflr_64( s0 ); \
s3 = mm256_swap_128( s3 ); \
s2 = mm256_shufll_64( s2 );
-/*
-#define LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \
-G_4X64( s0, s1, s2, s3 ); \
-s3 = mm256_shufll_64( s3 ); \
-s1 = mm256_shuflr_64( s1); \
-s2 = mm256_swap_128( s2 ); \
-G_4X64( s0, s1, s2, s3 ); \
-s3 = mm256_shuflr_64( s3 ); \
-s1 = mm256_shufll_64( s1 ); \
-s2 = mm256_swap_128( s2 );
-*/
#define LYRA_12_ROUNDS_AVX2( s0, s1, s2, s3 ) \
LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \
LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \
@@ -148,7 +124,7 @@ static const uint64_t blake2b_IV[8] =
// process 2 columns in parallel
// returns void, all args updated
-#define G_2X64(a,b,c,d) \
+#define G_128(a,b,c,d) \
a = v128_add64( a, b ); \
d = v128_ror64xor( d, a, 32 ); \
c = v128_add64( c, d ); \
@@ -161,16 +137,16 @@ static const uint64_t blake2b_IV[8] =
#define LYRA_ROUND_AVX(s0,s1,s2,s3,s4,s5,s6,s7) \
{ \
v128u64_t t; \
-G_2X64( s0, s2, s4, s6 ); \
-G_2X64( s1, s3, s5, s7 ); \
+G_128( s0, s2, s4, s6 ); \
+G_128( s1, s3, s5, s7 ); \
t = v128_alignr64( s7, s6, 1 ); \
s6 = v128_alignr64( s6, s7, 1 ); \
s7 = t; \
t = v128_alignr64( s2, s3, 1 ); \
s2 = v128_alignr64( s3, s2, 1 ); \
s3 = t; \
-G_2X64( s0, s2, s5, s6 ); \
-G_2X64( s1, s3, s4, s7 ); \
+G_128( s0, s2, s5, s6 ); \
+G_128( s1, s3, s4, s7 ); \
t = v128_alignr64( s6, s7, 1 ); \
s6 = v128_alignr64( s7, s6, 1 ); \
s7 = t;
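For reference, G2W, G_AVX2 and G_128 are all the same Blake2b G mixing function (rotation counts 32, 24, 16, 63, without Blake2b's message-word additions, since Lyra2's sponge uses the bare permutation), applied lane-wise at different vector widths. The scalar equivalent:

#include <stdint.h>

static inline uint64_t ror64( uint64_t x, int n )
{  return ( x >> n ) | ( x << ( 64 - n ) ); }

/* Blake2b G on one column/diagonal quartet of the state; the macros
   above run it on 2, 4 or 8 such quartets at once. */
static inline void blake2b_g( uint64_t *a, uint64_t *b,
                              uint64_t *c, uint64_t *d )
{
   *a += *b;  *d = ror64( *d ^ *a, 32 );
   *c += *d;  *b = ror64( *b ^ *c, 24 );
   *a += *b;  *d = ror64( *d ^ *a, 16 );
   *c += *d;  *b = ror64( *b ^ *c, 63 );
}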