mirror of
https://github.com/JayDDee/cpuminer-opt.git
synced 2025-09-17 23:44:27 +00:00
v25.4
This commit is contained in:
@@ -6,23 +6,23 @@
|
||||
|
||||
#if defined (SKEIN_8WAY)
|
||||
|
||||
static __thread skein512_8way_context skein512_8way_ctx
|
||||
static __thread skein512_8x64_context skein512_8x64_ctx
|
||||
__attribute__ ((aligned (64)));
|
||||
|
||||
void skeinhash_8way( void *state, const void *input )
|
||||
{
|
||||
uint64_t vhash64[8*8] __attribute__ ((aligned (128)));
|
||||
skein512_8way_context ctx_skein;
|
||||
memcpy( &ctx_skein, &skein512_8way_ctx, sizeof( ctx_skein ) );
|
||||
skein512_8x64_context ctx_skein;
|
||||
memcpy( &ctx_skein, &skein512_8x64_ctx, sizeof( ctx_skein ) );
|
||||
uint32_t vhash32[16*8] __attribute__ ((aligned (128)));
|
||||
sha256_8way_context ctx_sha256;
|
||||
sha256_8x32_context ctx_sha256;
|
||||
|
||||
skein512_8way_final16( &ctx_skein, vhash64, input + (64*8) );
|
||||
skein512_8x64_final16( &ctx_skein, vhash64, input + (64*8) );
|
||||
rintrlv_8x64_8x32( vhash32, vhash64, 512 );
|
||||
|
||||
sha256_8way_init( &ctx_sha256 );
|
||||
sha256_8way_update( &ctx_sha256, vhash32, 64 );
|
||||
sha256_8way_close( &ctx_sha256, state );
|
||||
sha256_8x32_init( &ctx_sha256 );
|
||||
sha256_8x32_update( &ctx_sha256, vhash32, 64 );
|
||||
sha256_8x32_close( &ctx_sha256, state );
|
||||
}
|
||||
|
||||
int scanhash_skein_8way( struct work *work, uint32_t max_nonce,
|
||||
@@ -46,7 +46,7 @@ int scanhash_skein_8way( struct work *work, uint32_t max_nonce,
|
||||
*noncev = mm512_intrlv_blend_32(
|
||||
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
|
||||
n+3, 0, n+2, 0, n+1, 0, n , 0 ), *noncev );
|
||||
skein512_8way_prehash64( &skein512_8way_ctx, vdata );
|
||||
skein512_8x64_prehash64( &skein512_8x64_ctx, vdata );
|
||||
do
|
||||
{
|
||||
skeinhash_8way( hash, vdata );
|
||||
@@ -73,14 +73,14 @@ int scanhash_skein_8way( struct work *work, uint32_t max_nonce,
|
||||
|
||||
#elif defined (SKEIN_4WAY)
|
||||
|
||||
static __thread skein512_4way_context skein512_4way_ctx
|
||||
static __thread skein512_4x64_context skein512_4x64_ctx
|
||||
__attribute__ ((aligned (64)));
|
||||
|
||||
void skeinhash_4way( void *state, const void *input )
|
||||
{
|
||||
uint64_t vhash64[8*4] __attribute__ ((aligned (128)));
|
||||
skein512_4way_context ctx_skein;
|
||||
memcpy( &ctx_skein, &skein512_4way_ctx, sizeof( ctx_skein ) );
|
||||
skein512_4x64_context ctx_skein;
|
||||
memcpy( &ctx_skein, &skein512_4x64_ctx, sizeof( ctx_skein ) );
|
||||
#if defined(__SHA__)
|
||||
uint32_t hash0[16] __attribute__ ((aligned (64)));
|
||||
uint32_t hash1[16] __attribute__ ((aligned (64)));
|
||||
@@ -88,10 +88,10 @@ void skeinhash_4way( void *state, const void *input )
|
||||
uint32_t hash3[16] __attribute__ ((aligned (64)));
|
||||
#else
|
||||
uint32_t vhash32[16*4] __attribute__ ((aligned (64)));
|
||||
sha256_4way_context ctx_sha256;
|
||||
sha256_4x32_context ctx_sha256;
|
||||
#endif
|
||||
|
||||
skein512_4way_final16( &ctx_skein, vhash64, input + (64*4) );
|
||||
skein512_4x64_final16( &ctx_skein, vhash64, input + (64*4) );
|
||||
|
||||
#if defined(__SHA__)
|
||||
|
||||
@@ -107,9 +107,9 @@ void skeinhash_4way( void *state, const void *input )
|
||||
#else
|
||||
|
||||
rintrlv_4x64_4x32( vhash32, vhash64, 512 );
|
||||
sha256_4way_init( &ctx_sha256 );
|
||||
sha256_4way_update( &ctx_sha256, vhash32, 64 );
|
||||
sha256_4way_close( &ctx_sha256, state );
|
||||
sha256_4x32_init( &ctx_sha256 );
|
||||
sha256_4x32_update( &ctx_sha256, vhash32, 64 );
|
||||
sha256_4x32_close( &ctx_sha256, state );
|
||||
|
||||
#endif
|
||||
}
|
||||
@@ -132,7 +132,7 @@ int scanhash_skein_4way( struct work *work, uint32_t max_nonce,
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
skein512_4way_prehash64( &skein512_4way_ctx, vdata );
|
||||
skein512_4x64_prehash64( &skein512_4x64_ctx, vdata );
|
||||
|
||||
*noncev = mm256_intrlv_blend_32(
|
||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
||||
|
||||
@@ -513,7 +513,7 @@ do { \
|
||||
|
||||
#if defined(SIMD512)
|
||||
|
||||
void skein256_8way_init( skein256_8way_context *sc )
|
||||
void skein256_8x64_init( skein256_8x64_context *sc )
|
||||
{
|
||||
sc->h0 = _mm512_set1_epi64( 0xCCD044A12FDB3E13 );
|
||||
sc->h1 = _mm512_set1_epi64( 0xE83590301A79A9EB );
|
||||
@@ -527,7 +527,7 @@ void skein256_8way_init( skein256_8way_context *sc )
|
||||
sc->ptr = 0;
|
||||
}
|
||||
|
||||
void skein512_8way_init( skein512_8way_context *sc )
|
||||
void skein512_8x64_init( skein512_8x64_context *sc )
|
||||
{
|
||||
sc->h0 = _mm512_set1_epi64( 0x4903ADFF749C51CE );
|
||||
sc->h1 = _mm512_set1_epi64( 0x0D95DE399746DF03 );
|
||||
@@ -542,7 +542,7 @@ void skein512_8way_init( skein512_8way_context *sc )
|
||||
}
|
||||
|
||||
static void
|
||||
skein_big_core_8way( skein512_8way_context *sc, const void *data,
|
||||
skein_big_core_8x64( skein512_8x64_context *sc, const void *data,
|
||||
size_t len )
|
||||
{
|
||||
__m512i *vdata = (__m512i*)data;
|
||||
@@ -587,7 +587,7 @@ skein_big_core_8way( skein512_8way_context *sc, const void *data,
|
||||
}
|
||||
|
||||
static void
|
||||
skein_big_close_8way( skein512_8way_context *sc, unsigned ub, unsigned n,
|
||||
skein_big_close_8x64( skein512_8x64_context *sc, unsigned ub, unsigned n,
|
||||
void *dst, size_t out_len )
|
||||
{
|
||||
__m512i *buf;
|
||||
@@ -621,7 +621,7 @@ skein_big_close_8way( skein512_8way_context *sc, unsigned ub, unsigned n,
|
||||
memcpy_512( dst, buf, out_len >> 3 );
|
||||
}
|
||||
|
||||
void skein512_8way_full( skein512_8way_context *sc, void *out, const void *data,
|
||||
void skein512_8x64_full( skein512_8x64_context *sc, void *out, const void *data,
|
||||
size_t len )
|
||||
{
|
||||
__m512i h0, h1, h2, h3, h4, h5, h6, h7;
|
||||
@@ -698,7 +698,7 @@ void skein512_8way_full( skein512_8way_context *sc, void *out, const void *data,
|
||||
}
|
||||
|
||||
void
|
||||
skein512_8way_prehash64( skein512_8way_context *sc, const void *data )
|
||||
skein512_8x64_prehash64( skein512_8x64_context *sc, const void *data )
|
||||
{
|
||||
__m512i *vdata = (__m512i*)data;
|
||||
__m512i *buf = sc->buf;
|
||||
@@ -732,7 +732,7 @@ skein512_8way_prehash64( skein512_8way_context *sc, const void *data )
|
||||
}
|
||||
|
||||
void
|
||||
skein512_8way_final16( skein512_8way_context *sc, void *output,
|
||||
skein512_8x64_final16( skein512_8x64_context *sc, void *output,
|
||||
const void *data )
|
||||
{
|
||||
__m512i *in = (__m512i*)data;
|
||||
@@ -778,34 +778,34 @@ skein512_8way_final16( skein512_8way_context *sc, void *output,
|
||||
|
||||
|
||||
void
|
||||
skein256_8way_update(void *cc, const void *data, size_t len)
|
||||
skein256_8x64_update(void *cc, const void *data, size_t len)
|
||||
{
|
||||
skein_big_core_8way(cc, data, len);
|
||||
skein_big_core_8x64(cc, data, len);
|
||||
}
|
||||
|
||||
void
|
||||
skein256_8way_close(void *cc, void *dst)
|
||||
skein256_8x64_close(void *cc, void *dst)
|
||||
{
|
||||
skein_big_close_8way(cc, 0, 0, dst, 32);
|
||||
skein_big_close_8x64(cc, 0, 0, dst, 32);
|
||||
}
|
||||
|
||||
void
|
||||
skein512_8way_update(void *cc, const void *data, size_t len)
|
||||
skein512_8x64_update(void *cc, const void *data, size_t len)
|
||||
{
|
||||
skein_big_core_8way(cc, data, len);
|
||||
skein_big_core_8x64(cc, data, len);
|
||||
}
|
||||
|
||||
void
|
||||
skein512_8way_close(void *cc, void *dst)
|
||||
skein512_8x64_close(void *cc, void *dst)
|
||||
{
|
||||
skein_big_close_8way(cc, 0, 0, dst, 64);
|
||||
skein_big_close_8x64(cc, 0, 0, dst, 64);
|
||||
}
|
||||
|
||||
#endif // AVX512
|
||||
|
||||
#if defined(__AVX2__)
|
||||
|
||||
void skein256_4way_init( skein256_4way_context *sc )
|
||||
void skein256_4x64_init( skein256_4x64_context *sc )
|
||||
{
|
||||
sc->h0 = _mm256_set1_epi64x( 0xCCD044A12FDB3E13 );
|
||||
sc->h1 = _mm256_set1_epi64x( 0xE83590301A79A9EB );
|
||||
@@ -819,7 +819,7 @@ void skein256_4way_init( skein256_4way_context *sc )
|
||||
sc->ptr = 0;
|
||||
}
|
||||
|
||||
void skein512_4way_init( skein512_4way_context *sc )
|
||||
void skein512_4x64_init( skein512_4x64_context *sc )
|
||||
{
|
||||
sc->h0 = _mm256_set1_epi64x( 0x4903ADFF749C51CE );
|
||||
sc->h1 = _mm256_set1_epi64x( 0x0D95DE399746DF03 );
|
||||
@@ -835,7 +835,7 @@ void skein512_4way_init( skein512_4way_context *sc )
|
||||
|
||||
// Do not use for 128 byte data length
|
||||
static void
|
||||
skein_big_core_4way( skein512_4way_context *sc, const void *data,
|
||||
skein_big_core_4x64( skein512_4x64_context *sc, const void *data,
|
||||
size_t len )
|
||||
{
|
||||
__m256i *vdata = (__m256i*)data;
|
||||
@@ -882,7 +882,7 @@ skein_big_core_4way( skein512_4way_context *sc, const void *data,
|
||||
}
|
||||
|
||||
static void
|
||||
skein_big_close_4way( skein512_4way_context *sc, unsigned ub, unsigned n,
|
||||
skein_big_close_4x64( skein512_4x64_context *sc, unsigned ub, unsigned n,
|
||||
void *dst, size_t out_len )
|
||||
{
|
||||
__m256i *buf;
|
||||
@@ -920,7 +920,7 @@ skein_big_close_4way( skein512_4way_context *sc, unsigned ub, unsigned n,
|
||||
}
|
||||
|
||||
void
|
||||
skein512_4way_full( skein512_4way_context *sc, void *out, const void *data,
|
||||
skein512_4x64_full( skein512_4x64_context *sc, void *out, const void *data,
|
||||
size_t len )
|
||||
{
|
||||
__m256i h0, h1, h2, h3, h4, h5, h6, h7;
|
||||
@@ -995,7 +995,7 @@ skein512_4way_full( skein512_4way_context *sc, void *out, const void *data,
|
||||
}
|
||||
|
||||
void
|
||||
skein512_4way_prehash64( skein512_4way_context *sc, const void *data )
|
||||
skein512_4x64_prehash64( skein512_4x64_context *sc, const void *data )
|
||||
{
|
||||
__m256i *vdata = (__m256i*)data;
|
||||
__m256i *buf = sc->buf;
|
||||
@@ -1029,7 +1029,7 @@ skein512_4way_prehash64( skein512_4way_context *sc, const void *data )
|
||||
}
|
||||
|
||||
void
|
||||
skein512_4way_final16( skein512_4way_context *sc, void *out, const void *data )
|
||||
skein512_4x64_final16( skein512_4x64_context *sc, void *out, const void *data )
|
||||
{
|
||||
__m256i *vdata = (__m256i*)data;
|
||||
__m256i *buf = sc->buf;
|
||||
@@ -1073,29 +1073,29 @@ skein512_4way_final16( skein512_4way_context *sc, void *out, const void *data )
|
||||
|
||||
// Broken for 80 bytes, use prehash.
|
||||
void
|
||||
skein256_4way_update(void *cc, const void *data, size_t len)
|
||||
skein256_4x64_update(void *cc, const void *data, size_t len)
|
||||
{
|
||||
skein_big_core_4way(cc, data, len);
|
||||
skein_big_core_4x64(cc, data, len);
|
||||
}
|
||||
|
||||
void
|
||||
skein256_4way_close(void *cc, void *dst)
|
||||
skein256_4x64_close(void *cc, void *dst)
|
||||
{
|
||||
skein_big_close_4way(cc, 0, 0, dst, 32);
|
||||
skein_big_close_4x64(cc, 0, 0, dst, 32);
|
||||
}
|
||||
|
||||
|
||||
// Broken for 80 & 128 bytes, use prehash or full
|
||||
void
|
||||
skein512_4way_update(void *cc, const void *data, size_t len)
|
||||
skein512_4x64_update(void *cc, const void *data, size_t len)
|
||||
{
|
||||
skein_big_core_4way(cc, data, len);
|
||||
skein_big_core_4x64(cc, data, len);
|
||||
}
|
||||
|
||||
void
|
||||
skein512_4way_close(void *cc, void *dst)
|
||||
skein512_4x64_close(void *cc, void *dst)
|
||||
{
|
||||
skein_big_close_4way(cc, 0, 0, dst, 64);
|
||||
skein_big_close_4x64(cc, 0, 0, dst, 64);
|
||||
}
|
||||
|
||||
#endif // AVX2
|
||||
@@ -1231,7 +1231,7 @@ void skein512_2x64_init( skein512_2x64_context *sc )
|
||||
}
|
||||
|
||||
static void
|
||||
skein_big_core_2way( skein512_2x64_context *sc, const void *data,
|
||||
skein_big_core_2x64( skein512_2x64_context *sc, const void *data,
|
||||
size_t len )
|
||||
{
|
||||
v128u64_t *vdata = (v128u64_t*)data;
|
||||
@@ -1278,7 +1278,7 @@ skein_big_core_2way( skein512_2x64_context *sc, const void *data,
|
||||
}
|
||||
|
||||
static void
|
||||
skein_big_close_2way( skein512_2x64_context *sc, unsigned ub, unsigned n,
|
||||
skein_big_close_2x64( skein512_2x64_context *sc, unsigned ub, unsigned n,
|
||||
void *dst, size_t out_len )
|
||||
{
|
||||
v128u64_t *buf;
|
||||
@@ -1471,13 +1471,13 @@ skein512_2x64_final16( skein512_2x64_context *sc, void *out, const void *data )
|
||||
void
|
||||
skein256_2x64_update(void *cc, const void *data, size_t len)
|
||||
{
|
||||
skein_big_core_2way(cc, data, len);
|
||||
skein_big_core_2x64(cc, data, len);
|
||||
}
|
||||
|
||||
void
|
||||
skein256_2x64_close(void *cc, void *dst)
|
||||
{
|
||||
skein_big_close_2way(cc, 0, 0, dst, 32);
|
||||
skein_big_close_2x64(cc, 0, 0, dst, 32);
|
||||
}
|
||||
|
||||
|
||||
@@ -1485,13 +1485,12 @@ skein256_2x64_close(void *cc, void *dst)
|
||||
void
|
||||
skein512_2x64_update(void *cc, const void *data, size_t len)
|
||||
{
|
||||
skein_big_core_2way(cc, data, len);
|
||||
skein_big_core_2x64(cc, data, len);
|
||||
}
|
||||
|
||||
void
|
||||
skein512_2x64_close(void *cc, void *dst)
|
||||
{
|
||||
skein_big_close_2way(cc, 0, 0, dst, 64);
|
||||
skein_big_close_2x64(cc, 0, 0, dst, 64);
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -52,24 +52,36 @@ typedef struct
|
||||
__m512i h0, h1, h2, h3, h4, h5, h6, h7;
|
||||
size_t ptr;
|
||||
uint64_t bcount;
|
||||
} skein_8way_big_context __attribute__ ((aligned (128)));
|
||||
} skein_8x64_big_context __attribute__ ((aligned (128)));
|
||||
|
||||
typedef skein_8way_big_context skein512_8way_context;
|
||||
typedef skein_8way_big_context skein256_8way_context;
|
||||
typedef skein_8x64_big_context skein512_8x64_context;
|
||||
typedef skein_8x64_big_context skein256_8x64_context;
|
||||
|
||||
void skein512_8way_full( skein512_8way_context *sc, void *out,
|
||||
void skein512_8x64_full( skein512_8x64_context *sc, void *out,
|
||||
const void *data, size_t len );
|
||||
void skein512_8way_init( skein512_8way_context *sc );
|
||||
void skein512_8way_update( void *cc, const void *data, size_t len );
|
||||
void skein512_8way_close( void *cc, void *dst );
|
||||
void skein512_8x64_init( skein512_8x64_context *sc );
|
||||
void skein512_8x64_update( void *cc, const void *data, size_t len );
|
||||
void skein512_8x64_close( void *cc, void *dst );
|
||||
|
||||
void skein512_8way_prehash64( skein512_8way_context *sc, const void *data );
|
||||
void skein512_8way_final16( skein512_8way_context *sc, void *out,
|
||||
void skein512_8x64_prehash64( skein512_8x64_context *sc, const void *data );
|
||||
void skein512_8x64_final16( skein512_8x64_context *sc, void *out,
|
||||
const void *data );
|
||||
|
||||
void skein256_8way_init( skein256_8way_context *sc );
|
||||
void skein256_8way_update( void *cc, const void *data, size_t len );
|
||||
void skein256_8way_close( void *cc, void *dst );
|
||||
void skein256_8x64_init( skein256_8x64_context *sc );
|
||||
void skein256_8x64_update( void *cc, const void *data, size_t len );
|
||||
void skein256_8x64_close( void *cc, void *dst );
|
||||
|
||||
#define skein512_8way_context skein512_8x64_context
|
||||
#define skein512_8way_full skein512_8x64_full
|
||||
#define skein512_8way_init skein512_8x64_init
|
||||
#define skein512_8way_update skein512_8x64_update
|
||||
#define skein512_8way_close skein512_8x64_close
|
||||
#define skein512_8way_prehash64 skein512_8x64_prehash64
|
||||
#define skein512_8way_final16 skein512_8x64_final16
|
||||
#define skein256_8way_context skein256_8x64_context
|
||||
#define skein256_8way_init skein256_8x64_init
|
||||
#define skein256_8way_update skein256_8x64_update
|
||||
#define skein256_8way_close skein256_8x64_close
|
||||
|
||||
#endif // AVX512
|
||||
|
||||
@@ -81,25 +93,35 @@ typedef struct
|
||||
__m256i h0, h1, h2, h3, h4, h5, h6, h7;
|
||||
size_t ptr;
|
||||
uint64_t bcount;
|
||||
} skein_4way_big_context __attribute__ ((aligned (128)));
|
||||
} skein_4x64_big_context __attribute__ ((aligned (128)));
|
||||
|
||||
typedef skein_4way_big_context skein512_4way_context;
|
||||
typedef skein_4way_big_context skein256_4way_context;
|
||||
typedef skein_4x64_big_context skein512_4x64_context;
|
||||
typedef skein_4x64_big_context skein256_4x64_context;
|
||||
|
||||
void skein512_4way_init( skein512_4way_context *sc );
|
||||
void skein512_4way_full( skein512_4way_context *sc, void *out,
|
||||
void skein512_4x64_init( skein512_4x64_context *sc );
|
||||
void skein512_4x64_full( skein512_4x64_context *sc, void *out,
|
||||
const void *data, size_t len );
|
||||
void skein512_4way_update( void *cc, const void *data, size_t len );
|
||||
void skein512_4way_close( void *cc, void *dst );
|
||||
|
||||
void skein256_4way_init( skein256_4way_context *sc );
|
||||
void skein256_4way_update( void *cc, const void *data, size_t len );
|
||||
void skein256_4way_close( void *cc, void *dst );
|
||||
|
||||
void skein512_4way_prehash64( skein512_4way_context *sc, const void *data );
|
||||
void skein512_4way_final16( skein512_4way_context *sc, void *out,
|
||||
void skein512_4x64_update( void *cc, const void *data, size_t len );
|
||||
void skein512_4x64_close( void *cc, void *dst );
|
||||
void skein512_4x64_prehash64( skein512_4x64_context *sc, const void *data );
|
||||
void skein512_4x64_final16( skein512_4x64_context *sc, void *out,
|
||||
const void *data );
|
||||
|
||||
void skein256_4x64_init( skein256_4x64_context *sc );
|
||||
void skein256_4x64_update( void *cc, const void *data, size_t len );
|
||||
void skein256_4x64_close( void *cc, void *dst );
|
||||
|
||||
#define skein512_4way_context skein512_4x64_context
|
||||
#define skein512_4way_full skein512_4x64_full
|
||||
#define skein512_4way_init skein512_4x64_init
|
||||
#define skein512_4way_update skein512_4x64_update
|
||||
#define skein512_4way_close skein512_4x64_close
|
||||
#define skein512_4way_prehash64 skein512_4x64_prehash64
|
||||
#define skein512_4way_final16 skein512_4x64_final16
|
||||
#define skein256_4way_context skein256_4x64_context
|
||||
#define skein256_4way_init skein256_4x64_init
|
||||
#define skein256_4way_update skein256_4x64_update
|
||||
#define skein256_4way_close skein256_4x64_close
|
||||
|
||||
#endif
|
||||
|
||||
@@ -109,10 +131,10 @@ typedef struct
|
||||
v128u64_t h0, h1, h2, h3, h4, h5, h6, h7;
|
||||
size_t ptr;
|
||||
uint64_t bcount;
|
||||
} skein_2way_big_context __attribute__ ((aligned (128)));
|
||||
} skein_2x64_big_context __attribute__ ((aligned (128)));
|
||||
|
||||
typedef skein_2way_big_context skein512_2x64_context;
|
||||
typedef skein_2way_big_context skein256_2x64_context;
|
||||
typedef skein_2x64_big_context skein512_2x64_context;
|
||||
typedef skein_2x64_big_context skein256_2x64_context;
|
||||
|
||||
void skein512_2x64_init( skein512_2x64_context *sc );
|
||||
void skein512_2x64_full( skein512_2x64_context *sc, void *out,
|
||||
|
||||
@@ -21,17 +21,17 @@ int scanhash_skein2_8way( struct work *work, uint32_t max_nonce,
|
||||
__m512i *noncev = (__m512i*)vdata + 9;
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
skein512_8way_context ctx;
|
||||
skein512_8x64_context ctx;
|
||||
|
||||
mm512_bswap32_intrlv80_8x64( vdata, pdata );
|
||||
*noncev = mm512_intrlv_blend_32(
|
||||
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
|
||||
n+3, 0, n+2, 0, n+1, 0, n , 0 ), *noncev );
|
||||
skein512_8way_prehash64( &ctx, vdata );
|
||||
skein512_8x64_prehash64( &ctx, vdata );
|
||||
do
|
||||
{
|
||||
skein512_8way_final16( &ctx, hash, vdata + (16*8) );
|
||||
skein512_8way_full( &ctx, hash, hash, 64 );
|
||||
skein512_8x64_final16( &ctx, hash, vdata + (16*8) );
|
||||
skein512_8x64_full( &ctx, hash, hash, 64 );
|
||||
|
||||
for ( int lane = 0; lane < 8; lane++ )
|
||||
if ( unlikely( hashq3[ lane ] <= targq3 && !bench ) )
|
||||
@@ -71,16 +71,16 @@ int scanhash_skein2_4way( struct work *work, uint32_t max_nonce,
|
||||
__m256i *noncev = (__m256i*)vdata + 9;
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
skein512_4way_context ctx;
|
||||
skein512_4x64_context ctx;
|
||||
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
skein512_4way_prehash64( &ctx, vdata );
|
||||
skein512_4x64_prehash64( &ctx, vdata );
|
||||
*noncev = mm256_intrlv_blend_32(
|
||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
||||
do
|
||||
{
|
||||
skein512_4way_final16( &ctx, hash, vdata + (16*4) );
|
||||
skein512_4way_full( &ctx, hash, hash, 64 );
|
||||
skein512_4x64_final16( &ctx, hash, vdata + (16*4) );
|
||||
skein512_4x64_full( &ctx, hash, hash, 64 );
|
||||
|
||||
for ( int lane = 0; lane < 4; lane++ )
|
||||
if ( hash_q3[ lane ] <= targ_q3 )
|
||||
|
||||
Reference in New Issue
Block a user