mirror of
https://github.com/JayDDee/cpuminer-opt.git
synced 2025-09-17 23:44:27 +00:00
v3.12.3
This commit is contained in:
@@ -24,11 +24,7 @@ void skeinhash_8way( void *state, const void *input )
|
||||
uint32_t vhash32[16*8] __attribute__ ((aligned (128)));
|
||||
sha256_8way_context ctx_sha256;
|
||||
|
||||
skein512_8way_full( &ctx_skein, vhash64, input, 80 );
|
||||
|
||||
// skein512_8way_update( &ctx_skein, input + (64*8), 16 );
|
||||
// skein512_8way_close( &ctx_skein, vhash64 );
|
||||
|
||||
skein512_8way_final16( &ctx_skein, vhash64, input + (64*8) );
|
||||
rintrlv_8x64_8x32( vhash32, vhash64, 512 );
|
||||
|
||||
sha256_8way_init( &ctx_sha256 );
|
||||
@@ -57,8 +53,7 @@ int scanhash_skein_8way( struct work *work, uint32_t max_nonce,
|
||||
*noncev = mm512_intrlv_blend_32(
|
||||
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
|
||||
n+3, 0, n+2, 0, n+1, 0, n , 0 ), *noncev );
|
||||
// skein512_8way_init( &skein512_8way_ctx );
|
||||
// skein512_8way_update( &skein512_8way_ctx, vdata, 64 );
|
||||
skein512_8way_prehash64( &skein512_8way_ctx, vdata );
|
||||
do
|
||||
{
|
||||
skeinhash_8way( hash, vdata );
|
||||
@@ -85,14 +80,14 @@ int scanhash_skein_8way( struct work *work, uint32_t max_nonce,
|
||||
|
||||
#elif defined (SKEIN_4WAY)
|
||||
|
||||
//static __thread skein512_4way_context skein512_4way_ctx
|
||||
// __attribute__ ((aligned (64)));
|
||||
static __thread skein512_4way_context skein512_4way_ctx
|
||||
__attribute__ ((aligned (64)));
|
||||
|
||||
void skeinhash_4way( void *state, const void *input )
|
||||
{
|
||||
uint64_t vhash64[8*4] __attribute__ ((aligned (128)));
|
||||
skein512_4way_context ctx_skein;
|
||||
// memcpy( &ctx_skein, &skein512_4way_ctx, sizeof( ctx_skein ) );
|
||||
memcpy( &ctx_skein, &skein512_4way_ctx, sizeof( ctx_skein ) );
|
||||
#if defined(__SHA__)
|
||||
uint32_t hash0[16] __attribute__ ((aligned (64)));
|
||||
uint32_t hash1[16] __attribute__ ((aligned (64)));
|
||||
@@ -104,10 +99,7 @@ void skeinhash_4way( void *state, const void *input )
|
||||
sha256_4way_context ctx_sha256;
|
||||
#endif
|
||||
|
||||
skein512_4way_full( &ctx_skein, vhash64, input, 80 );
|
||||
|
||||
// skein512_4way_update( &ctx_skein, input + (64*4), 16 );
|
||||
// skein512_4way_close( &ctx_skein, vhash64 );
|
||||
skein512_4way_final16( &ctx_skein, vhash64, input + (64*4) );
|
||||
|
||||
#if defined(__SHA__)
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash64, 512 );
|
||||
@@ -156,8 +148,7 @@ int scanhash_skein_4way( struct work *work, uint32_t max_nonce,
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
// skein512_4way_init( &skein512_4way_ctx );
|
||||
// skein512_4way_update( &skein512_4way_ctx, vdata, 64 );
|
||||
skein512_4way_prehash64( &skein512_4way_ctx, vdata );
|
||||
|
||||
*noncev = mm256_intrlv_blend_32(
|
||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
||||
|
||||
@@ -728,6 +728,86 @@ void skein512_8way_full( skein512_8way_context *sc, void *out, const void *data,
|
||||
casti_m512i( out, 7 ) = h7;
|
||||
}
|
||||
|
||||
void
|
||||
skein512_8way_prehash64( skein512_8way_context *sc, const void *data )
|
||||
{
|
||||
__m512i *vdata = (__m512*)data;
|
||||
__m512i *buf = sc->buf;
|
||||
buf[0] = vdata[0];
|
||||
buf[1] = vdata[1];
|
||||
buf[2] = vdata[2];
|
||||
buf[3] = vdata[3];
|
||||
buf[4] = vdata[4];
|
||||
buf[5] = vdata[5];
|
||||
buf[6] = vdata[6];
|
||||
buf[7] = vdata[7];
|
||||
register __m512i h0 = m512_const1_64( 0x4903ADFF749C51CE );
|
||||
register __m512i h1 = m512_const1_64( 0x0D95DE399746DF03 );
|
||||
register __m512i h2 = m512_const1_64( 0x8FD1934127C79BCE );
|
||||
register __m512i h3 = m512_const1_64( 0x9A255629FF352CB1 );
|
||||
register __m512i h4 = m512_const1_64( 0x5DB62599DF6CA7B0 );
|
||||
register __m512i h5 = m512_const1_64( 0xEABE394CA9D5C3F4 );
|
||||
register __m512i h6 = m512_const1_64( 0x991112C71A75B523 );
|
||||
register __m512i h7 = m512_const1_64( 0xAE18A40B660FCC33 );
|
||||
uint64_t bcount = 1;
|
||||
|
||||
UBI_BIG_8WAY( 224, 0 );
|
||||
sc->h0 = h0;
|
||||
sc->h1 = h1;
|
||||
sc->h2 = h2;
|
||||
sc->h3 = h3;
|
||||
sc->h4 = h4;
|
||||
sc->h5 = h5;
|
||||
sc->h6 = h6;
|
||||
sc->h7 = h7;
|
||||
}
|
||||
|
||||
void
|
||||
skein512_8way_final16( skein512_8way_context *sc, void *output,
|
||||
const void *data )
|
||||
{
|
||||
__m512i *in = (__m512i*)data;
|
||||
__m512i *buf = sc->buf;
|
||||
__m512i *out = (__m512i*)output;
|
||||
register __m512i h0 = sc->h0;
|
||||
register __m512i h1 = sc->h1;
|
||||
register __m512i h2 = sc->h2;
|
||||
register __m512i h3 = sc->h3;
|
||||
register __m512i h4 = sc->h4;
|
||||
register __m512i h5 = sc->h5;
|
||||
register __m512i h6 = sc->h6;
|
||||
register __m512i h7 = sc->h7;
|
||||
|
||||
const __m512i zero = m512_zero;
|
||||
buf[0] = in[0];
|
||||
buf[1] = in[1];
|
||||
buf[2] = zero;
|
||||
buf[3] = zero;
|
||||
buf[4] = zero;
|
||||
buf[5] = zero;
|
||||
buf[6] = zero;
|
||||
buf[7] = zero;
|
||||
|
||||
uint64_t bcount = 1;
|
||||
UBI_BIG_8WAY( 352, 16 );
|
||||
|
||||
buf[0] = zero;
|
||||
buf[1] = zero;
|
||||
|
||||
bcount = 0;
|
||||
UBI_BIG_8WAY( 510, 8 );
|
||||
|
||||
out[0] = h0;
|
||||
out[1] = h1;
|
||||
out[2] = h2;
|
||||
out[3] = h3;
|
||||
out[4] = h4;
|
||||
out[5] = h5;
|
||||
out[6] = h6;
|
||||
out[7] = h7;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
skein256_8way_update(void *cc, const void *data, size_t len)
|
||||
{
|
||||
@@ -942,6 +1022,83 @@ skein512_4way_full( skein512_4way_context *sc, void *out, const void *data,
|
||||
casti_m256i( out, 7 ) = h7;
|
||||
}
|
||||
|
||||
void
|
||||
skein512_4way_prehash64( skein512_4way_context *sc, const void *data )
|
||||
{
|
||||
__m256i *vdata = (__m256i*)data;
|
||||
__m256i *buf = sc->buf;
|
||||
buf[0] = vdata[0];
|
||||
buf[1] = vdata[1];
|
||||
buf[2] = vdata[2];
|
||||
buf[3] = vdata[3];
|
||||
buf[4] = vdata[4];
|
||||
buf[5] = vdata[5];
|
||||
buf[6] = vdata[6];
|
||||
buf[7] = vdata[7];
|
||||
register __m256i h0 = m256_const1_64( 0x4903ADFF749C51CE );
|
||||
register __m256i h1 = m256_const1_64( 0x0D95DE399746DF03 );
|
||||
register __m256i h2 = m256_const1_64( 0x8FD1934127C79BCE );
|
||||
register __m256i h3 = m256_const1_64( 0x9A255629FF352CB1 );
|
||||
register __m256i h4 = m256_const1_64( 0x5DB62599DF6CA7B0 );
|
||||
register __m256i h5 = m256_const1_64( 0xEABE394CA9D5C3F4 );
|
||||
register __m256i h6 = m256_const1_64( 0x991112C71A75B523 );
|
||||
register __m256i h7 = m256_const1_64( 0xAE18A40B660FCC33 );
|
||||
uint64_t bcount = 1;
|
||||
|
||||
UBI_BIG_4WAY( 224, 0 );
|
||||
sc->h0 = h0;
|
||||
sc->h1 = h1;
|
||||
sc->h2 = h2;
|
||||
sc->h3 = h3;
|
||||
sc->h4 = h4;
|
||||
sc->h5 = h5;
|
||||
sc->h6 = h6;
|
||||
sc->h7 = h7;
|
||||
}
|
||||
|
||||
void
|
||||
skein512_4way_final16( skein512_4way_context *sc, void *out, const void *data )
|
||||
{
|
||||
__m256i *vdata = (__m256i*)data;
|
||||
__m256i *buf = sc->buf;
|
||||
register __m256i h0 = sc->h0;
|
||||
register __m256i h1 = sc->h1;
|
||||
register __m256i h2 = sc->h2;
|
||||
register __m256i h3 = sc->h3;
|
||||
register __m256i h4 = sc->h4;
|
||||
register __m256i h5 = sc->h5;
|
||||
register __m256i h6 = sc->h6;
|
||||
register __m256i h7 = sc->h7;
|
||||
|
||||
const __m256i zero = m256_zero;
|
||||
buf[0] = vdata[0];
|
||||
buf[1] = vdata[1];
|
||||
buf[2] = zero;
|
||||
buf[3] = zero;
|
||||
buf[4] = zero;
|
||||
buf[5] = zero;
|
||||
buf[6] = zero;
|
||||
buf[7] = zero;
|
||||
|
||||
uint64_t bcount = 1;
|
||||
UBI_BIG_4WAY( 352, 16 );
|
||||
|
||||
buf[0] = zero;
|
||||
buf[1] = zero;
|
||||
|
||||
bcount = 0;
|
||||
UBI_BIG_4WAY( 510, 8 );
|
||||
|
||||
casti_m256i( out, 0 ) = h0;
|
||||
casti_m256i( out, 1 ) = h1;
|
||||
casti_m256i( out, 2 ) = h2;
|
||||
casti_m256i( out, 3 ) = h3;
|
||||
casti_m256i( out, 4 ) = h4;
|
||||
casti_m256i( out, 5 ) = h5;
|
||||
casti_m256i( out, 6 ) = h6;
|
||||
casti_m256i( out, 7 ) = h7;
|
||||
}
|
||||
|
||||
void
|
||||
skein256_4way_update(void *cc, const void *data, size_t len)
|
||||
{
|
||||
|
||||
@@ -69,6 +69,10 @@ void skein512_8way_init( skein512_8way_context *sc );
|
||||
void skein512_8way_update( void *cc, const void *data, size_t len );
|
||||
void skein512_8way_close( void *cc, void *dst );
|
||||
|
||||
void skein512_8way_prehash64( skein512_8way_context *sc, const void *data );
|
||||
void skein512_8way_final16( skein512_8way_context *sc, void *out,
|
||||
const void *data );
|
||||
|
||||
void skein256_8way_init( skein256_8way_context *sc );
|
||||
void skein256_8way_update( void *cc, const void *data, size_t len );
|
||||
void skein256_8way_close( void *cc, void *dst );
|
||||
@@ -96,6 +100,10 @@ void skein256_4way_init( skein256_4way_context *sc );
|
||||
void skein256_4way_update( void *cc, const void *data, size_t len );
|
||||
void skein256_4way_close( void *cc, void *dst );
|
||||
|
||||
void skein512_4way_prehash64( skein512_4way_context *sc, const void *data );
|
||||
void skein512_4way_final16( skein512_4way_context *sc, void *out,
|
||||
const void *data );
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -5,20 +5,16 @@
|
||||
|
||||
#if defined(SKEIN_8WAY)
|
||||
|
||||
// static __thread skein512_8way_context skein512_8way_ctx
|
||||
// __attribute__ ((aligned (64)));
|
||||
static __thread skein512_8way_context skein512_8way_ctx
|
||||
__attribute__ ((aligned (64)));
|
||||
|
||||
void skein2hash_8way( void *output, const void *input )
|
||||
{
|
||||
uint64_t hash[16*8] __attribute__ ((aligned (128)));
|
||||
skein512_8way_context ctx;
|
||||
// memcpy( &ctx, &skein512_8way_ctx, sizeof( ctx ) );
|
||||
|
||||
skein512_8way_full( &ctx, hash, input, 80 );
|
||||
|
||||
// skein512_8way_update( &ctx, input + (64*8), 16 );
|
||||
// skein512_8way_close( &ctx, hash );
|
||||
memcpy( &ctx, &skein512_8way_ctx, sizeof( ctx ) );
|
||||
|
||||
skein512_8way_final16( &ctx, hash, input + (64*8) );
|
||||
skein512_8way_full( &ctx, output, hash, 64 );
|
||||
}
|
||||
|
||||
@@ -38,16 +34,17 @@ int scanhash_skein2_8way( struct work *work, uint32_t max_nonce,
|
||||
__m512i *noncev = (__m512i*)vdata + 9;
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
skein512_8way_context ctx;
|
||||
|
||||
mm512_bswap32_intrlv80_8x64( vdata, pdata );
|
||||
*noncev = mm512_intrlv_blend_32(
|
||||
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
|
||||
n+3, 0, n+2, 0, n+1, 0, n , 0 ), *noncev );
|
||||
// skein512_8way_init( &skein512_8way_ctx );
|
||||
// skein512_8way_update( &skein512_8way_ctx, vdata, 64 );
|
||||
skein512_8way_prehash64( &ctx, vdata );
|
||||
do
|
||||
{
|
||||
skein2hash_8way( hash, vdata );
|
||||
skein512_8way_final16( &ctx, hash, vdata + (16*8) );
|
||||
skein512_8way_full( &ctx, hash, hash, 64 );
|
||||
|
||||
for ( int lane = 0; lane < 8; lane++ )
|
||||
if ( unlikely( hashq3[ lane ] <= targq3 && !bench ) )
|
||||
@@ -71,19 +68,16 @@ int scanhash_skein2_8way( struct work *work, uint32_t max_nonce,
|
||||
|
||||
#elif defined(SKEIN_4WAY)
|
||||
|
||||
//static __thread skein512_4way_context skein512_4way_ctx
|
||||
// __attribute__ ((aligned (64)));
|
||||
static __thread skein512_4way_context skein512_4way_ctx
|
||||
__attribute__ ((aligned (64)));
|
||||
|
||||
void skein2hash_4way( void *output, const void *input )
|
||||
{
|
||||
skein512_4way_context ctx;
|
||||
// memcpy( &ctx, &skein512_4way_ctx, sizeof( ctx ) );
|
||||
memcpy( &ctx, &skein512_4way_ctx, sizeof( ctx ) );
|
||||
uint64_t hash[16*4] __attribute__ ((aligned (64)));
|
||||
|
||||
// skein512_4way_update( &ctx, input + (64*4), 16 );
|
||||
// skein512_4way_close( &ctx, hash );
|
||||
|
||||
skein512_4way_full( &ctx, hash, input, 80 );
|
||||
skein512_4way_final16( &ctx, hash, input + (64*4) );
|
||||
skein512_4way_full( &ctx, output, hash, 64 );
|
||||
}
|
||||
|
||||
@@ -103,15 +97,16 @@ int scanhash_skein2_4way( struct work *work, uint32_t max_nonce,
|
||||
__m256i *noncev = (__m256i*)vdata + 9;
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
skein512_4way_context ctx;
|
||||
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
// skein512_4way_init( &skein512_4way_ctx );
|
||||
// skein512_4way_update( &skein512_4way_ctx, vdata, 64 );
|
||||
skein512_4way_prehash64( &ctx, vdata );
|
||||
*noncev = mm256_intrlv_blend_32(
|
||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
||||
do
|
||||
{
|
||||
skein2hash_4way( hash, vdata );
|
||||
skein512_4way_final16( &ctx, hash, vdata + (16*4) );
|
||||
skein512_4way_full( &ctx, hash, hash, 64 );
|
||||
|
||||
for ( int lane = 0; lane < 4; lane++ )
|
||||
if ( hash_q3[ lane ] <= targ_q3 )
|
||||
|
||||
Reference in New Issue
Block a user