Compare commits

..

3 Commits

Author SHA1 Message Date
Jay D Dee
3da2b958cf v3.12.2 2020-02-09 13:30:40 -05:00
Jay D Dee
dc2f8d81d3 v3.12.1 2020-02-07 20:18:20 -05:00
Jay D Dee
fc97ef174a v3.12.0.1 2020-02-06 22:50:20 -05:00
42 changed files with 1641 additions and 1682 deletions

View File

@@ -65,6 +65,31 @@ If not what makes it happen or not happen?
Change Log
----------
v3.12.2
Fixed xevan, skein, skein2 AVX2, #238.
Reversed polarity of AVX2 vector bit test utilities, and all users, to be
logically and semantically correct. Follow up to issue #236.
v3.12.1
Fixed anime AVX2 low difficulty shares, git issue #236.
Periodic summary now reports lost hash rate due to rejected and stale shares,
displayed only when non-zero.
v3.12.0.1
Fixed hodl rejects, git issue #237.
Fixed debug code added in v3.12.0 to work with AVX2 to be enabled only
after low difficulty share have been seen to avoid unnecessarily excessive
log outout.
Added more digits of precision to diff in log output to help diagnose
low difficulty shares.
v3.12.0
Faster phi2 AVX2 +62%, AVX512 +150% on Intel CPUs. AMD Ryzen AVX2 is

View File

@@ -138,7 +138,7 @@ void bmw512_2way_close( bmw512_2way_context *ctx, void *dst );
#if defined(__AVX2__)
// BMW-512 4 way 64
// BMW-512 64 bit 4 way
typedef struct {
__m256i buf[16];
@@ -149,7 +149,6 @@ typedef struct {
typedef bmw_4way_big_context bmw512_4way_context;
void bmw512_4way_init(void *cc);
void bmw512_4way_update(void *cc, const void *data, size_t len);
@@ -164,6 +163,7 @@ void bmw512_4way_addbits_and_close(
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
// BMW-512 64 bit 8 way
typedef struct {
__m512i buf[16];
__m512i H[16];
@@ -171,6 +171,8 @@ typedef struct {
uint64_t bit_count;
} bmw512_8way_context __attribute__((aligned(128)));
void bmw512_8way_full( bmw512_8way_context *ctx, void *out, const void *data,
size_t len );
void bmw512_8way_init( bmw512_8way_context *ctx );
void bmw512_8way_update( bmw512_8way_context *ctx, const void *data,
size_t len );

View File

@@ -1507,6 +1507,93 @@ void bmw512_8way_close( bmw512_8way_context *ctx, void *dst )
casti_m512i( dst, u ) = h1[ v ];
}
void bmw512_8way_full( bmw512_8way_context *ctx, void *out, const void *data,
size_t len )
{
__m512i *vdata = (__m512i*)data;
__m512i *buf = ctx->buf;
__m512i htmp[16];
__m512i *H = ctx->H;
__m512i *h2 = htmp;
uint64_t bit_count = len * 8;
size_t ptr = 0;
const int buf_size = 128; // bytes of one lane, compatible with len
// Init
H[ 0] = m512_const1_64( 0x8081828384858687 );
H[ 1] = m512_const1_64( 0x88898A8B8C8D8E8F );
H[ 2] = m512_const1_64( 0x9091929394959697 );
H[ 3] = m512_const1_64( 0x98999A9B9C9D9E9F );
H[ 4] = m512_const1_64( 0xA0A1A2A3A4A5A6A7 );
H[ 5] = m512_const1_64( 0xA8A9AAABACADAEAF );
H[ 6] = m512_const1_64( 0xB0B1B2B3B4B5B6B7 );
H[ 7] = m512_const1_64( 0xB8B9BABBBCBDBEBF );
H[ 8] = m512_const1_64( 0xC0C1C2C3C4C5C6C7 );
H[ 9] = m512_const1_64( 0xC8C9CACBCCCDCECF );
H[10] = m512_const1_64( 0xD0D1D2D3D4D5D6D7 );
H[11] = m512_const1_64( 0xD8D9DADBDCDDDEDF );
H[12] = m512_const1_64( 0xE0E1E2E3E4E5E6E7 );
H[13] = m512_const1_64( 0xE8E9EAEBECEDEEEF );
H[14] = m512_const1_64( 0xF0F1F2F3F4F5F6F7 );
H[15] = m512_const1_64( 0xF8F9FAFBFCFDFEFF );
// Update
while ( len > 0 )
{
size_t clen;
clen = buf_size - ptr;
if ( clen > len )
clen = len;
memcpy_512( buf + (ptr>>3), vdata, clen >> 3 );
vdata = vdata + (clen>>3);
len -= clen;
ptr += clen;
if ( ptr == buf_size )
{
__m512i *ht;
compress_big_8way( buf, H, h2 );
ht = H;
H = h2;
h2 = ht;
ptr = 0;
}
}
if ( H != ctx->H )
memcpy_512( ctx->H, H, 16 );
// Close
{
__m512i h1[16], h2[16];
size_t u, v;
buf[ ptr>>3 ] = m512_const1_64( 0x80 );
ptr += 8;
if ( ptr > (buf_size - 8) )
{
memset_zero_512( buf + (ptr>>3), (buf_size - ptr) >> 3 );
compress_big_8way( buf, H, h1 );
ptr = 0;
H = h1;
}
memset_zero_512( buf + (ptr>>3), (buf_size - 8 - ptr) >> 3 );
buf[ (buf_size - 8) >> 3 ] = _mm512_set1_epi64( bit_count );
compress_big_8way( buf, H, h2 );
for ( u = 0; u < 16; u ++ )
buf[ u ] = h2[ u ];
compress_big_8way( buf, final_b8, h1 );
for (u = 0, v = 8; u < 8; u ++, v ++)
casti_m512i( out, u ) = h1[ v ];
}
}
#endif // AVX512
#ifdef __cplusplus

View File

@@ -144,7 +144,7 @@ int hodl_scanhash( struct work* work, uint32_t max_nonce,
#if defined(__AES__)
GenRandomGarbage( (CacheEntry*)hodl_scratchbuf, work->data, mythr->id );
pthread_barrier_wait( &hodl_barrier );
return scanhash_hodl_wolf( work, max_nonce, hashes_done, thr_info );
return scanhash_hodl_wolf( work, max_nonce, hashes_done, mythr );
#endif
return false;
}

View File

@@ -76,37 +76,34 @@ int scanhash_allium( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t _ALIGN(128) hash[8];
uint32_t _ALIGN(128) endiandata[20];
uint32_t _ALIGN(128) edata[20];
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t Htarg = ptarget[7];
const uint32_t first_nonce = pdata[19];
uint32_t nonce = first_nonce;
int thr_id = mythr->id; // thr_id arg is deprecated
const int thr_id = mythr->id;
if ( opt_benchmark )
ptarget[7] = 0x3ffff;
for ( int i = 0; i < 19; i++ )
be32enc( &endiandata[i], pdata[i] );
edata[i] = bswap_32( pdata[i] );
sph_blake256_init( &allium_ctx.blake );
sph_blake256( &allium_ctx.blake, endiandata, 64 );
sph_blake256( &allium_ctx.blake, edata, 64 );
do {
be32enc( &endiandata[19], nonce );
allium_hash( hash, endiandata );
if ( hash[7] <= Htarg )
if ( fulltest( hash, ptarget ) && !opt_benchmark )
edata[19] = nonce;
allium_hash( hash, edata );
if ( valid_hash( hash, ptarget ) && !opt_benchmark )
{
pdata[19] = nonce;
pdata[19] = bswap_32( nonce );
submit_solution( work, hash, mythr );
}
nonce++;
} while ( nonce < max_nonce && !work_restart[thr_id].restart );
pdata[19] = nonce;
*hashes_done = pdata[19] - first_nonce + 1;
*hashes_done = pdata[19] - first_nonce;
return 0;
}

View File

@@ -119,7 +119,7 @@ bool register_lyra2rev2_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_lyra2rev2;
gate->hash = (void*)&lyra2rev2_hash;
#endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT;
gate->optimizations = SSE2_OPT | AVX2_OPT | AVX512_OPT;
gate->miner_thread_init = (void*)&lyra2rev2_thread_init;
opt_target_factor = 256.0;
return true;

View File

@@ -7,27 +7,8 @@
#include "algo/cubehash/cubehash_sse2.h"
#include "algo/cubehash/cube-hash-2way.h"
#if defined (LYRA2REV2_8WAY)
typedef struct {
blake256_8way_context blake;
keccak256_8way_context keccak;
cube_4way_context cube;
skein256_8way_context skein;
bmw256_8way_context bmw;
} lyra2v2_8way_ctx_holder __attribute__ ((aligned (64)));
static lyra2v2_8way_ctx_holder l2v2_8way_ctx;
bool init_lyra2rev2_8way_ctx()
{
keccak256_8way_init( &l2v2_8way_ctx.keccak );
cube_4way_init( &l2v2_8way_ctx.cube, 256, 16, 32 );
skein256_8way_init( &l2v2_8way_ctx.skein );
bmw256_8way_init( &l2v2_8way_ctx.bmw );
return true;
}
#if 0
void lyra2rev2_8way_hash( void *state, const void *input )
{
uint32_t vhash[8*8] __attribute__ ((aligned (128)));
@@ -52,14 +33,24 @@ void lyra2rev2_8way_hash( void *state, const void *input )
keccak256_8way_update( &ctx.keccak, vhashA, 32 );
keccak256_8way_close( &ctx.keccak, vhash );
rintrlv_8x64_4x128( vhashA, vhashB, vhash, 256 );
dintrlv_8x64( hash0, hash1, hash2, hash3,
hash4, hash5, hash6, hash7, vhash, 256 );
cube_4way_update_close( &ctx.cube, vhashA, vhashA, 32 );
cube_4way_init( &ctx.cube, 256, 16, 32 );
cube_4way_update_close( &ctx.cube, vhashB, vhashB, 32 );
cubehash_full( &ctx.cube, (byte*) hash0, 256, (const byte*) hash0, 32 );
cubehash_full( &ctx.cube, (byte*) hash1, 256, (const byte*) hash1, 32 );
cubehash_full( &ctx.cube, (byte*) hash2, 256, (const byte*) hash2, 32 );
cubehash_full( &ctx.cube, (byte*) hash3, 256, (const byte*) hash3, 32 );
cubehash_full( &ctx.cube, (byte*) hash4, 256, (const byte*) hash4, 32 );
cubehash_full( &ctx.cube, (byte*) hash5, 256, (const byte*) hash5, 32 );
cubehash_full( &ctx.cube, (byte*) hash6, 256, (const byte*) hash6, 32 );
cubehash_full( &ctx.cube, (byte*) hash7, 256, (const byte*) hash7, 32 );
dintrlv_4x128( hash0, hash1, hash2, hash3, vhashA, 256 );
dintrlv_4x128( hash4, hash5, hash6, hash7, vhashB, 256 );
// cube_4way_update_close( &ctx.cube, vhashA, vhashA, 32 );
// cube_4way_init( &ctx.cube, 256, 16, 32 );
// cube_4way_update_close( &ctx.cube, vhashB, vhashB, 32 );
//
// dintrlv_4x128( hash0, hash1, hash2, hash3, vhashA, 256 );
// dintrlv_4x128( hash4, hash5, hash6, hash7, vhashB, 256 );
intrlv_2x256( vhash, hash0, hash1, 256 );
LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 );
@@ -80,15 +71,123 @@ void lyra2rev2_8way_hash( void *state, const void *input )
skein256_8way_update( &ctx.skein, vhash, 32 );
skein256_8way_close( &ctx.skein, vhash );
rintrlv_8x64_4x128( vhashA, vhashB, vhash, 256 );
dintrlv_8x64( hash0, hash1, hash2, hash3,
hash4, hash5, hash6, hash7, vhash, 256 );
cube_4way_init( &ctx.cube, 256, 16, 32 );
cube_4way_update_close( &ctx.cube, vhashA, vhashA, 32 );
cube_4way_init( &ctx.cube, 256, 16, 32 );
cube_4way_update_close( &ctx.cube, vhashB, vhashB, 32 );
dintrlv_4x128( hash0, hash1, hash2, hash3, vhashA, 256 );
dintrlv_4x128( hash4, hash5, hash6, hash7, vhashB, 256 );
cubehash_full( &ctx.cube, (byte*) hash0, 256, (const byte*) hash0, 32 );
cubehash_full( &ctx.cube, (byte*) hash1, 256, (const byte*) hash1, 32 );
cubehash_full( &ctx.cube, (byte*) hash2, 256, (const byte*) hash2, 32 );
cubehash_full( &ctx.cube, (byte*) hash3, 256, (const byte*) hash3, 32 );
cubehash_full( &ctx.cube, (byte*) hash4, 256, (const byte*) hash4, 32 );
cubehash_full( &ctx.cube, (byte*) hash5, 256, (const byte*) hash5, 32 );
cubehash_full( &ctx.cube, (byte*) hash6, 256, (const byte*) hash6, 32 );
cubehash_full( &ctx.cube, (byte*) hash7, 256, (const byte*) hash7, 32 );
// cube_4way_init( &ctx.cube, 256, 16, 32 );
// cube_4way_update_close( &ctx.cube, vhashA, vhashA, 32 );
// cube_4way_init( &ctx.cube, 256, 16, 32 );
// cube_4way_update_close( &ctx.cube, vhashB, vhashB, 32 );
//
// dintrlv_4x128( hash0, hash1, hash2, hash3, vhashA, 256 );
// dintrlv_4x128( hash4, hash5, hash6, hash7, vhashB, 256 );
intrlv_8x32( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
hash7, 256 );
bmw256_8way_update( &ctx.bmw, vhash, 32 );
bmw256_8way_close( &ctx.bmw, state );
}
#endif
#if defined (LYRA2REV2_8WAY)
typedef struct {
blake256_8way_context blake;
keccak256_8way_context keccak;
cubehashParam cube;
skein256_8way_context skein;
bmw256_8way_context bmw;
} lyra2v2_8way_ctx_holder __attribute__ ((aligned (64)));
static lyra2v2_8way_ctx_holder l2v2_8way_ctx;
bool init_lyra2rev2_8way_ctx()
{
keccak256_8way_init( &l2v2_8way_ctx.keccak );
cubehashInit( &l2v2_8way_ctx.cube, 256, 16, 32 );
skein256_8way_init( &l2v2_8way_ctx.skein );
bmw256_8way_init( &l2v2_8way_ctx.bmw );
return true;
}
void lyra2rev2_8way_hash( void *state, const void *input )
{
uint32_t vhash[8*8] __attribute__ ((aligned (128)));
uint32_t vhashA[8*8] __attribute__ ((aligned (64)));
uint32_t hash0[8] __attribute__ ((aligned (64)));
uint32_t hash1[8] __attribute__ ((aligned (64)));
uint32_t hash2[8] __attribute__ ((aligned (64)));
uint32_t hash3[8] __attribute__ ((aligned (64)));
uint32_t hash4[8] __attribute__ ((aligned (64)));
uint32_t hash5[8] __attribute__ ((aligned (64)));
uint32_t hash6[8] __attribute__ ((aligned (64)));
uint32_t hash7[8] __attribute__ ((aligned (64)));
lyra2v2_8way_ctx_holder ctx __attribute__ ((aligned (64)));
memcpy( &ctx, &l2v2_8way_ctx, sizeof(l2v2_8way_ctx) );
blake256_8way_update( &ctx.blake, input + (64<<3), 16 );
blake256_8way_close( &ctx.blake, vhash );
rintrlv_8x32_8x64( vhashA, vhash, 256 );
keccak256_8way_update( &ctx.keccak, vhashA, 32 );
keccak256_8way_close( &ctx.keccak, vhash );
dintrlv_8x64( hash0, hash1, hash2, hash3,
hash4, hash5, hash6, hash7, vhash, 256 );
cubehash_full( &ctx.cube, (byte*) hash0, 256, (const byte*) hash0, 32 );
cubehash_full( &ctx.cube, (byte*) hash1, 256, (const byte*) hash1, 32 );
cubehash_full( &ctx.cube, (byte*) hash2, 256, (const byte*) hash2, 32 );
cubehash_full( &ctx.cube, (byte*) hash3, 256, (const byte*) hash3, 32 );
cubehash_full( &ctx.cube, (byte*) hash4, 256, (const byte*) hash4, 32 );
cubehash_full( &ctx.cube, (byte*) hash5, 256, (const byte*) hash5, 32 );
cubehash_full( &ctx.cube, (byte*) hash6, 256, (const byte*) hash6, 32 );
cubehash_full( &ctx.cube, (byte*) hash7, 256, (const byte*) hash7, 32 );
intrlv_2x256( vhash, hash0, hash1, 256 );
LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 );
dintrlv_2x256( hash0, hash1, vhash, 256 );
intrlv_2x256( vhash, hash2, hash3, 256 );
LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 );
dintrlv_2x256( hash2, hash3, vhash, 256 );
intrlv_2x256( vhash, hash4, hash5, 256 );
LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 );
dintrlv_2x256( hash4, hash5, vhash, 256 );
intrlv_2x256( vhash, hash6, hash7, 256 );
LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 );
dintrlv_2x256( hash6, hash7, vhash, 256 );
intrlv_8x64( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
hash7, 256 );
skein256_8way_update( &ctx.skein, vhash, 32 );
skein256_8way_close( &ctx.skein, vhash );
dintrlv_8x64( hash0, hash1, hash2, hash3,
hash4, hash5, hash6, hash7, vhash, 256 );
cubehash_full( &ctx.cube, (byte*) hash0, 256, (const byte*) hash0, 32 );
cubehash_full( &ctx.cube, (byte*) hash1, 256, (const byte*) hash1, 32 );
cubehash_full( &ctx.cube, (byte*) hash2, 256, (const byte*) hash2, 32 );
cubehash_full( &ctx.cube, (byte*) hash3, 256, (const byte*) hash3, 32 );
cubehash_full( &ctx.cube, (byte*) hash4, 256, (const byte*) hash4, 32 );
cubehash_full( &ctx.cube, (byte*) hash5, 256, (const byte*) hash5, 32 );
cubehash_full( &ctx.cube, (byte*) hash6, 256, (const byte*) hash6, 32 );
cubehash_full( &ctx.cube, (byte*) hash7, 256, (const byte*) hash7, 32 );
intrlv_8x32( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
hash7, 256 );
@@ -97,49 +196,49 @@ void lyra2rev2_8way_hash( void *state, const void *input )
bmw256_8way_close( &ctx.bmw, state );
}
int scanhash_lyra2rev2_8way( struct work *work, uint32_t max_nonce,
int scanhash_lyra2rev2_8way( struct work *work, const uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t hash[8*8] __attribute__ ((aligned (128)));
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
uint32_t *hash7 = &(hash[7<<3]);
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
uint32_t *hashd7 = &hash[7*8];
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce - 8;
uint32_t n = first_nonce;
const uint32_t Htarg = ptarget[7];
__m256i *noncev = (__m256i*)vdata + 19; // aligned
int thr_id = mythr->id;
const uint32_t targ32 = ptarget[7];
__m256i *noncev = (__m256i*)vdata + 19;
const int thr_id = mythr->id;
const bool bench = opt_benchmark;
if ( opt_benchmark )
( (uint32_t*)ptarget )[7] = 0x0000ff;
if ( bench ) ptarget[7] = 0x0000ff;
mm256_bswap32_intrlv80_8x32( vdata, pdata );
*noncev = _mm256_set_epi32( n+7, n+6, n+5, n+4, n+3, n+2, n+1, n );
blake256_8way_init( &l2v2_8way_ctx.blake );
blake256_8way_update( &l2v2_8way_ctx.blake, vdata, 64 );
do
{
*noncev = mm256_bswap_32( _mm256_set_epi32( n+7, n+6, n+5, n+4,
n+3, n+2, n+1, n ) );
lyra2rev2_8way_hash( hash, vdata );
pdata[19] = n;
for ( int lane = 0; lane < 8; lane++ ) if ( hash7[lane] <= Htarg )
for ( int lane = 0; lane < 8; lane++ )
if ( unlikely( hashd7[lane] <= targ32 ) )
{
extr_lane_8x32( lane_hash, hash, lane, 256 );
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
if ( likely( valid_hash( lane_hash, ptarget ) && !bench ) )
{
pdata[19] = n + lane;
submit_lane_solution( work, lane_hash, mythr, lane );
pdata[19] = bswap_32( n + lane );
submit_lane_solution( work, lane_hash, mythr, lane );
}
}
*noncev = _mm256_add_epi32( *noncev, m256_const1_32( 8 ) );
n += 8;
} while ( (n < last_nonce) && !work_restart[thr_id].restart);
} while ( likely( (n < last_nonce) && !work_restart[thr_id].restart ) );
pdata[19] = n;
*hashes_done = n - first_nonce;
return 0;
}
@@ -226,15 +325,16 @@ int scanhash_lyra2rev2_4way( struct work *work, uint32_t max_nonce,
{
uint32_t hash[8*4] __attribute__ ((aligned (64)));
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
uint32_t *hash7 = &(hash[7<<2]);
uint32_t *hashd7 = &(hash[7<<2]);
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce - 4;
uint32_t n = first_nonce;
const uint32_t Htarg = ptarget[7];
__m128i *noncev = (__m128i*)vdata + 19; // aligned
int thr_id = mythr->id; // thr_id arg is deprecated
const uint32_t targ32 = ptarget[7];
__m128i *noncev = (__m128i*)vdata + 19;
int thr_id = mythr->id;
if ( opt_benchmark )
( (uint32_t*)ptarget )[7] = 0x0000ff;
@@ -249,20 +349,20 @@ int scanhash_lyra2rev2_4way( struct work *work, uint32_t max_nonce,
*noncev = mm128_bswap_32( _mm_set_epi32( n+3, n+2, n+1, n ) );
lyra2rev2_4way_hash( hash, vdata );
pdata[19] = n;
for ( int lane = 0; lane < 4; lane++ ) if ( hash7[lane] <= Htarg )
for ( int lane = 0; lane < 4; lane++ ) if ( hashd7[lane] <= targ32 )
{
extr_lane_4x32( lane_hash, hash, lane, 256 );
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
if ( valid_hash( lane_hash, ptarget ) && !opt_benchmark )
{
pdata[19] = n + lane;
submit_lane_solution( work, lane_hash, mythr, lane );
}
}
n += 4;
} while ( (n < max_nonce-4) && !work_restart[thr_id].restart);
*hashes_done = n - first_nonce + 1;
} while ( (n < last_nonce) && !work_restart[thr_id].restart);
pdata[19] = n;
*hashes_done = n - first_nonce;
return 0;
}

View File

@@ -99,7 +99,7 @@ int scanhash_lyra2rev2( struct work *work,
lyra2rev2_hash(hash, endiandata);
if (hash[7] <= Htarg )
if( fulltest( hash, ptarget ) && !opt_benchmark )
if( valid_hash( hash, ptarget ) && !opt_benchmark )
{
pdata[19] = nonce;
submit_solution( work, hash, mythr );

View File

@@ -130,15 +130,15 @@ int scanhash_lyra2rev3_16way( struct work *work, const uint32_t max_nonce,
{
uint32_t hash[8*16] __attribute__ ((aligned (128)));
uint32_t vdata[20*16] __attribute__ ((aligned (64)));
uint32_t *hash7 = &hash[7<<4];
uint32_t *hashd7 = &hash[7*16];
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
uint32_t *pdata = work->data;
const uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
uint32_t n = first_nonce;
const uint32_t last_nonce = max_nonce - 16;
const uint32_t Htarg = ptarget[7];
__m512i *noncev = (__m512i*)vdata + 19; // aligned
const uint32_t targ32 = ptarget[7];
__m512i *noncev = (__m512i*)vdata + 19;
const int thr_id = mythr->id;
if ( opt_benchmark ) ( (uint32_t*)ptarget )[7] = 0x0000ff;
@@ -159,10 +159,10 @@ int scanhash_lyra2rev3_16way( struct work *work, const uint32_t max_nonce,
pdata[19] = n;
for ( int lane = 0; lane < 16; lane++ )
if ( unlikely( hash7[lane] <= Htarg ) )
if ( unlikely( hashd7[lane] <= targ32 ) )
{
extr_lane_16x32( lane_hash, hash, lane, 256 );
if ( likely( fulltest( lane_hash, ptarget ) && !opt_benchmark ) )
if ( likely( valid_hash( lane_hash, ptarget ) && !opt_benchmark ) )
{
pdata[19] = n + lane;
submit_lane_solution( work, lane_hash, mythr, lane );
@@ -170,6 +170,7 @@ int scanhash_lyra2rev3_16way( struct work *work, const uint32_t max_nonce,
}
n += 16;
} while ( likely( (n < last_nonce) && !work_restart[thr_id].restart ) );
pdata[19] = n;
*hashes_done = n - first_nonce;
return 0;
}
@@ -194,7 +195,7 @@ bool init_lyra2rev3_8way_ctx()
void lyra2rev3_8way_hash( void *state, const void *input )
{
uint32_t vhash[8*8] __attribute__ ((aligned (64)));
uint32_t vhash[8*8] __attribute__ ((aligned (128)));
uint32_t hash0[8] __attribute__ ((aligned (64)));
uint32_t hash1[8] __attribute__ ((aligned (32)));
uint32_t hash2[8] __attribute__ ((aligned (32)));
@@ -250,17 +251,17 @@ void lyra2rev3_8way_hash( void *state, const void *input )
int scanhash_lyra2rev3_8way( struct work *work, const uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t hash[8*8] __attribute__ ((aligned (64)));
uint32_t hash[8*8] __attribute__ ((aligned (128)));
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
uint32_t *hash7 = &hash[7<<3];
uint32_t *hashd7 = &hash[7*8];
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce - 8;
uint32_t n = first_nonce;
const uint32_t Htarg = ptarget[7];
__m256i *noncev = (__m256i*)vdata + 19; // aligned
const uint32_t targ32 = ptarget[7];
__m256i *noncev = (__m256i*)vdata + 19;
const int thr_id = mythr->id;
const bool bench = opt_benchmark;
@@ -277,7 +278,7 @@ int scanhash_lyra2rev3_8way( struct work *work, const uint32_t max_nonce,
pdata[19] = n;
for ( int lane = 0; lane < 8; lane++ )
if ( unlikely( hash7[lane] <= Htarg ) )
if ( unlikely( hashd7[lane] <= targ32 ) )
{
extr_lane_8x32( lane_hash, hash, lane, 256 );
if ( likely( valid_hash( lane_hash, ptarget ) && !bench ) )
@@ -357,42 +358,41 @@ int scanhash_lyra2rev3_4way( struct work *work, const uint32_t max_nonce,
{
uint32_t hash[8*4] __attribute__ ((aligned (64)));
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
uint32_t *hash7 = &(hash[7<<2]);
uint32_t *hashd7 = &(hash[7*4]);
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
uint32_t *pdata = work->data;
const uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
uint32_t n = first_nonce;
const uint32_t Htarg = ptarget[7];
__m128i *noncev = (__m128i*)vdata + 19; // aligned
const int thr_id = mythr->id; // thr_id arg is deprecated
const uint32_t targ32 = ptarget[7];
__m128i *noncev = (__m128i*)vdata + 19;
const int thr_id = mythr->id;
if ( opt_benchmark )
( (uint32_t*)ptarget )[7] = 0x0000ff;
mm128_bswap32_intrlv80_4x32( vdata, pdata );
*noncev = _mm_set_epi32( n+3, n+2, n+1, n );
blake256_4way_init( &l2v3_4way_ctx.blake );
blake256_4way_update( &l2v3_4way_ctx.blake, vdata, 64 );
do
{
*noncev = mm128_bswap_32( _mm_set_epi32( n+3, n+2, n+1, n ) );
lyra2rev3_4way_hash( hash, vdata );
pdata[19] = n;
for ( int lane = 0; lane < 4; lane++ ) if ( hash7[lane] <= Htarg )
for ( int lane = 0; lane < 4; lane++ ) if ( hashd7[lane] <= targ32 )
{
extr_lane_4x32( lane_hash, hash, lane, 256 );
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
if ( valid_hash( lane_hash, ptarget ) && !opt_benchmark )
{
pdata[19] = n + lane;
pdata[19] = bswap_32( n + lane );
submit_lane_solution( work, lane_hash, mythr, lane );
}
}
*noncev = _mm_add_epi32( *noncev, m128_const1_32( 4 ) );
n += 4;
} while ( (n < max_nonce-4) && !work_restart[thr_id].restart);
pdata[19] = n;
*hashes_done = n - first_nonce + 1;
return 0;
}

View File

@@ -88,7 +88,7 @@ int scanhash_lyra2rev3( struct work *work,
lyra2rev3_hash(hash, endiandata);
if (hash[7] <= Htarg )
if( fulltest( hash, ptarget ) && !opt_benchmark )
if( valid_hash( hash, ptarget ) && !opt_benchmark )
{
pdata[19] = nonce;
submit_solution( work, hash, mythr );

View File

@@ -56,7 +56,7 @@ int scanhash_lyra2z( struct work *work, uint32_t max_nonce,
const uint32_t Htarg = ptarget[7];
const uint32_t first_nonce = pdata[19];
uint32_t nonce = first_nonce;
int thr_id = mythr->id; // thr_id arg is deprecated
int thr_id = mythr->id;
if (opt_benchmark)
ptarget[7] = 0x0000ff;
@@ -65,14 +65,13 @@ int scanhash_lyra2z( struct work *work, uint32_t max_nonce,
be32enc(&endiandata[i], pdata[i]);
}
lyra2z_midstate( endiandata );
lyra2z_midstate( endiandata );
do {
be32enc(&endiandata[19], nonce);
lyra2z_hash( hash, endiandata );
if ( hash[7] <= Htarg )
if ( fulltest( hash, ptarget ) && !opt_benchmark )
if ( valid_hash( hash, ptarget ) && !opt_benchmark )
{
pdata[19] = nonce;
submit_solution( work, hash, mythr );

View File

@@ -9,7 +9,7 @@ void lyra2z330_hash(void *state, const void *input, uint32_t height)
{
uint32_t _ALIGN(256) hash[16];
LYRA2Z( lyra2z330_wholeMatrix, hash, 32, input, 80, input, 80,
LYRA2Z( lyra2z330_wholeMatrix, hash, 32, input, 80, input, 80,
2, 330, 256 );
memcpy(state, hash, 32);
@@ -18,38 +18,40 @@ void lyra2z330_hash(void *state, const void *input, uint32_t height)
int scanhash_lyra2z330( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t hash[8] __attribute__ ((aligned (64)));
uint32_t endiandata[20] __attribute__ ((aligned (64)));
uint32_t hash[8] __attribute__ ((aligned (128)));
uint32_t edata[20] __attribute__ ((aligned (64)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t Htarg = ptarget[7];
const uint32_t first_nonce = pdata[19];
uint32_t nonce = first_nonce;
int thr_id = mythr->id; // thr_id arg is deprecated
const int thr_id = mythr->id;
if (opt_benchmark)
ptarget[7] = 0x0000ff;
casti_m128i( endiandata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
casti_m128i( endiandata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
casti_m128i( endiandata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
casti_m128i( endiandata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
casti_m128i( endiandata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
casti_m128i( edata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
casti_m128i( edata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
casti_m128i( edata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
casti_m128i( edata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
casti_m128i( edata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
do
{
be32enc( &endiandata[19], nonce );
lyra2z330_hash( hash, endiandata, work->height );
if ( hash[7] <= Htarg )
if ( fulltest( hash, ptarget ) && !opt_benchmark )
edata[19] = nonce;
LYRA2Z( lyra2z330_wholeMatrix, hash, 32, edata, 80, edata, 80,
2, 330, 256 );
// lyra2z330_hash( hash, edata, work->height );
if ( valid_hash( hash, ptarget ) && !opt_benchmark )
{
pdata[19] = nonce;
be32enc( pdata + 19, nonce );
submit_solution( work, hash, mythr );
}
nonce++;
} while ( nonce < max_nonce && !work_restart[thr_id].restart );
pdata[19] = nonce;
*hashes_done = pdata[19] - first_nonce + 1;
*hashes_done = nonce - first_nonce;
return 0;
}

View File

@@ -162,59 +162,35 @@ int scanhash_anime_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t hash[4*8] __attribute__ ((aligned (64)));
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
uint32_t n = pdata[19];
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce - 4;
__m256i *noncev = (__m256i*)vdata + 9; // aligned
int thr_id = mythr->id; // thr_id arg is deprecated
const uint32_t Htarg = ptarget[7];
uint64_t htmax[] = {
0,
0xF,
0xFF,
0xFFF,
0xFFFF,
0x10000000
};
uint32_t masks[] = {
0xFFFFFFFF,
0xFFFFFFF0,
0xFFFFFF00,
0xFFFFF000,
0xFFFF0000,
0
};
const int thr_id = mythr->id;
mm256_bswap32_intrlv80_4x64( vdata, pdata );
*noncev = mm256_intrlv_blend_32(
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
for (int m=0; m < 6; m++)
if (Htarg <= htmax[m])
do
{
anime_4way_hash( hash, vdata );
for ( int i = 0; i < 4; i++ )
if ( valid_hash( hash+(i<<3), ptarget ) && !opt_benchmark )
{
uint32_t mask = masks[m];
do
{
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
anime_4way_hash( hash, vdata );
pdata[19] = n;
for ( int i = 0; i < 4; i++ )
if ( ( ( (hash+(i<<3))[7] & mask ) == 0 )
&& fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
{
pdata[19] = n+i;
submit_lane_solution( work, hash+(i<<3), mythr, i );
}
n += 4;
} while ( ( n < max_nonce ) && !work_restart[thr_id].restart );
break;
pdata[19] = bswap_32( n+i );
submit_solution( work, hash+(i<<3), mythr );
}
*hashes_done = n - first_nonce + 1;
*noncev = _mm256_add_epi32( *noncev,
m256_const1_64( 0x0000000400000000 ) );
n += 4;
} while ( ( n < last_nonce ) && !work_restart[thr_id].restart );
pdata[19] = n;
*hashes_done = n - first_nonce;
return 0;
}

View File

@@ -126,49 +126,28 @@ int scanhash_anime( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr)
{
uint32_t hash[8] __attribute__ ((aligned (64)));
uint32_t endiandata[20] __attribute__((aligned(64)));
uint32_t edata[20] __attribute__((aligned(64)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
uint32_t n = pdata[19];
const uint32_t first_nonce = pdata[19];
int thr_id = mythr->id; // thr_id arg is deprecated
const uint32_t Htarg = ptarget[7];
uint64_t htmax[] = {
0,
0xF,
0xFF,
0xFFF,
0xFFFF,
0x10000000
};
uint32_t masks[] = {
0xFFFFFFFF,
0xFFFFFFF0,
0xFFFFFF00,
0xFFFFF000,
0xFFFF0000,
0
};
const int thr_id = mythr->id;
const int bench = opt_benchmark;
swab32_array( edata, pdata, 20 );
swab32_array( endiandata, pdata, 20 );
for (int m=0; m < 6; m++)
if (Htarg <= htmax[m])
{
uint32_t mask = masks[m];
do
{
be32enc( &endiandata[19], n );
anime_hash( hash, endiandata );
pdata[19] = n;
if ( ( hash[7] & mask ) == 0 && fulltest( hash, ptarget ) )
submit_solution( work, hash, mythr );
n++;
} while ( ( n < max_nonce ) && !work_restart[thr_id].restart );
break;
}
*hashes_done = n - first_nonce + 1;
do
{
edata[19] = n;
anime_hash( hash, edata );
if ( valid_hash( hash, ptarget ) && !bench )
{
be32enc( &pdata[19], n );
submit_solution( work, hash, mythr );
}
n++;
} while ( ( n < max_nonce ) && !work_restart[thr_id].restart );
*hashes_done = n - first_nonce;
pdata[19] = n;
return 0;
}

View File

@@ -1028,7 +1028,7 @@ extern void hmq1725_4way_hash(void *state, const void *input)
// B
if ( mm256_anybits1( vh_mask ) )
if ( mm256_anybits0( vh_mask ) )
{
skein512_4way_init( &ctx.skein );
skein512_4way_update( &ctx.skein, vhash, 64 );
@@ -1050,14 +1050,14 @@ extern void hmq1725_4way_hash(void *state, const void *input)
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], vmask ),
m256_zero );
if ( mm256_anybits0( vh_mask ) )
if ( mm256_anybits1( vh_mask ) )
{
blake512_4way_init( &ctx.blake );
blake512_4way_update( &ctx.blake, vhash, 64 );
blake512_4way_close( &ctx.blake, vhashA );
}
if ( mm256_anybits1( vh_mask ) )
if ( mm256_anybits0( vh_mask ) )
{
bmw512_4way_init( &ctx.bmw );
bmw512_4way_update( &ctx.bmw, vhash, 64 );
@@ -1101,14 +1101,14 @@ extern void hmq1725_4way_hash(void *state, const void *input)
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], vmask ),
m256_zero );
if ( mm256_anybits0( vh_mask ) )
if ( mm256_anybits1( vh_mask ) )
{
keccak512_4way_init( &ctx.keccak );
keccak512_4way_update( &ctx.keccak, vhash, 64 );
keccak512_4way_close( &ctx.keccak, vhashA );
}
if ( mm256_anybits1( vh_mask ) )
if ( mm256_anybits0( vh_mask ) )
{
jh512_4way_init( &ctx.jh );
jh512_4way_update( &ctx.jh, vhash, 64 );
@@ -1180,7 +1180,7 @@ extern void hmq1725_4way_hash(void *state, const void *input)
intrlv_4x64( vhashA, hash0, hash1, hash2, hash3, 512 );
// B
if ( mm256_anybits1( vh_mask ) )
if ( mm256_anybits0( vh_mask ) )
{
haval256_5_4way_init( &ctx.haval );
haval256_5_4way_update( &ctx.haval, vhash, 64 );
@@ -1407,7 +1407,7 @@ extern void hmq1725_4way_hash(void *state, const void *input)
intrlv_4x64( vhashA, hash0, hash1, hash2, hash3, 512 );
if ( mm256_anybits1( vh_mask ) )
if ( mm256_anybits0( vh_mask ) )
{
sha512_4way_init( &ctx.sha512 );
sha512_4way_update( &ctx.sha512, vhash, 64 );
@@ -1443,7 +1443,7 @@ extern void hmq1725_4way_hash(void *state, const void *input)
// 4x32 for haval
intrlv_4x32_512( vhash, hash0, hash1, hash2, hash3 );
if ( mm256_anybits0( vh_mask ) )
if ( mm256_anybits1( vh_mask ) )
{
haval256_5_4way_init( &ctx.haval );
haval256_5_4way_update( &ctx.haval, vhash, 64 );

View File

@@ -402,14 +402,14 @@ void quark_4way_hash( void *state, const void *input )
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ), zero );
if ( mm256_anybits0( vh_mask ) )
if ( mm256_anybits1( vh_mask ) )
{
blake512_4way_init( &ctx.blake );
blake512_4way_update( &ctx.blake, vhash, 64 );
blake512_4way_close( &ctx.blake, vhashA );
}
if ( mm256_anybits1( vh_mask ) )
if ( mm256_anybits0( vh_mask ) )
{
bmw512_4way_init( &ctx.bmw );
bmw512_4way_update( &ctx.bmw, vhash, 64 );
@@ -427,14 +427,14 @@ void quark_4way_hash( void *state, const void *input )
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ), zero );
if ( mm256_anybits0( vh_mask ) )
if ( mm256_anybits1( vh_mask ) )
{
keccak512_4way_init( &ctx.keccak );
keccak512_4way_update( &ctx.keccak, vhash, 64 );
keccak512_4way_close( &ctx.keccak, vhashA );
}
if ( mm256_anybits1( vh_mask ) )
if ( mm256_anybits0( vh_mask ) )
{
jh512_4way_init( &ctx.jh );
jh512_4way_update( &ctx.jh, vhash, 64 );

View File

@@ -13,17 +13,21 @@
#if defined (SKEIN_8WAY)
static __thread skein512_8way_context skein512_8way_ctx
__attribute__ ((aligned (64)));
void skeinhash_8way( void *state, const void *input )
{
uint64_t vhash64[8*8] __attribute__ ((aligned (128)));
skein512_8way_context ctx_skein;
memcpy( &ctx_skein, &skein512_8way_ctx, sizeof( ctx_skein ) );
uint32_t vhash32[16*8] __attribute__ ((aligned (128)));
sha256_8way_context ctx_sha256;
skein512_8way_init( &ctx_skein );
skein512_8way_update( &ctx_skein, input, 80 );
skein512_8way_close( &ctx_skein, vhash64 );
skein512_8way_full( &ctx_skein, vhash64, input, 80 );
// skein512_8way_update( &ctx_skein, input + (64*8), 16 );
// skein512_8way_close( &ctx_skein, vhash64 );
rintrlv_8x64_8x32( vhash32, vhash64, 512 );
@@ -36,63 +40,74 @@ int scanhash_skein_8way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t vdata[20*8] __attribute__ ((aligned (128)));
uint32_t hash[16*8] __attribute__ ((aligned (64)));
uint32_t hash[8*8] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
uint32_t *hash7 = &(hash[7<<3]);
uint32_t *hash_d7 = &(hash[7*8]);
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t Htarg = ptarget[7];
const uint32_t targ_d7 = ptarget[7];
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce - 8;
uint32_t n = first_nonce;
__m512i *noncev = (__m512i*)vdata + 9; // aligned
int thr_id = mythr->id;
__m512i *noncev = (__m512i*)vdata + 9;
const int thr_id = mythr->id;
const bool bench = opt_benchmark;
mm512_bswap32_intrlv80_8x64( vdata, pdata );
*noncev = mm512_intrlv_blend_32(
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
n+3, 0, n+2, 0, n+1, 0, n , 0 ), *noncev );
// skein512_8way_init( &skein512_8way_ctx );
// skein512_8way_update( &skein512_8way_ctx, vdata, 64 );
do
{
*noncev = mm512_intrlv_blend_32( mm512_bswap_32(
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
n+3, 0, n+2, 0, n+1, 0, n , 0 ) ), *noncev );
skeinhash_8way( hash, vdata );
for ( int lane = 0; lane < 8; lane++ )
if ( hash7[ lane ] <= Htarg )
if ( unlikely( hash_d7[ lane ] <= targ_d7 ) && !bench )
{
extr_lane_8x32( lane_hash, hash, lane, 256 );
if ( fulltest( lane_hash, ptarget ) )
if ( valid_hash( lane_hash, ptarget ) )
{
pdata[19] = n + lane;
pdata[19] = bswap_32( n + lane );
submit_lane_solution( work, lane_hash, mythr, lane );
}
}
*noncev = _mm512_add_epi32( *noncev,
m512_const1_64( 0x0000000800000000 ) );
n += 8;
} while ( (n < max_nonce-8) && !work_restart[thr_id].restart );
} while ( likely( (n < last_nonce) && !work_restart[thr_id].restart ) );
pdata[19] = n;
*hashes_done = n - first_nonce;
return 0;
}
#elif defined (SKEIN_4WAY)
//static __thread skein512_4way_context skein512_4way_ctx
// __attribute__ ((aligned (64)));
void skeinhash_4way( void *state, const void *input )
{
uint64_t vhash64[8*4] __attribute__ ((aligned (128)));
skein512_4way_context ctx_skein;
// memcpy( &ctx_skein, &skein512_4way_ctx, sizeof( ctx_skein ) );
#if defined(__SHA__)
uint32_t hash0[16] __attribute__ ((aligned (64)));
uint32_t hash1[16] __attribute__ ((aligned (64)));
uint32_t hash2[16] __attribute__ ((aligned (64)));
uint32_t hash3[16] __attribute__ ((aligned (64)));
SHA256_CTX ctx_sha256;
SHA256_CTX ctx_sha256;
#else
uint32_t vhash32[16*4] __attribute__ ((aligned (64)));
sha256_4way_context ctx_sha256;
#endif
skein512_4way_init( &ctx_skein );
skein512_4way_update( &ctx_skein, input, 80 );
skein512_4way_close( &ctx_skein, vhash64 );
skein512_4way_full( &ctx_skein, vhash64, input, 80 );
// skein512_4way_update( &ctx_skein, input + (64*4), 16 );
// skein512_4way_close( &ctx_skein, vhash64 );
#if defined(__SHA__)
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash64, 512 );
@@ -127,38 +142,44 @@ int scanhash_skein_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
uint32_t hash[16*4] __attribute__ ((aligned (64)));
uint32_t hash[8*4] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
uint32_t *hash7 = &(hash[7<<2]);
uint32_t *hash_d7 = &(hash[7<<2]);
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t Htarg = ptarget[7];
const uint32_t targ_d7 = ptarget[7];
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce - 4;
uint32_t n = first_nonce;
__m256i *noncev = (__m256i*)vdata + 9; // aligned
int thr_id = mythr->id;
__m256i *noncev = (__m256i*)vdata + 9;
const int thr_id = mythr->id;
const bool bench = opt_benchmark;
mm256_bswap32_intrlv80_4x64( vdata, pdata );
// skein512_4way_init( &skein512_4way_ctx );
// skein512_4way_update( &skein512_4way_ctx, vdata, 64 );
*noncev = mm256_intrlv_blend_32(
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
do
{
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
skeinhash_4way( hash, vdata );
for ( int lane = 0; lane < 4; lane++ )
if ( hash7[ lane ] <= Htarg )
if ( unlikely( ( hash_d7[ lane ] <= targ_d7 ) && !bench ) )
{
extr_lane_4x32( lane_hash, hash, lane, 256 );
if ( fulltest( lane_hash, ptarget ) )
if ( valid_hash( lane_hash, ptarget ) )
{
pdata[19] = n + lane;
pdata[19] = bswap_32( n + lane );
submit_lane_solution( work, lane_hash, mythr, lane );
}
}
*noncev = _mm256_add_epi32( *noncev,
m256_const1_64( 0x0000000400000000 ) );
n += 4;
} while ( (n < max_nonce-4) && !work_restart[thr_id].restart );
} while ( likely( (n < last_nonce) && !work_restart[thr_id].restart ) );
pdata[19] = n;
*hashes_done = n - first_nonce;
return 0;
}

View File

@@ -4,14 +4,16 @@
bool register_skein_algo( algo_gate_t* gate )
{
gate->optimizations = AVX2_OPT | AVX512_OPT | SHA_OPT;
#if defined (SKEIN_8WAY)
gate->optimizations = AVX2_OPT | AVX512_OPT;
gate->scanhash = (void*)&scanhash_skein_8way;
gate->hash = (void*)&skeinhash_8way;
#elif defined (SKEIN_4WAY)
gate->optimizations = AVX2_OPT | AVX512_OPT | SHA_OPT;
gate->scanhash = (void*)&scanhash_skein_4way;
gate->hash = (void*)&skeinhash_4way;
#else
gate->optimizations = AVX2_OPT | AVX512_OPT | SHA_OPT;
gate->scanhash = (void*)&scanhash_skein;
gate->hash = (void*)&skeinhash;
#endif

View File

@@ -654,6 +654,80 @@ skein_big_close_8way( skein512_8way_context *sc, unsigned ub, unsigned n,
memcpy_512( dst, buf, out_len >> 3 );
}
void skein512_8way_full( skein512_8way_context *sc, void *out, const void *data,
size_t len )
{
__m512i h0, h1, h2, h3, h4, h5, h6, h7;
__m512i *vdata = (__m512i*)data;
__m512i *buf = sc->buf;
size_t ptr = 0;
unsigned first;
uint64_t bcount = 0;
const int buf_size = 64; // 64 * _m256i
// Init
h0 = m512_const1_64( 0x4903ADFF749C51CE );
h1 = m512_const1_64( 0x0D95DE399746DF03 );
h2 = m512_const1_64( 0x8FD1934127C79BCE );
h3 = m512_const1_64( 0x9A255629FF352CB1 );
h4 = m512_const1_64( 0x5DB62599DF6CA7B0 );
h5 = m512_const1_64( 0xEABE394CA9D5C3F4 );
h6 = m512_const1_64( 0x991112C71A75B523 );
h7 = m512_const1_64( 0xAE18A40B660FCC33 );
// Update
if ( len <= buf_size - ptr )
{
memcpy_512( buf + (ptr>>3), vdata, len>>3 );
ptr += len;
}
else
{
first = ( bcount == 0 ) << 7;
do {
size_t clen;
if ( ptr == buf_size )
{
bcount ++;
UBI_BIG_8WAY( 96 + first, 0 );
first = 0;
ptr = 0;
}
clen = buf_size - ptr;
if ( clen > len )
clen = len;
memcpy_512( buf + (ptr>>3), vdata, clen>>3 );
ptr += clen;
vdata += (clen>>3);
len -= clen;
} while ( len > 0 );
}
// Close
unsigned et;
memset_zero_512( buf + (ptr>>3), (buf_size - ptr) >> 3 );
et = 352 + ((bcount == 0) << 7);
UBI_BIG_8WAY( et, ptr );
memset_zero_512( buf, buf_size >> 3 );
bcount = 0;
UBI_BIG_8WAY( 510, 8 );
casti_m512i( out, 0 ) = h0;
casti_m512i( out, 1 ) = h1;
casti_m512i( out, 2 ) = h2;
casti_m512i( out, 3 ) = h3;
casti_m512i( out, 4 ) = h4;
casti_m512i( out, 5 ) = h5;
casti_m512i( out, 6 ) = h6;
casti_m512i( out, 7 ) = h7;
}
void
skein256_8way_update(void *cc, const void *data, size_t len)
{
@@ -709,6 +783,7 @@ void skein512_4way_init( skein512_4way_context *sc )
sc->ptr = 0;
}
// Do not use for 128 bt data length
static void
skein_big_core_4way( skein512_4way_context *sc, const void *data,
size_t len )
@@ -794,6 +869,79 @@ skein_big_close_4way( skein512_4way_context *sc, unsigned ub, unsigned n,
memcpy_256( dst, buf, out_len >> 3 );
}
void
skein512_4way_full( skein512_4way_context *sc, void *out, const void *data,
size_t len )
{
__m256i h0, h1, h2, h3, h4, h5, h6, h7;
__m256i *vdata = (__m256i*)data;
__m256i *buf = sc->buf;
size_t ptr = 0;
unsigned first;
const int buf_size = 64; // 64 * __m256i
uint64_t bcount = 0;
h0 = m256_const1_64( 0x4903ADFF749C51CE );
h1 = m256_const1_64( 0x0D95DE399746DF03 );
h2 = m256_const1_64( 0x8FD1934127C79BCE );
h3 = m256_const1_64( 0x9A255629FF352CB1 );
h4 = m256_const1_64( 0x5DB62599DF6CA7B0 );
h5 = m256_const1_64( 0xEABE394CA9D5C3F4 );
h6 = m256_const1_64( 0x991112C71A75B523 );
h7 = m256_const1_64( 0xAE18A40B660FCC33 );
// Update
if ( len <= buf_size - ptr )
{
memcpy_256( buf + (ptr>>3), vdata, len>>3 );
ptr += len;
}
else
{
first = ( bcount == 0 ) << 7;
do {
size_t clen;
if ( ptr == buf_size )
{
bcount ++;
UBI_BIG_4WAY( 96 + first, 0 );
first = 0;
ptr = 0;
}
clen = buf_size - ptr;
if ( clen > len )
clen = len;
memcpy_256( buf + (ptr>>3), vdata, clen>>3 );
ptr += clen;
vdata += (clen>>3);
len -= clen;
} while ( len > 0 );
}
// Close
unsigned et;
memset_zero_256( buf + (ptr>>3), (buf_size - ptr) >> 3 );
et = 352 + ((bcount == 0) << 7);
UBI_BIG_4WAY( et, ptr );
memset_zero_256( buf, buf_size >> 3 );
bcount = 0;
UBI_BIG_4WAY( 510, 8 );
casti_m256i( out, 0 ) = h0;
casti_m256i( out, 1 ) = h1;
casti_m256i( out, 2 ) = h2;
casti_m256i( out, 3 ) = h3;
casti_m256i( out, 4 ) = h4;
casti_m256i( out, 5 ) = h5;
casti_m256i( out, 6 ) = h6;
casti_m256i( out, 7 ) = h7;
}
void
skein256_4way_update(void *cc, const void *data, size_t len)
{
@@ -806,6 +954,9 @@ skein256_4way_close(void *cc, void *dst)
skein_big_close_4way(cc, 0, 0, dst, 32);
}
// Do not use with 128 bit data
void
skein512_4way_update(void *cc, const void *data, size_t len)
{

View File

@@ -63,6 +63,8 @@ typedef struct
typedef skein_8way_big_context skein512_8way_context;
typedef skein_8way_big_context skein256_8way_context;
void skein512_8way_full( skein512_8way_context *sc, void *out,
const void *data, size_t len );
void skein512_8way_init( skein512_8way_context *sc );
void skein512_8way_update( void *cc, const void *data, size_t len );
void skein512_8way_close( void *cc, void *dst );
@@ -85,6 +87,8 @@ typedef skein_4way_big_context skein512_4way_context;
typedef skein_4way_big_context skein256_4way_context;
void skein512_4way_init( skein512_4way_context *sc );
void skein512_4way_full( skein512_4way_context *sc, void *out,
const void *data, size_t len );
void skein512_4way_update( void *cc, const void *data, size_t len );
void skein512_4way_close( void *cc, void *dst );

View File

@@ -5,114 +5,131 @@
#if defined(SKEIN_8WAY)
// static __thread skein512_8way_context skein512_8way_ctx
// __attribute__ ((aligned (64)));
void skein2hash_8way( void *output, const void *input )
{
skein512_8way_context ctx;
uint64_t hash[16*8] __attribute__ ((aligned (128)));
skein512_8way_context ctx;
// memcpy( &ctx, &skein512_8way_ctx, sizeof( ctx ) );
skein512_8way_init( &ctx );
skein512_8way_update( &ctx, input, 80 );
skein512_8way_close( &ctx, hash );
skein512_8way_full( &ctx, hash, input, 80 );
skein512_8way_init( &ctx );
skein512_8way_update( &ctx, hash, 64 );
skein512_8way_close( &ctx, output );
// skein512_8way_update( &ctx, input + (64*8), 16 );
// skein512_8way_close( &ctx, hash );
skein512_8way_full( &ctx, output, hash, 64 );
}
int scanhash_skein2_8way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t hash[16*8] __attribute__ ((aligned (128)));
uint64_t hash[8*8] __attribute__ ((aligned (128)));
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
uint32_t *hash7 = &(hash[49]);
uint64_t *hashq3 = &(hash[3*8]);
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t Htarg = ptarget[7];
const uint64_t targq3 = ((uint64_t*)ptarget)[3];
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce - 8;
uint32_t n = first_nonce;
__m512i *noncev = (__m512i*)vdata + 9; // aligned
int thr_id = mythr->id;
__m512i *noncev = (__m512i*)vdata + 9;
const int thr_id = mythr->id;
const bool bench = opt_benchmark;
mm512_bswap32_intrlv80_8x64( vdata, pdata );
*noncev = mm512_intrlv_blend_32(
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
n+3, 0, n+2, 0, n+1, 0, n , 0 ), *noncev );
// skein512_8way_init( &skein512_8way_ctx );
// skein512_8way_update( &skein512_8way_ctx, vdata, 64 );
do
{
*noncev = mm512_intrlv_blend_32( mm512_bswap_32(
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
n+3, 0, n+2, 0, n+1, 0, n , 0 ) ), *noncev );
skein2hash_8way( hash, vdata );
for ( int lane = 0; lane < 8; lane++ )
if ( hash7[ lane<<1 ] <= Htarg )
if ( unlikely( hashq3[ lane ] <= targq3 && !bench ) )
{
extr_lane_8x64( lane_hash, hash, lane, 256 );
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
if ( valid_hash( lane_hash, ptarget ) && !bench )
{
pdata[19] = n + lane;
pdata[19] = bswap_32( n + lane );
submit_lane_solution( work, lane_hash, mythr, lane );
}
}
*noncev = _mm512_add_epi32( *noncev,
m512_const1_64( 0x0000000800000000 ) );
n += 8;
} while ( (n < max_nonce-8) && !work_restart[thr_id].restart );
} while ( likely( (n < last_nonce) && !work_restart[thr_id].restart ) );
*hashes_done = n - first_nonce + 1;
pdata[19] = n;
*hashes_done = n - first_nonce;
return 0;
}
#elif defined(SKEIN_4WAY)
//static __thread skein512_4way_context skein512_4way_ctx
// __attribute__ ((aligned (64)));
void skein2hash_4way( void *output, const void *input )
{
skein512_4way_context ctx;
// memcpy( &ctx, &skein512_4way_ctx, sizeof( ctx ) );
uint64_t hash[16*4] __attribute__ ((aligned (64)));
skein512_4way_init( &ctx );
skein512_4way_update( &ctx, input, 80 );
skein512_4way_close( &ctx, hash );
// skein512_4way_update( &ctx, input + (64*4), 16 );
// skein512_4way_close( &ctx, hash );
skein512_4way_init( &ctx );
skein512_4way_update( &ctx, hash, 64 );
skein512_4way_close( &ctx, output );
skein512_4way_full( &ctx, hash, input, 80 );
skein512_4way_full( &ctx, output, hash, 64 );
}
int scanhash_skein2_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t hash[16*4] __attribute__ ((aligned (64)));
uint64_t hash[8*4] __attribute__ ((aligned (64)));
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
uint32_t *hash7 = &(hash[25]);
uint64_t *hash_q3 = &(hash[3*4]);
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t Htarg = ptarget[7];
const uint64_t targ_q3 = ((uint64_t*)ptarget)[3];
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce - 4;
uint32_t n = first_nonce;
__m256i *noncev = (__m256i*)vdata + 9; // aligned
int thr_id = mythr->id; // thr_id arg is deprecated
__m256i *noncev = (__m256i*)vdata + 9;
const int thr_id = mythr->id;
const bool bench = opt_benchmark;
mm256_bswap32_intrlv80_4x64( vdata, pdata );
// skein512_4way_init( &skein512_4way_ctx );
// skein512_4way_update( &skein512_4way_ctx, vdata, 64 );
*noncev = mm256_intrlv_blend_32(
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
do
{
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
skein2hash_4way( hash, vdata );
for ( int lane = 0; lane < 4; lane++ )
if ( hash7[ lane<<1 ] <= Htarg )
if ( hash_q3[ lane ] <= targ_q3 )
{
extr_lane_4x64( lane_hash, hash, lane, 256 );
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
if ( valid_hash( lane_hash, ptarget ) && !bench )
{
pdata[19] = n + lane;
pdata[19] = bswap_32( n + lane );
submit_lane_solution( work, lane_hash, mythr, lane );
}
}
*noncev = _mm256_add_epi32( *noncev,
m256_const1_64( 0x0000000400000000 ) );
n += 4;
} while ( (n < max_nonce) && !work_restart[thr_id].restart );
} while ( (n < last_nonce) && !work_restart[thr_id].restart );
*hashes_done = n - first_nonce + 1;
pdata[19] = n;
*hashes_done = n - first_nonce;
return 0;
}

View File

@@ -30,9 +30,6 @@
#include "algo/groestl/aes_ni/hash-groestl.h"
#endif
static __thread uint32_t s_ntime = UINT32_MAX;
static __thread char hashOrder[X16R_HASH_FUNC_COUNT + 1] = { 0 };
static void hex_getAlgoString(const uint32_t* prevblock, char *output)
{
char *sptr = output;
@@ -86,7 +83,7 @@ void hex_hash( void* output, const void* input )
void *in = (void*) input;
int size = 80;
char elem = hashOrder[0];
char elem = x16r_hash_order[0];
uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
for ( int i = 0; i < 16; i++ )
@@ -235,7 +232,7 @@ int scanhash_hex( struct work *work, uint32_t max_nonce,
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce - 4;
const uint32_t last_nonce = max_nonce;
const int thr_id = mythr->id;
uint32_t nonce = first_nonce;
volatile uint8_t *restart = &(work_restart[thr_id].restart);
@@ -244,17 +241,18 @@ int scanhash_hex( struct work *work, uint32_t max_nonce,
mm128_bswap32_80( edata, pdata );
static __thread uint32_t s_ntime = UINT32_MAX;
uint32_t ntime = swab32(pdata[17]);
if ( s_ntime != ntime )
{
hex_getAlgoString( (const uint32_t*) (&edata[1]), hashOrder );
hex_getAlgoString( (const uint32_t*) (&edata[1]), x16r_hash_order );
s_ntime = ntime;
if ( opt_debug && !thr_id )
applog( LOG_INFO, "hash order %s (%08x)", hashOrder, ntime );
applog( LOG_INFO, "hash order %s (%08x)", x16r_hash_order, ntime );
}
// Do midstate prehash on hash functions with block size <= 64 bytes.
const char elem = hashOrder[0];
const char elem = x16r_hash_order[0];
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
switch ( algo )
{

View File

@@ -692,14 +692,15 @@ void x16r_4way_hash_generic( void* output, const void* input )
break;
case SKEIN:
if ( i == 0 )
{
skein512_4way_update( &ctx.skein, input + (64<<2), 16 );
skein512_4way_close( &ctx.skein, vhash );
}
else
{
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
skein512_4way_init( &ctx.skein );
skein512_4way_update( &ctx.skein, vhash, size );
skein512_4way_full( &ctx.skein, vhash, vhash, size );
}
skein512_4way_close( &ctx.skein, vhash );
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
break;
case LUFFA:

View File

@@ -238,7 +238,7 @@ int scanhash_x16r( struct work *work, uint32_t max_nonce,
nonce++;
} while ( nonce < max_nonce && !(*restart) );
pdata[19] = nonce;
*hashes_done = pdata[19] - first_nonce + 1;
*hashes_done = pdata[19] - first_nonce;
return 0;
}

View File

@@ -46,7 +46,7 @@ int scanhash_x16rt( struct work *work, uint32_t max_nonce,
nonce++;
} while ( nonce < max_nonce && !(*restart) );
pdata[19] = nonce;
*hashes_done = pdata[19] - first_nonce + 1;
*hashes_done = pdata[19] - first_nonce;
return 0;
}

View File

@@ -35,9 +35,6 @@
#if defined (X16RV2_8WAY)
static __thread uint32_t s_ntime = UINT32_MAX;
static __thread char hashOrder[X16R_HASH_FUNC_COUNT + 1] = { 0 };
union _x16rv2_8way_context_overlay
{
blake512_8way_context blake;
@@ -96,7 +93,7 @@ void x16rv2_8way_hash( void* output, const void* input )
for ( int i = 0; i < 16; i++ )
{
const char elem = hashOrder[i];
const char elem = x16r_hash_order[i];
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
switch ( algo )
@@ -651,17 +648,19 @@ int scanhash_x16rv2_8way( struct work *work, uint32_t max_nonce,
bedata1[0] = bswap_32( pdata[1] );
bedata1[1] = bswap_32( pdata[2] );
static __thread uint32_t s_ntime = UINT32_MAX;
const uint32_t ntime = bswap_32( pdata[17] );
if ( s_ntime != ntime )
{
x16_r_s_getAlgoString( (const uint8_t*)bedata1, hashOrder );
x16_r_s_getAlgoString( (const uint8_t*)bedata1, x16r_hash_order );
s_ntime = ntime;
if ( opt_debug && !thr_id )
applog( LOG_INFO, "hash order %s (%08x)", hashOrder, ntime );
applog( LOG_INFO, "hash order %s (%08x)", x16r_hash_order, ntime );
}
// Do midstate prehash on hash functions with block size <= 64 bytes.
const char elem = hashOrder[0];
const char elem = x16r_hash_order[0];
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
switch ( algo )
{
@@ -737,9 +736,6 @@ int scanhash_x16rv2_8way( struct work *work, uint32_t max_nonce,
#elif defined (X16RV2_4WAY)
static __thread uint32_t s_ntime = UINT32_MAX;
static __thread char hashOrder[X16R_HASH_FUNC_COUNT + 1] = { 0 };
union _x16rv2_4way_context_overlay
{
blake512_4way_context blake;
@@ -789,7 +785,7 @@ void x16rv2_4way_hash( void* output, const void* input )
for ( int i = 0; i < 16; i++ )
{
const char elem = hashOrder[i];
const char elem = x16r_hash_order[i];
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
switch ( algo )
@@ -1130,17 +1126,19 @@ int scanhash_x16rv2_4way( struct work *work, uint32_t max_nonce,
bedata1[0] = bswap_32( pdata[1] );
bedata1[1] = bswap_32( pdata[2] );
static __thread uint32_t s_ntime = UINT32_MAX;
const uint32_t ntime = bswap_32(pdata[17]);
if ( s_ntime != ntime )
{
x16_r_s_getAlgoString( (const uint8_t*)bedata1, hashOrder );
x16_r_s_getAlgoString( (const uint8_t*)bedata1, x16r_hash_order );
s_ntime = ntime;
if ( opt_debug && !thr_id )
applog( LOG_DEBUG, "hash order %s (%08x)", hashOrder, ntime );
applog( LOG_DEBUG, "hash order %s (%08x)", x16r_hash_order, ntime );
}
// Do midstate prehash on hash functions with block size <= 64 bytes.
const char elem = hashOrder[0];
const char elem = x16r_hash_order[0];
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
switch ( algo )
{

View File

@@ -34,7 +34,6 @@
#endif
static __thread uint32_t s_ntime = UINT32_MAX;
static __thread char hashOrder[X16R_HASH_FUNC_COUNT + 1] = { 0 };
union _x16rv2_context_overlay
{
@@ -74,16 +73,10 @@ void x16rv2_hash( void* output, const void* input )
x16rv2_context_overlay ctx;
void *in = (void*) input;
int size = 80;
/*
if ( s_ntime == UINT32_MAX )
{
const uint8_t* in8 = (uint8_t*) input;
x16_r_s_getAlgoString( &in8[4], hashOrder );
}
*/
for ( int i = 0; i < 16; i++ )
{
const char elem = hashOrder[i];
const char elem = x16r_hash_order[i];
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
switch ( algo )
@@ -203,48 +196,48 @@ int scanhash_x16rv2( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t _ALIGN(128) hash32[8];
uint32_t _ALIGN(128) endiandata[20];
uint32_t _ALIGN(128) edata[20];
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t Htarg = ptarget[7];
const uint32_t first_nonce = pdata[19];
int thr_id = mythr->id; // thr_id arg is deprecated
const int thr_id = mythr->id;
uint32_t nonce = first_nonce;
volatile uint8_t *restart = &(work_restart[thr_id].restart);
const bool bench = opt_benchmark;
casti_m128i( endiandata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
casti_m128i( endiandata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
casti_m128i( endiandata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
casti_m128i( endiandata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
casti_m128i( endiandata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
casti_m128i( edata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
casti_m128i( edata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
casti_m128i( edata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
casti_m128i( edata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
casti_m128i( edata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
static __thread uint32_t s_ntime = UINT32_MAX;
if ( s_ntime != pdata[17] )
{
uint32_t ntime = swab32(pdata[17]);
x16_r_s_getAlgoString( (const uint8_t*) (&endiandata[1]), hashOrder );
x16_r_s_getAlgoString( (const uint8_t*) (&edata[1]), x16r_hash_order );
s_ntime = ntime;
if ( opt_debug && !thr_id )
applog( LOG_DEBUG, "hash order %s (%08x)", hashOrder, ntime );
applog( LOG_DEBUG, "hash order %s (%08x)",
x16r_hash_order, ntime );
}
if ( opt_benchmark )
ptarget[7] = 0x0cff;
if ( bench ) ptarget[7] = 0x0cff;
do
{
be32enc( &endiandata[19], nonce );
x16rv2_hash( hash32, endiandata );
edata[19] = nonce;
x16rv2_hash( hash32, edata );
if ( hash32[7] <= Htarg )
if (fulltest( hash32, ptarget ) && !opt_benchmark )
if ( unlikely( valid_hash( hash32, ptarget ) && !bench ) )
{
pdata[19] = nonce;
pdata[19] = bswap_32( nonce );
submit_solution( work, hash32, mythr );
}
nonce++;
} while ( nonce < max_nonce && !(*restart) );
pdata[19] = nonce;
*hashes_done = pdata[19] - first_nonce + 1;
*hashes_done = pdata[19] - first_nonce;
return 0;
}

View File

@@ -97,7 +97,7 @@ int scanhash_x21s( struct work *work, uint32_t max_nonce,
nonce++;
} while ( nonce < max_nonce && !(*restart) );
pdata[19] = nonce;
*hashes_done = pdata[19] - first_nonce + 1;
*hashes_done = pdata[19] - first_nonce;
return 0;
}

File diff suppressed because it is too large Load Diff

View File

@@ -563,59 +563,31 @@ void sonoa_hash( void *state, const void *input )
}
int scanhash_sonoa( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
uint64_t *hashes_done, struct thr_info *mythr)
{
uint32_t _ALIGN(128) hash32[8];
uint32_t _ALIGN(128) endiandata[20];
uint32_t edata[20] __attribute__((aligned(64)));
uint32_t hash64[8] __attribute__((aligned(64)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
uint32_t n = pdata[19];
const uint32_t first_nonce = pdata[19];
const uint32_t Htarg = ptarget[7];
uint32_t n = pdata[19] - 1;
int thr_id = mythr->id; // thr_id arg is deprecated
const int thr_id = mythr->id;
const bool bench = opt_benchmark;
uint64_t htmax[] =
mm128_bswap32_80( edata, pdata );
do
{
0,
0xF,
0xFF,
0xFFF,
0xFFFF,
0x10000000
};
uint32_t masks[] =
{
0xFFFFFFFF,
0xFFFFFFF0,
0xFFFFFF00,
0xFFFFF000,
0xFFFF0000,
0
};
// we need bigendian data...
casti_m128i( endiandata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
casti_m128i( endiandata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
casti_m128i( endiandata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
casti_m128i( endiandata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
casti_m128i( endiandata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
for ( int m = 0; m < 6; m++ ) if ( Htarg <= htmax[m] )
{
uint32_t mask = masks[m];
do
edata[19] = n;
sonoa_hash( hash64, edata );
if ( unlikely( valid_hash( hash64, ptarget ) && !bench ) )
{
pdata[19] = ++n;
be32enc(&endiandata[19], n);
sonoa_hash(hash32, endiandata);
if ( !( hash32[7] & mask ) )
if ( fulltest( hash32, ptarget ) && !opt_benchmark )
submit_solution( work, hash32, mythr );
} while (n < max_nonce && !work_restart[thr_id].restart);
break;
}
*hashes_done = n - first_nonce + 1;
pdata[19] = bswap_32( n );
submit_solution( work, hash64, mythr );
}
n++;
} while ( n < max_nonce && !work_restart[thr_id].restart );
*hashes_done = n - first_nonce;
pdata[19] = n;
return 0;
}

View File

@@ -74,9 +74,7 @@ void x17_8way_hash( void *state, const void *input )
blake512_8way_full( &ctx.blake, vhash, input, 80 );
bmw512_8way_init( &ctx.bmw );
bmw512_8way_update( &ctx.bmw, vhash, 64 );
bmw512_8way_close( &ctx.bmw, vhash );
bmw512_8way_full( &ctx.bmw, vhash, vhash, 64 );
#if defined(__VAES__)
@@ -106,9 +104,7 @@ void x17_8way_hash( void *state, const void *input )
#endif
skein512_8way_init( &ctx.skein );
skein512_8way_update( &ctx.skein, vhash, 64 );
skein512_8way_close( &ctx.skein, vhash );
skein512_8way_full( &ctx.skein, vhash, vhash, 64 );
jh512_8way_init( &ctx.jh );
jh512_8way_update( &ctx.jh, vhash, 64 );
@@ -287,18 +283,18 @@ void x17_8way_hash( void *state, const void *input )
int scanhash_x17_8way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t hash[8*16] __attribute__ ((aligned (128)));
uint32_t vdata[24*8] __attribute__ ((aligned (64)));
uint32_t hash[8*8] __attribute__ ((aligned (128)));
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
uint32_t *hash7 = &(hash[7<<3]);
uint32_t *hashd7 = &(hash[7*8]);
uint32_t *pdata = work->data;
const uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce - 8;
__m512i *noncev = (__m512i*)vdata + 9; // aligned
__m512i *noncev = (__m512i*)vdata + 9;
uint32_t n = first_nonce;
const int thr_id = mythr->id;
const uint32_t Htarg = ptarget[7];
const uint32_t targ32 = ptarget[7];
const bool bench = opt_benchmark;
mm512_bswap32_intrlv80_8x64( vdata, pdata );
@@ -310,7 +306,7 @@ int scanhash_x17_8way( struct work *work, uint32_t max_nonce,
x17_8way_hash( hash, vdata );
for ( int lane = 0; lane < 8; lane++ )
if ( unlikely( ( hash7[ lane ] <= Htarg ) && !bench ) )
if ( unlikely( ( hashd7[ lane ] <= targ32 ) && !bench ) )
{
extr_lane_8x32( lane_hash, hash, lane, 256 );
if ( likely( valid_hash( lane_hash, ptarget ) ) )
@@ -378,9 +374,7 @@ void x17_4way_hash( void *state, const void *input )
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
skein512_4way_init( &ctx.skein );
skein512_4way_update( &ctx.skein, vhash, 64 );
skein512_4way_close( &ctx.skein, vhash );
skein512_4way_full( &ctx.skein, vhash, vhash, 64 );
jh512_4way_init( &ctx.jh );
jh512_4way_update( &ctx.jh, vhash, 64 );
@@ -474,18 +468,18 @@ void x17_4way_hash( void *state, const void *input )
int scanhash_x17_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t hash[16*4] __attribute__ ((aligned (64)));
uint32_t hash[8*4] __attribute__ ((aligned (64)));
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
uint32_t *hash7 = &(hash[7<<2]);
uint32_t *hashd7 = &(hash[ 7*4 ]);
uint32_t *pdata = work->data;
const uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce -4;
__m256i *noncev = (__m256i*)vdata + 9; // aligned
const uint32_t last_nonce = max_nonce - 4;
__m256i *noncev = (__m256i*)vdata + 9;
uint32_t n = first_nonce;
const int thr_id = mythr->id;
const uint32_t Htarg = ptarget[7];
const uint32_t targ32 = ptarget[7];
const bool bench = opt_benchmark;
mm256_bswap32_intrlv80_4x64( vdata, pdata );
@@ -496,7 +490,7 @@ int scanhash_x17_4way( struct work *work, uint32_t max_nonce,
x17_4way_hash( hash, vdata );
for ( int lane = 0; lane < 4; lane++ )
if ( unlikely( hash7[ lane ] <= Htarg && !bench ) )
if ( unlikely( hashd7[ lane ] <= targ32 && !bench ) )
{
extr_lane_4x32( lane_hash, hash, lane, 256 );
if ( valid_hash( lane_hash, ptarget ) )

View File

@@ -169,8 +169,8 @@ int scanhash_x17( struct work *work, uint32_t max_nonce,
submit_solution( work, hash64, mythr );
}
n++;
} while ( n < max_nonce && !work_restart[thr_id].restart);
*hashes_done = n - first_nonce + 1;
} while ( n < max_nonce && !work_restart[thr_id].restart );
*hashes_done = n - first_nonce;
pdata[19] = n;
return 0;
}

View File

@@ -76,9 +76,7 @@ void xevan_8way_hash( void *output, const void *input )
blake512_8way_full( &ctx.blake, vhash, input, 80 );
memset( &vhash[8<<3], 0, 64<<3 );
bmw512_8way_init( &ctx.bmw );
bmw512_8way_update( &ctx.bmw, vhash, dataLen );
bmw512_8way_close( &ctx.bmw, vhash );
bmw512_8way_full( &ctx.bmw, vhash, vhash, dataLen );
#if defined(__VAES__)
@@ -108,9 +106,7 @@ void xevan_8way_hash( void *output, const void *input )
#endif
skein512_8way_init( &ctx.skein );
skein512_8way_update( &ctx.skein, vhash, dataLen );
skein512_8way_close( &ctx.skein, vhash );
skein512_8way_full( &ctx.skein, vhash, vhash, dataLen );
jh512_8way_init( &ctx.jh );
jh512_8way_update( &ctx.jh, vhash, dataLen );
@@ -291,9 +287,7 @@ void xevan_8way_hash( void *output, const void *input )
blake512_8way_full( &ctx.blake, vhash, vhash, dataLen );
bmw512_8way_init( &ctx.bmw );
bmw512_8way_update( &ctx.bmw, vhash, dataLen );
bmw512_8way_close( &ctx.bmw, vhash );
bmw512_8way_full( &ctx.bmw, vhash, vhash, dataLen );
#if defined(__VAES__)
@@ -323,9 +317,7 @@ void xevan_8way_hash( void *output, const void *input )
#endif
skein512_8way_init( &ctx.skein );
skein512_8way_update( &ctx.skein, vhash, dataLen );
skein512_8way_close( &ctx.skein, vhash );
skein512_8way_full( &ctx.skein, vhash, vhash, dataLen );
jh512_8way_init( &ctx.jh );
jh512_8way_update( &ctx.jh, vhash, dataLen );
@@ -504,40 +496,43 @@ void xevan_8way_hash( void *output, const void *input )
int scanhash_xevan_8way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t hash[8*16] __attribute__ ((aligned (128)));
uint32_t vdata[24*8] __attribute__ ((aligned (64)));
uint32_t hash[8*8] __attribute__ ((aligned (128)));
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
uint32_t *hash7 = &(hash[7<<3]);
uint32_t *hashd7 = &(hash[7*8]);
uint32_t *pdata = work->data;
const uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce - 8;
__m512i *noncev = (__m512i*)vdata + 9; // aligned
__m512i *noncev = (__m512i*)vdata + 9;
uint32_t n = first_nonce;
const int thr_id = mythr->id;
const uint32_t Htarg = ptarget[7];
const uint32_t targ32 = ptarget[7];
const bool bench = opt_benchmark;
mm512_bswap32_intrlv80_8x64( vdata, pdata );
*noncev = mm512_intrlv_blend_32(
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
do
{
*noncev = mm512_intrlv_blend_32( mm512_bswap_32(
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
xevan_8way_hash( hash, vdata );
for ( int lane = 0; lane < 8; lane++ )
if unlikely( ( hash7[ lane ] <= Htarg ) )
if ( unlikely( ( hashd7[ lane ] <= targ32 ) && !bench ) )
{
extr_lane_8x32( lane_hash, hash, lane, 256 );
if ( likely( fulltest( lane_hash, ptarget ) && !opt_benchmark ) )
if ( likely( valid_hash( lane_hash, ptarget ) ) )
{
pdata[19] = n + lane;
pdata[19] = bswap_32( n + lane );
submit_lane_solution( work, lane_hash, mythr, lane );
}
}
*noncev = _mm512_add_epi32( *noncev,
m512_const1_64( 0x0000000800000000 ) );
n += 8;
} while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) );
pdata[19] = n;
*hashes_done = n - first_nonce;
return 0;
}
@@ -578,8 +573,6 @@ void xevan_4way_hash( void *output, const void *input )
const int dataLen = 128;
xevan_4way_context_overlay ctx __attribute__ ((aligned (64)));
// parallel 4 way
blake512_4way_full( &ctx.blake, vhash, input, 80 );
memset( &vhash[8<<2], 0, 64<<2 );
@@ -598,9 +591,7 @@ void xevan_4way_hash( void *output, const void *input )
// Parallel 4way
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );
skein512_4way_init( &ctx.skein );
skein512_4way_update( &ctx.skein, vhash, dataLen );
skein512_4way_close( &ctx.skein, vhash );
skein512_4way_full( &ctx.skein, vhash, vhash, dataLen );
jh512_4way_init( &ctx.jh );
jh512_4way_update( &ctx.jh, vhash, dataLen );
@@ -618,15 +609,11 @@ void xevan_4way_hash( void *output, const void *input )
cube_2way_full( &ctx.cube, vhashA, 512, vhashA, dataLen );
cube_2way_full( &ctx.cube, vhashB, 512, vhashB, dataLen );
shavite512_2way_init( &ctx.shavite );
shavite512_2way_update_close( &ctx.shavite, vhashA, vhashA, dataLen );
shavite512_2way_init( &ctx.shavite );
shavite512_2way_update_close( &ctx.shavite, vhashB, vhashB, dataLen );
shavite512_2way_full( &ctx.shavite, vhashA, vhashA, dataLen );
shavite512_2way_full( &ctx.shavite, vhashB, vhashB, dataLen );
simd_2way_init( &ctx.simd, 512 );
simd_2way_update_close( &ctx.simd, vhashA, vhashA, dataLen<<3 );
simd_2way_init( &ctx.simd, 512 );
simd_2way_update_close( &ctx.simd, vhashB, vhashB, dataLen<<3 );
simd512_2way_full( &ctx.simd, vhashA, vhashA, dataLen );
simd512_2way_full( &ctx.simd, vhashB, vhashB, dataLen );
dintrlv_2x128( hash0, hash1, vhashA, dataLen<<3 );
dintrlv_2x128( hash2, hash3, vhashB, dataLen<<3 );
@@ -718,9 +705,7 @@ void xevan_4way_hash( void *output, const void *input )
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );
skein512_4way_init( &ctx.skein );
skein512_4way_update( &ctx.skein, vhash, dataLen );
skein512_4way_close( &ctx.skein, vhash );
skein512_4way_full( &ctx.skein, vhash, vhash, dataLen );
jh512_4way_init( &ctx.jh );
jh512_4way_update( &ctx.jh, vhash, dataLen );
@@ -738,15 +723,11 @@ void xevan_4way_hash( void *output, const void *input )
cube_2way_full( &ctx.cube, vhashA, 512, vhashA, dataLen );
cube_2way_full( &ctx.cube, vhashB, 512, vhashB, dataLen );
shavite512_2way_init( &ctx.shavite );
shavite512_2way_update_close( &ctx.shavite, vhashA, vhashA, dataLen );
shavite512_2way_init( &ctx.shavite );
shavite512_2way_update_close( &ctx.shavite, vhashB, vhashB, dataLen );
shavite512_2way_full( &ctx.shavite, vhashA, vhashA, dataLen );
shavite512_2way_full( &ctx.shavite, vhashB, vhashB, dataLen );
simd_2way_init( &ctx.simd, 512 );
simd_2way_update_close( &ctx.simd, vhashA, vhashA, dataLen<<3 );
simd_2way_init( &ctx.simd, 512 );
simd_2way_update_close( &ctx.simd, vhashB, vhashB, dataLen<<3 );
simd512_2way_full( &ctx.simd, vhashA, vhashA, dataLen );
simd512_2way_full( &ctx.simd, vhashB, vhashB, dataLen );
dintrlv_2x128( hash0, hash1, vhashA, dataLen<<3 );
dintrlv_2x128( hash2, hash3, vhashB, dataLen<<3 );
@@ -818,41 +799,43 @@ void xevan_4way_hash( void *output, const void *input )
int scanhash_xevan_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t hash[4*16] __attribute__ ((aligned (64)));
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
uint32_t *hash7 = &(hash[7<<2]);
uint32_t hash[16*4] __attribute__ ((aligned (128)));
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
uint32_t *hashd7 = &(hash[7<<2]);
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
int thr_id = mythr->id;
__m256i *noncev = (__m256i*)vdata + 9; // aligned
const uint32_t Htarg = ptarget[7];
__m256i *noncev = (__m256i*)vdata + 9;
const uint32_t targ32 = ptarget[7];
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce - 4;
uint32_t n = first_nonce;
const bool bench = opt_benchmark;
if ( opt_benchmark )
ptarget[7] = 0x0cff;
if ( bench ) ptarget[7] = 0x0cff;
mm256_bswap32_intrlv80_4x64( vdata, pdata );
*noncev = mm256_intrlv_blend_32(
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
do {
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
_mm256_set_epi32( n+3, 0,n+2, 0,n+1, 0, n, 0 ) ), *noncev );
xevan_4way_hash( hash, vdata );
for ( int lane = 0; lane < 4; lane++ )
if ( hash7[ lane ] <= Htarg )
if ( unlikely( hashd7[ lane ] <= targ32 ) && ! bench )
{
extr_lane_4x32( lane_hash, hash, lane, 256 );
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
if ( valid_hash( lane_hash, ptarget ) )
{
pdata[19] = n + lane;
pdata[19] = bswap_32( n + lane );
submit_lane_solution( work, lane_hash, mythr, lane );
}
}
*noncev = _mm256_add_epi32( *noncev,
m256_const1_64( 0x0000000400000000 ) );
n += 4;
} while ( ( n < max_nonce-4 ) && !work_restart[thr_id].restart );
*hashes_done = n - first_nonce + 1;
} while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) );
pdata[19] = n;
*hashes_done = n - first_nonce;
return 0;
}

View File

@@ -56,8 +56,6 @@ typedef struct {
} xevan_ctx_holder;
xevan_ctx_holder xevan_ctx __attribute__ ((aligned (64)));
static __thread sph_blake512_context xevan_blake_mid
__attribute__ ((aligned (64)));
void init_xevan_ctx()
{
@@ -85,34 +83,23 @@ void init_xevan_ctx()
#endif
};
void xevan_blake512_midstate( const void* input )
{
memcpy( &xevan_blake_mid, &xevan_ctx.blake, sizeof xevan_blake_mid );
sph_blake512( &xevan_blake_mid, input, 64 );
}
void xevan_hash(void *output, const void *input)
{
uint32_t _ALIGN(64) hash[32]; // 128 bytes required
uint32_t _ALIGN(64) hash[32]; // 128 bytes required
const int dataLen = 128;
xevan_ctx_holder ctx __attribute__ ((aligned (64)));
memcpy( &ctx, &xevan_ctx, sizeof(xevan_ctx) );
const int midlen = 64; // bytes
const int tail = 80 - midlen; // 16
memcpy( &ctx.blake, &xevan_blake_mid, sizeof xevan_blake_mid );
sph_blake512( &ctx.blake, input + midlen, tail );
sph_blake512_close(&ctx.blake, hash);
xevan_ctx_holder ctx __attribute__ ((aligned (64)));
memcpy( &ctx, &xevan_ctx, sizeof(xevan_ctx) );
sph_blake512( &ctx.blake, input, 80 );
sph_blake512_close( &ctx.blake, hash );
memset(&hash[16], 0, 64);
sph_bmw512(&ctx.bmw, hash, dataLen);
sph_bmw512_close(&ctx.bmw, hash);
#if defined(__AES__)
update_and_final_groestl( &ctx.groestl, (char*)hash,
(const char*)hash, dataLen*8 );
update_and_final_groestl( &ctx.groestl, (char*)hash,
(const char*)hash, dataLen*8 );
#else
sph_groestl512(&ctx.groestl, hash, dataLen);
sph_groestl512_close(&ctx.groestl, hash);
@@ -127,20 +114,20 @@ void xevan_hash(void *output, const void *input)
sph_keccak512(&ctx.keccak, hash, dataLen);
sph_keccak512_close(&ctx.keccak, hash);
update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
(const BitSequence*)hash, dataLen );
update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
(const BitSequence*)hash, dataLen );
cubehashUpdateDigest( &ctx.cubehash, (byte*)hash,
(const byte*) hash, dataLen );
cubehashUpdateDigest( &ctx.cubehash, (byte*)hash,
(const byte*) hash, dataLen );
sph_shavite512(&ctx.shavite, hash, dataLen);
sph_shavite512_close(&ctx.shavite, hash);
update_final_sd( &ctx.simd, (BitSequence *)hash,
update_final_sd( &ctx.simd, (BitSequence *)hash,
(const BitSequence *)hash, dataLen*8 );
#if defined(__AES__)
update_final_echo( &ctx.echo, (BitSequence *) hash,
update_final_echo( &ctx.echo, (BitSequence *) hash,
(const BitSequence *) hash, dataLen*8 );
#else
sph_echo512(&ctx.echo, hash, dataLen);
@@ -159,15 +146,15 @@ void xevan_hash(void *output, const void *input)
sph_whirlpool(&ctx.whirlpool, hash, dataLen);
sph_whirlpool_close(&ctx.whirlpool, hash);
SHA512_Update( &ctx.sha512, hash, dataLen );
SHA512_Final( (unsigned char*) hash, &ctx.sha512 );
SHA512_Update( &ctx.sha512, hash, dataLen );
SHA512_Final( (unsigned char*) hash, &ctx.sha512 );
sph_haval256_5(&ctx.haval,(const void*) hash, dataLen);
sph_haval256_5_close(&ctx.haval, hash);
memset(&hash[8], 0, dataLen - 32);
memcpy( &ctx, &xevan_ctx, sizeof(xevan_ctx) );
memcpy( &ctx, &xevan_ctx, sizeof(xevan_ctx) );
sph_blake512(&ctx.blake, hash, dataLen);
sph_blake512_close(&ctx.blake, hash);
@@ -176,11 +163,11 @@ void xevan_hash(void *output, const void *input)
sph_bmw512_close(&ctx.bmw, hash);
#if defined(__AES__)
update_and_final_groestl( &ctx.groestl, (char*)hash,
(const BitSequence*)hash, dataLen*8 );
update_and_final_groestl( &ctx.groestl, (char*)hash,
(const BitSequence*)hash, dataLen*8 );
#else
sph_groestl512(&ctx.groestl, hash, dataLen);
sph_groestl512_close(&ctx.groestl, hash);
sph_groestl512_close(&ctx.groestl, hash);
#endif
sph_skein512(&ctx.skein, hash, dataLen);
@@ -191,24 +178,25 @@ void xevan_hash(void *output, const void *input)
sph_keccak512(&ctx.keccak, hash, dataLen);
sph_keccak512_close(&ctx.keccak, hash);
update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
(const BitSequence*)hash, dataLen );
cubehashUpdateDigest( &ctx.cubehash, (byte*)hash,
(const byte*) hash, dataLen );
update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
(const BitSequence*)hash, dataLen );
cubehashUpdateDigest( &ctx.cubehash, (byte*)hash,
(const byte*) hash, dataLen );
sph_shavite512(&ctx.shavite, hash, dataLen);
sph_shavite512_close(&ctx.shavite, hash);
update_final_sd( &ctx.simd, (BitSequence *)hash,
update_final_sd( &ctx.simd, (BitSequence *)hash,
(const BitSequence *)hash, dataLen*8 );
#if defined(__AES__)
update_final_echo( &ctx.echo, (BitSequence *) hash,
update_final_echo( &ctx.echo, (BitSequence *) hash,
(const BitSequence *) hash, dataLen*8 );
#else
sph_echo512(&ctx.echo, hash, dataLen);
sph_echo512_close(&ctx.echo, hash);
sph_echo512(&ctx.echo, hash, dataLen);
sph_echo512_close(&ctx.echo, hash);
#endif
sph_hamsi512(&ctx.hamsi, hash, dataLen);
@@ -223,8 +211,8 @@ void xevan_hash(void *output, const void *input)
sph_whirlpool(&ctx.whirlpool, hash, dataLen);
sph_whirlpool_close(&ctx.whirlpool, hash);
SHA512_Update( &ctx.sha512, hash, dataLen );
SHA512_Final( (unsigned char*) hash, &ctx.sha512 );
SHA512_Update( &ctx.sha512, hash, dataLen );
SHA512_Final( (unsigned char*) hash, &ctx.sha512 );
sph_haval256_5(&ctx.haval,(const void*) hash, dataLen);
sph_haval256_5_close(&ctx.haval, hash);
@@ -233,41 +221,33 @@ void xevan_hash(void *output, const void *input)
}
int scanhash_xevan( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
uint64_t *hashes_done, struct thr_info *mythr)
{
uint32_t _ALIGN(64) hash[8];
uint32_t _ALIGN(64) endiandata[20];
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
int thr_id = mythr->id; // thr_id arg is deprecated
const uint32_t Htarg = ptarget[7];
const uint32_t first_nonce = pdata[19];
uint32_t nonce = first_nonce;
volatile uint8_t *restart = &(work_restart[thr_id].restart);
uint32_t edata[20] __attribute__((aligned(64)));
uint32_t hash64[8] __attribute__((aligned(64)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
uint32_t n = pdata[19];
const uint32_t first_nonce = pdata[19];
const int thr_id = mythr->id;
const bool bench = opt_benchmark;
if (opt_benchmark)
ptarget[7] = 0x0cff;
mm128_bswap32_80( edata, pdata );
for (int k=0; k < 19; k++)
be32enc(&endiandata[k], pdata[k]);
xevan_blake512_midstate( endiandata );
do {
be32enc(&endiandata[19], nonce);
xevan_hash(hash, endiandata);
if (hash[7] <= Htarg )
if ( fulltest( hash, ptarget ) && !opt_benchmark )
{
pdata[19] = nonce;
submit_solution( work, hash, mythr );
}
nonce++;
} while ( nonce < max_nonce && !(*restart) );
pdata[19] = nonce;
*hashes_done = pdata[19] - first_nonce + 1;
return 0;
do
{
edata[19] = n;
xevan_hash( hash64, edata );
if ( unlikely( valid_hash( hash64, ptarget ) && !bench ) )
{
pdata[19] = bswap_32( n );
submit_solution( work, hash64, mythr );
}
n++;
} while ( n < max_nonce && !work_restart[thr_id].restart );
pdata[19] = n;
*hashes_done = n - first_nonce;
return 0;
}
#endif

View File

@@ -87,64 +87,40 @@ void x22i_8way_hash( void *output, const void *input )
unsigned char hashA7[64] __attribute__((aligned(32))) = {0};
x22i_8way_ctx_overlay ctx;
blake512_8way_init( &ctx.blake );
blake512_8way_update( &ctx.blake, input, 80 );
blake512_8way_close( &ctx.blake, vhash );
blake512_8way_full( &ctx.blake, vhash, input, 80 );
bmw512_8way_init( &ctx.bmw );
bmw512_8way_update( &ctx.bmw, vhash, 64 );
bmw512_8way_close( &ctx.bmw, vhash );
bmw512_8way_full( &ctx.bmw, vhash, vhash, 64 );
#if defined(__VAES__)
rintrlv_8x64_4x128( vhashA, vhashB, vhash, 512 );
rintrlv_8x64_4x128( vhashA, vhashB, vhash, 512 );
groestl512_4way_init( &ctx.groestl, 64 );
groestl512_4way_update_close( &ctx.groestl, vhashA, vhashA, 512 );
groestl512_4way_init( &ctx.groestl, 64 );
groestl512_4way_update_close( &ctx.groestl, vhashB, vhashB, 512 );
groestl512_4way_full( &ctx.groestl, vhashA, vhashA, 64 );
groestl512_4way_full( &ctx.groestl, vhashB, vhashB, 64 );
rintrlv_4x128_8x64( vhash, vhashA, vhashB, 512 );
rintrlv_4x128_8x64( vhash, vhashA, vhashB, 512 );
#else
dintrlv_8x64_512( hash0, hash1, hash2, hash3,
hash4, hash5, hash6, hash7, vhash );
dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
vhash );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash0,
(const char*)hash0, 512 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash1,
(const char*)hash1, 512 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash2,
(const char*)hash2, 512 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash3,
(const char*)hash3, 512 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash4,
(const char*)hash4, 512 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash5,
(const char*)hash5, 512 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash6,
(const char*)hash6, 512 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash7,
(const char*)hash7, 512 );
groestl512_full( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
groestl512_full( &ctx.groestl, (char*)hash1, (char*)hash1, 512 );
groestl512_full( &ctx.groestl, (char*)hash2, (char*)hash2, 512 );
groestl512_full( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
groestl512_full( &ctx.groestl, (char*)hash4, (char*)hash4, 512 );
groestl512_full( &ctx.groestl, (char*)hash5, (char*)hash5, 512 );
groestl512_full( &ctx.groestl, (char*)hash6, (char*)hash6, 512 );
groestl512_full( &ctx.groestl, (char*)hash7, (char*)hash7, 512 );
intrlv_8x64_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
hash7 );
intrlv_8x64_512( vhash, hash0, hash1, hash2, hash3,
hash4, hash5, hash6, hash7 );
#endif
skein512_8way_init( &ctx.skein );
skein512_8way_update( &ctx.skein, vhash, 64 );
skein512_8way_close( &ctx.skein, vhash );
skein512_8way_full( &ctx.skein, vhash, vhash, 64 );
jh512_8way_init( &ctx.jh );
jh512_8way_update( &ctx.jh, vhash, 64 );
jh512_8way_close( &ctx.jh, vhash );
@@ -155,22 +131,16 @@ void x22i_8way_hash( void *output, const void *input )
rintrlv_8x64_4x128( vhashA, vhashB, vhash, 512 );
luffa_4way_init( &ctx.luffa, 512 );
luffa_4way_update_close( &ctx.luffa, vhashA, vhashA, 64 );
luffa_4way_init( &ctx.luffa, 512 );
luffa_4way_update_close( &ctx.luffa, vhashB, vhashB, 64 );
luffa512_4way_full( &ctx.luffa, vhashA, vhashA, 64 );
luffa512_4way_full( &ctx.luffa, vhashB, vhashB, 64 );
cube_4way_init( &ctx.cube, 512, 16, 32 );
cube_4way_update_close( &ctx.cube, vhashA, vhashA, 64 );
cube_4way_init( &ctx.cube, 512, 16, 32 );
cube_4way_update_close( &ctx.cube, vhashB, vhashB, 64 );
cube_4way_full( &ctx.cube, vhashA, 512, vhashA, 64 );
cube_4way_full( &ctx.cube, vhashB, 512, vhashB, 64 );
#if defined(__VAES__)
shavite512_4way_init( &ctx.shavite );
shavite512_4way_update_close( &ctx.shavite, vhashA, vhashA, 64 );
shavite512_4way_init( &ctx.shavite );
shavite512_4way_update_close( &ctx.shavite, vhashB, vhashB, 64 );
shavite512_4way_full( &ctx.shavite, vhashA, vhashA, 64 );
shavite512_4way_full( &ctx.shavite, vhashB, vhashB, 64 );
#else
@@ -207,17 +177,13 @@ void x22i_8way_hash( void *output, const void *input )
#endif
simd_4way_init( &ctx.simd, 512 );
simd_4way_update_close( &ctx.simd, vhashA, vhashA, 512 );
simd_4way_init( &ctx.simd, 512 );
simd_4way_update_close( &ctx.simd, vhashB, vhashB, 512 );
simd512_4way_full( &ctx.simd, vhashA, vhashA, 64 );
simd512_4way_full( &ctx.simd, vhashB, vhashB, 64 );
#if defined(__VAES__)
echo_4way_init( &ctx.echo, 512 );
echo_4way_update_close( &ctx.echo, vhashA, vhashA, 512 );
echo_4way_init( &ctx.echo, 512 );
echo_4way_update_close( &ctx.echo, vhashB, vhashB, 512 );
echo_4way_full( &ctx.echo, vhashA, 512, vhashA, 64 );
echo_4way_full( &ctx.echo, vhashB, 512, vhashB, 64 );
rintrlv_4x128_8x64( vhash, vhashA, vhashB, 512 );
@@ -226,30 +192,22 @@ void x22i_8way_hash( void *output, const void *input )
dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhashA );
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhashB );
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence*)hash0,
(const BitSequence*)hash0, 512 );
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence*)hash1,
(const BitSequence*)hash1, 512 );
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence*)hash2,
(const BitSequence*)hash2, 512 );
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence*)hash3,
(const BitSequence*)hash3, 512 );
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence*)hash4,
(const BitSequence*)hash4, 512 );
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence*)hash5,
(const BitSequence*)hash5, 512 );
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence*)hash6,
(const BitSequence*)hash6, 512 );
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence*)hash7,
(const BitSequence*)hash7, 512 );
echo_full( &ctx.echo, (BitSequence *)hash0, 512,
(const BitSequence *)hash0, 64 );
echo_full( &ctx.echo, (BitSequence *)hash1, 512,
(const BitSequence *)hash1, 64 );
echo_full( &ctx.echo, (BitSequence *)hash2, 512,
(const BitSequence *)hash2, 64 );
echo_full( &ctx.echo, (BitSequence *)hash3, 512,
(const BitSequence *)hash3, 64 );
echo_full( &ctx.echo, (BitSequence *)hash4, 512,
(const BitSequence *)hash4, 64 );
echo_full( &ctx.echo, (BitSequence *)hash5, 512,
(const BitSequence *)hash5, 64 );
echo_full( &ctx.echo, (BitSequence *)hash6, 512,
(const BitSequence *)hash6, 64 );
echo_full( &ctx.echo, (BitSequence *)hash7, 512,
(const BitSequence *)hash7, 64 );
intrlv_8x64_512( vhash, hash0, hash1, hash2, hash3,
hash4, hash5, hash6, hash7 );
@@ -443,6 +401,55 @@ void x22i_8way_hash( void *output, const void *input )
sha256_8way_close( &ctx.sha256, output );
}
int scanhash_x22i_8way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t hash[8*8] __attribute__ ((aligned (128)));
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
uint32_t *hashd7 = &(hash[7*8]);
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce - 8;
__m512i *noncev = (__m512i*)vdata + 9;
uint32_t n = first_nonce;
const int thr_id = mythr->id;
const uint32_t targ32 = ptarget[7];
const bool bench = opt_benchmark;
if ( bench ) ptarget[7] = 0x08ff;
InitializeSWIFFTX();
mm512_bswap32_intrlv80_8x64( vdata, pdata );
*noncev = mm512_intrlv_blend_32(
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
do
{
x22i_8way_hash( hash, vdata );
for ( int lane = 0; lane < 8; lane++ )
if ( unlikely( ( hashd7[ lane ] <= targ32 ) && !bench ) )
{
extr_lane_8x32( lane_hash, hash, lane, 256 );
if ( likely( valid_hash( lane_hash, ptarget ) ) )
{
pdata[19] = bswap_32( n + lane );
submit_lane_solution( work, lane_hash, mythr, lane );
}
}
*noncev = _mm512_add_epi32( *noncev,
m512_const1_64( 0x0000000800000000 ) );
n += 8;
} while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) );
pdata[19] = n;
*hashes_done = n - first_nonce;
return 0;
}
/*
int scanhash_x22i_8way( struct work* work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
@@ -488,6 +495,7 @@ int scanhash_x22i_8way( struct work* work, uint32_t max_nonce,
*hashes_done = n - first_nonce;
return 0;
}
*/
#elif defined(X22I_4WAY)
@@ -531,33 +539,21 @@ void x22i_4way_hash( void *output, const void *input )
unsigned char hashA3[64] __attribute__((aligned(32))) = {0};
x22i_ctx_overlay ctx;
blake512_4way_init( &ctx.blake );
blake512_4way_update( &ctx.blake, input, 80 );
blake512_4way_close( &ctx.blake, vhash );
blake512_4way_full( &ctx.blake, vhash, input, 80 );
bmw512_4way_init( &ctx.bmw );
bmw512_4way_update( &ctx.bmw, vhash, 64 );
bmw512_4way_close( &ctx.bmw, vhash );
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash0,
(const char*)hash0, 512 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash1,
(const char*)hash1, 512 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash2,
(const char*)hash2, 512 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash3,
(const char*)hash3, 512 );
groestl512_full( &ctx.groestl, (char*)hash0, (const char*)hash0, 512 );
groestl512_full( &ctx.groestl, (char*)hash1, (const char*)hash1, 512 );
groestl512_full( &ctx.groestl, (char*)hash2, (const char*)hash2, 512 );
groestl512_full( &ctx.groestl, (char*)hash3, (const char*)hash3, 512 );
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
skein512_4way_init( &ctx.skein );
skein512_4way_update( &ctx.skein, vhash, 64 );
skein512_4way_close( &ctx.skein, vhash );
skein512_4way_full( &ctx.skein, vhash, vhash, 64 );
jh512_4way_init( &ctx.jh );
jh512_4way_update( &ctx.jh, vhash, 64 );
@@ -569,41 +565,29 @@ void x22i_4way_hash( void *output, const void *input )
rintrlv_4x64_2x128( vhashA, vhashB, vhash, 512 );
luffa_2way_init( &ctx.luffa, 512 );
luffa_2way_update_close( &ctx.luffa, vhashA, vhashA, 64 );
luffa_2way_init( &ctx.luffa, 512 );
luffa_2way_update_close( &ctx.luffa, vhashB, vhashB, 64 );
luffa512_2way_full( &ctx.luffa, vhashA, vhashA, 64 );
luffa512_2way_full( &ctx.luffa, vhashB, vhashB, 64 );
cube_2way_init( &ctx.cube, 512, 16, 32 );
cube_2way_update_close( &ctx.cube, vhashA, vhashA, 64 );
cube_2way_init( &ctx.cube, 512, 16, 32 );
cube_2way_update_close( &ctx.cube, vhashB, vhashB, 64 );
cube_2way_full( &ctx.cube, vhashA, 512, vhashA, 64 );
cube_2way_full( &ctx.cube, vhashB, 512, vhashB, 64 );
shavite512_2way_full( &ctx.shavite, vhashA, vhashA, 64 );
shavite512_2way_full( &ctx.shavite, vhashB, vhashB, 64 );
shavite512_2way_init( &ctx.shavite );
shavite512_2way_update_close( &ctx.shavite, vhashA, vhashA, 64 );
shavite512_2way_init( &ctx.shavite );
shavite512_2way_update_close( &ctx.shavite, vhashB, vhashB, 64 );
simd_2way_init( &ctx.simd, 512 );
simd_2way_update_close( &ctx.simd, vhashA, vhashA, 512 );
simd_2way_init( &ctx.simd, 512 );
simd_2way_update_close( &ctx.simd, vhashB, vhashB, 512 );
simd512_2way_full( &ctx.simd, vhashA, vhashA, 64 );
simd512_2way_full( &ctx.simd, vhashB, vhashB, 64 );
dintrlv_2x128_512( hash0, hash1, vhashA );
dintrlv_2x128_512( hash2, hash3, vhashB );
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence*)hash0,
(const BitSequence*)hash0, 512 );
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence*)hash1,
(const BitSequence*)hash1, 512 );
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence*)hash2,
(const BitSequence*)hash2, 512 );
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence*)hash3,
(const BitSequence*)hash3, 512 );
echo_full( &ctx.echo, (BitSequence *)hash0, 512,
(const BitSequence *)hash0, 64 );
echo_full( &ctx.echo, (BitSequence *)hash1, 512,
(const BitSequence *)hash1, 64 );
echo_full( &ctx.echo, (BitSequence *)hash2, 512,
(const BitSequence *)hash2, 64 );
echo_full( &ctx.echo, (BitSequence *)hash3, 512,
(const BitSequence *)hash3, 64 );
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
@@ -722,44 +706,47 @@ void x22i_4way_hash( void *output, const void *input )
int scanhash_x22i_4way( struct work* work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t hash[4*16] __attribute__ ((aligned (64)));
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
uint32_t *hash7 = &(hash[7<<2]);
uint32_t hash[8*4] __attribute__ ((aligned (64)));
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
uint32_t *hashd7 = &(hash[ 7*4 ]);
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
__m256i *noncev = (__m256i*)vdata + 9; // aligned
const uint32_t last_nonce = max_nonce - 4;
__m256i *noncev = (__m256i*)vdata + 9;
uint32_t n = first_nonce;
const int thr_id = mythr->id;
const uint32_t Htarg = ptarget[7];
const uint32_t targ32 = ptarget[7];
const bool bench = opt_benchmark;
if ( bench ) ptarget[7] = 0x08ff;
if (opt_benchmark)
((uint32_t*)ptarget)[7] = 0x08ff;
InitializeSWIFFTX();
mm256_bswap32_intrlv80_4x64( vdata, pdata );
*noncev = mm256_intrlv_blend_32(
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
do
{
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
x22i_4way_hash( hash, vdata );
for ( int lane = 0; lane < 4; lane++ )
if unlikely( ( hash7[ lane ] <= Htarg ) )
if ( unlikely( hashd7[ lane ] <= targ32 && !bench ) )
{
extr_lane_4x32( lane_hash, hash, lane, 256 );
if ( likely( fulltest( lane_hash, ptarget ) && !opt_benchmark ) )
if ( valid_hash( lane_hash, ptarget ) )
{
pdata[19] = n + lane;
pdata[19] = bswap_32( n + lane );
submit_lane_solution( work, lane_hash, mythr, lane );
}
}
*noncev = _mm256_add_epi32( *noncev,
m256_const1_64( 0x0000000400000000 ) );
n += 4;
} while ( likely( ( n < max_nonce - 4 ) && !work_restart[thr_id].restart ) );
*hashes_done = n - first_nonce + 1;
} while ( likely( ( n <= last_nonce ) && !work_restart[thr_id].restart ) );
pdata[19] = n;
*hashes_done = n - first_nonce;
return 0;
}

View File

@@ -167,40 +167,38 @@ void x22i_hash( void *output, const void *input )
memcpy(output, hash, 32);
}
int scanhash_x22i( struct work* work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
int scanhash_x22i( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr)
{
uint32_t endiandata[20] __attribute__((aligned(64)));
uint32_t hash[8] __attribute__((aligned(64)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
const uint32_t Htarg = ptarget[7];
uint32_t n = first_nonce;
uint32_t edata[20] __attribute__((aligned(64)));
uint32_t hash64[8] __attribute__((aligned(64)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
uint32_t n = pdata[19];
const uint32_t first_nonce = n;
const int thr_id = mythr->id;
const bool bench = opt_benchmark;
if (opt_benchmark)
((uint32_t*)ptarget)[7] = 0x08ff;
for (int k=0; k < 20; k++)
be32enc(&endiandata[k], pdata[k]);
if ( bench ) ptarget[7] = 0x08ff;
mm128_bswap32_80( edata, pdata );
InitializeSWIFFTX();
do
{
pdata[19] = ++n;
be32enc( &endiandata[19], n );
x22i_hash( hash, endiandata );
if ( hash[7] < Htarg )
if ( fulltest( hash, ptarget ) && !opt_benchmark )
submit_solution( work, hash, mythr );
} while ( n < max_nonce && !work_restart[thr_id].restart );
*hashes_done = pdata[19] - first_nonce;
return 0;
edata[19] = n;
x22i_hash( hash64, edata );
if ( unlikely( valid_hash( hash64, ptarget ) && !bench ) )
{
pdata[19] = bswap_32( n );
submit_solution( work, hash64, mythr );
}
n++;
} while ( n < max_nonce && !work_restart[thr_id].restart );
*hashes_done = n - first_nonce;
pdata[19] = n;
return 0;
}
#endif

View File

@@ -530,6 +530,55 @@ void x25x_8way_hash( void *output, const void *input )
blake2s_8way_full_blocks( &ctx.blake2s, output, vhashX, 64*24 );
}
int scanhash_x25x_8way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t hash[8*8] __attribute__ ((aligned (128)));
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
uint32_t *hashd7 = &(hash[7*8]);
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce - 8;
__m512i *noncev = (__m512i*)vdata + 9;
uint32_t n = first_nonce;
const int thr_id = mythr->id;
const uint32_t targ32 = ptarget[7];
const bool bench = opt_benchmark;
if ( bench ) ptarget[7] = 0x08ff;
InitializeSWIFFTX();
mm512_bswap32_intrlv80_8x64( vdata, pdata );
*noncev = mm512_intrlv_blend_32(
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
do
{
x25x_8way_hash( hash, vdata );
for ( int lane = 0; lane < 8; lane++ )
if ( unlikely( ( hashd7[ lane ] <= targ32 ) && !bench ) )
{
extr_lane_8x32( lane_hash, hash, lane, 256 );
if ( likely( valid_hash( lane_hash, ptarget ) ) )
{
pdata[19] = bswap_32( n + lane );
submit_lane_solution( work, lane_hash, mythr, lane );
}
}
*noncev = _mm512_add_epi32( *noncev,
m512_const1_64( 0x0000000800000000 ) );
n += 8;
} while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) );
pdata[19] = n;
*hashes_done = n - first_nonce;
return 0;
}
/*
int scanhash_x25x_8way( struct work* work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
@@ -574,6 +623,7 @@ int scanhash_x25x_8way( struct work* work, uint32_t max_nonce,
*hashes_done = n - first_nonce;
return 0;
}
*/
#elif defined(X25X_4WAY)
@@ -614,9 +664,7 @@ void x25x_4way_hash( void *output, const void *input )
unsigned char vhashX[24][64*4] __attribute__ ((aligned (64)));
x25x_4way_ctx_overlay ctx __attribute__ ((aligned (64)));
blake512_4way_init( &ctx.blake );
blake512_4way_update( &ctx.blake, input, 80 );
blake512_4way_close( &ctx.blake, vhash );
blake512_4way_full( &ctx.blake, vhash, input, 80 );
dintrlv_4x64_512( hash0[0], hash1[0], hash2[0], hash3[0], vhash );
bmw512_4way_init( &ctx.bmw );
@@ -624,24 +672,13 @@ void x25x_4way_hash( void *output, const void *input )
bmw512_4way_close( &ctx.bmw, vhash );
dintrlv_4x64_512( hash0[1], hash1[1], hash2[1], hash3[1], vhash );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash0[2],
(const char*)hash0[1], 512 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash1[2],
(const char*)hash1[1], 512 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash2[2],
(const char*)hash2[1], 512 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash3[2],
(const char*)hash3[1], 512 );
groestl512_full( &ctx.groestl, (char*)hash0[2], (const char*)hash0[1], 512 );
groestl512_full( &ctx.groestl, (char*)hash1[2], (const char*)hash1[1], 512 );
groestl512_full( &ctx.groestl, (char*)hash2[2], (const char*)hash2[1], 512 );
groestl512_full( &ctx.groestl, (char*)hash3[2], (const char*)hash3[1], 512 );
intrlv_4x64_512( vhash, hash0[2], hash1[2], hash2[2], hash3[2] );
skein512_4way_init( &ctx.skein );
skein512_4way_update( &ctx.skein, vhash, 64 );
skein512_4way_close( &ctx.skein, vhash );
skein512_4way_full( &ctx.skein, vhash, vhash, 64 );
dintrlv_4x64_512( hash0[3], hash1[3], hash2[3], hash3[3], vhash );
jh512_4way_init( &ctx.jh );
@@ -654,32 +691,20 @@ void x25x_4way_hash( void *output, const void *input )
keccak512_4way_close( &ctx.keccak, vhash );
dintrlv_4x64_512( hash0[5], hash1[5], hash2[5], hash3[5], vhash );
init_luffa( &ctx.luffa, 512 );
update_and_final_luffa( &ctx.luffa, (BitSequence*)hash0[6],
(const BitSequence*)hash0[5], 64 );
init_luffa( &ctx.luffa, 512 );
update_and_final_luffa( &ctx.luffa, (BitSequence*)hash1[6],
(const BitSequence*)hash1[5], 64 );
init_luffa( &ctx.luffa, 512 );
update_and_final_luffa( &ctx.luffa, (BitSequence*)hash2[6],
(const BitSequence*)hash2[5], 64 );
init_luffa( &ctx.luffa, 512 );
update_and_final_luffa( &ctx.luffa, (BitSequence*)hash3[6],
(const BitSequence*)hash3[5], 64 );
cubehashInit( &ctx.cube, 512, 16, 32 );
cubehashUpdateDigest( &ctx.cube, (byte*) hash0[7],
(const byte*)hash0[6], 64 );
cubehashInit( &ctx.cube, 512, 16, 32 );
cubehashUpdateDigest( &ctx.cube, (byte*) hash1[7],
(const byte*)hash1[6], 64 );
cubehashInit( &ctx.cube, 512, 16, 32 );
cubehashUpdateDigest( &ctx.cube, (byte*) hash2[7],
(const byte*)hash2[6], 64 );
cubehashInit( &ctx.cube, 512, 16, 32 );
cubehashUpdateDigest( &ctx.cube, (byte*) hash3[7],
(const byte*)hash3[6], 64 );
luffa_full( &ctx.luffa, (BitSequence*)hash0[6], 512,
(const BitSequence*)hash0[5], 64 );
luffa_full( &ctx.luffa, (BitSequence*)hash1[6], 512,
(const BitSequence*)hash1[5], 64 );
luffa_full( &ctx.luffa, (BitSequence*)hash2[6], 512,
(const BitSequence*)hash2[5], 64 );
luffa_full( &ctx.luffa, (BitSequence*)hash3[6], 512,
(const BitSequence*)hash3[5], 64 );
cubehash_full( &ctx.cube, (byte*)hash0[7], 512, (const byte*)hash0[6], 64 );
cubehash_full( &ctx.cube, (byte*)hash1[7], 512, (const byte*)hash1[6], 64 );
cubehash_full( &ctx.cube, (byte*)hash2[7], 512, (const byte*)hash2[6], 64 );
cubehash_full( &ctx.cube, (byte*)hash3[7], 512, (const byte*)hash3[6], 64 );
sph_shavite512_init(&ctx.shavite);
sph_shavite512(&ctx.shavite, (const void*) hash0[7], 64);
sph_shavite512_close(&ctx.shavite, hash0[8]);
@@ -693,31 +718,23 @@ void x25x_4way_hash( void *output, const void *input )
sph_shavite512(&ctx.shavite, (const void*) hash3[7], 64);
sph_shavite512_close(&ctx.shavite, hash3[8]);
init_sd( &ctx.simd, 512 );
update_final_sd( &ctx.simd, (BitSequence*)hash0[9],
(const BitSequence*)hash0[8], 512 );
init_sd( &ctx.simd, 512 );
update_final_sd( &ctx.simd, (BitSequence*)hash1[9],
(const BitSequence*)hash1[8], 512 );
init_sd( &ctx.simd, 512 );
update_final_sd( &ctx.simd, (BitSequence*)hash2[9],
(const BitSequence*)hash2[8], 512 );
init_sd( &ctx.simd, 512 );
update_final_sd( &ctx.simd, (BitSequence*)hash3[9],
(const BitSequence*)hash3[8], 512 );
simd_full( &ctx.simd, (BitSequence*)hash0[9],
(const BitSequence*)hash0[8], 512 );
simd_full( &ctx.simd, (BitSequence*)hash1[9],
(const BitSequence*)hash1[8], 512 );
simd_full( &ctx.simd, (BitSequence*)hash2[9],
(const BitSequence*)hash2[8], 512 );
simd_full( &ctx.simd, (BitSequence*)hash3[9],
(const BitSequence*)hash3[8], 512 );
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence*)hash0[10],
(const BitSequence*)hash0[9], 512 );
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence*)hash1[10],
(const BitSequence*)hash1[9], 512 );
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence*)hash2[10],
(const BitSequence*)hash2[9], 512 );
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence*)hash3[10],
(const BitSequence*)hash3[9], 512 );
echo_full( &ctx.echo, (BitSequence *)hash0[10], 512,
(const BitSequence *)hash0[ 9], 64 );
echo_full( &ctx.echo, (BitSequence *)hash1[10], 512,
(const BitSequence *)hash1[ 9], 64 );
echo_full( &ctx.echo, (BitSequence *)hash2[10], 512,
(const BitSequence *)hash2[ 9], 64 );
echo_full( &ctx.echo, (BitSequence *)hash3[10], 512,
(const BitSequence *)hash3[ 9], 64 );
intrlv_4x64_512( vhash, hash0[10], hash1[10], hash2[10], hash3[10] );
@@ -870,43 +887,46 @@ void x25x_4way_hash( void *output, const void *input )
int scanhash_x25x_4way( struct work* work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t hash[16*4] __attribute__ ((aligned (128)));
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
uint32_t *hash7 = &(hash[7<<2]);
uint32_t hash[8*4] __attribute__ ((aligned (64)));
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
uint32_t *hashd7 = &(hash[ 7*4 ]);
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
__m256i *noncev = (__m256i*)vdata + 9; // aligned
uint32_t n = first_nonce;
const uint32_t last_nonce = max_nonce - 4;
__m256i *noncev = (__m256i*)vdata + 9;
uint32_t n = first_nonce;
const int thr_id = mythr->id;
const uint32_t Htarg = ptarget[7];
const uint32_t targ32 = ptarget[7];
const bool bench = opt_benchmark;
if (opt_benchmark)
((uint32_t*)ptarget)[7] = 0x08ff;
if ( bench ) ptarget[7] = 0x08ff;
InitializeSWIFFTX();
mm256_bswap32_intrlv80_4x64( vdata, pdata );
*noncev = mm256_intrlv_blend_32(
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
do
{
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
x25x_4way_hash( hash, vdata );
for ( int lane = 0; lane < 4; lane++ ) if ( hash7[lane] <= Htarg )
for ( int lane = 0; lane < 4; lane++ )
if ( unlikely( hashd7[ lane ] <= targ32 && !bench ) )
{
extr_lane_4x32( lane_hash, hash, lane, 256 );
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
if ( valid_hash( lane_hash, ptarget ) )
{
pdata[19] = n + lane;
submit_lane_solution( work, lane_hash, mythr, lane );
pdata[19] = bswap_32( n + lane );
submit_lane_solution( work, lane_hash, mythr, lane );
}
}
*noncev = _mm256_add_epi32( *noncev,
m256_const1_64( 0x0000000400000000 ) );
n += 4;
} while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) );
} while ( likely( ( n <= last_nonce ) && !work_restart[thr_id].restart ) );
pdata[19] = n;
*hashes_done = n - first_nonce;
return 0;
}

View File

@@ -201,42 +201,38 @@ void x25x_hash( void *output, const void *input )
memcpy(output, &hash[24], 32);
}
int scanhash_x25x( struct work* work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
int scanhash_x25x( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr)
{
uint32_t edata[20] __attribute__((aligned(64)));
uint32_t hash[8] __attribute__((aligned(64)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
const uint32_t Htarg = ptarget[7];
uint32_t n = first_nonce;
uint32_t hash64[8] __attribute__((aligned(64)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
uint32_t n = pdata[19];
const uint32_t first_nonce = n;
const int thr_id = mythr->id;
const bool bench = opt_benchmark;
if (opt_benchmark)
((uint32_t*)ptarget)[7] = 0x08ff;
if ( bench ) ptarget[7] = 0x08ff;
mm128_bswap32_80( edata, pdata );
for (int k=0; k < 20; k++)
be32enc(&edata[k], pdata[k]);
InitializeSWIFFTX();
do
{
pdata[19] = ++n;
be32enc( &edata[19], n );
x25x_hash( hash, edata );
if ( hash[7] < Htarg )
if ( fulltest( hash, ptarget ) && !opt_benchmark )
submit_solution( work, hash, mythr );
} while ( n < max_nonce && !work_restart[thr_id].restart );
*hashes_done = pdata[19] - first_nonce;
return 0;
edata[19] = n;
x25x_hash( hash64, edata );
if ( unlikely( valid_hash( hash64, ptarget ) && !bench ) )
{
pdata[19] = bswap_32( n );
submit_solution( work, hash64, mythr );
}
n++;
} while ( n < max_nonce && !work_restart[thr_id].restart );
*hashes_done = n - first_nonce;
pdata[19] = n;
return 0;
}
#endif

20
configure vendored
View File

@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.12.0.
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.12.2.
#
#
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
@@ -577,8 +577,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='cpuminer-opt'
PACKAGE_TARNAME='cpuminer-opt'
PACKAGE_VERSION='3.12.0'
PACKAGE_STRING='cpuminer-opt 3.12.0'
PACKAGE_VERSION='3.12.2'
PACKAGE_STRING='cpuminer-opt 3.12.2'
PACKAGE_BUGREPORT=''
PACKAGE_URL=''
@@ -1332,7 +1332,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
\`configure' configures cpuminer-opt 3.12.0 to adapt to many kinds of systems.
\`configure' configures cpuminer-opt 3.12.2 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1404,7 +1404,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
short | recursive ) echo "Configuration of cpuminer-opt 3.12.0:";;
short | recursive ) echo "Configuration of cpuminer-opt 3.12.2:";;
esac
cat <<\_ACEOF
@@ -1509,7 +1509,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
cpuminer-opt configure 3.12.0
cpuminer-opt configure 3.12.2
generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2012,7 +2012,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
It was created by cpuminer-opt $as_me 3.12.0, which was
It was created by cpuminer-opt $as_me 3.12.2, which was
generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@
@@ -2993,7 +2993,7 @@ fi
# Define the identity of the package.
PACKAGE='cpuminer-opt'
VERSION='3.12.0'
VERSION='3.12.2'
cat >>confdefs.h <<_ACEOF
@@ -6690,7 +6690,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
This file was extended by cpuminer-opt $as_me 3.12.0, which was
This file was extended by cpuminer-opt $as_me 3.12.2, which was
generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -6756,7 +6756,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
cpuminer-opt config.status 3.12.0
cpuminer-opt config.status 3.12.2
configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\"

View File

@@ -1,4 +1,4 @@
AC_INIT([cpuminer-opt], [3.12.0])
AC_INIT([cpuminer-opt], [3.12.2])
AC_PREREQ([2.59c])
AC_CANONICAL_SYSTEM

View File

@@ -883,8 +883,6 @@ static double norm_diff_sum = 0.;
static uint32_t last_block_height = 0;
//static bool new_job = false;
static double last_targetdiff = 0.;
static double ref_rate_hi = 0.;
static double ref_rate_lo = 1e100;
#if !(defined(__WINDOWS__) || defined(_WIN64) || defined(_WIN32))
static uint32_t hi_temp = 0;
#endif
@@ -932,7 +930,6 @@ void report_summary_log( bool force )
uint64_t accepts = accept_sum; accept_sum = 0;
uint64_t rejects = reject_sum; reject_sum = 0;
uint64_t stales = stale_sum; stale_sum = 0;
// int latency = latency_sum; latency_sum = 0;
memcpy( &start_time, &five_min_start, sizeof start_time );
memcpy( &five_min_start, &now, sizeof now );
@@ -943,34 +940,21 @@ void report_summary_log( bool force )
double share_time = (double)et.tv_sec + (double)et.tv_usec / 1e6;
double ghrate = global_hashrate;
double scaled_ghrate = ghrate;
double shrate = share_time == 0. ? 0. : diff_to_hash * last_targetdiff
* (double)(accepts) / share_time;
double sess_hrate = uptime.tv_sec == 0. ? 0. : diff_to_hash * norm_diff_sum
/ (double)uptime.tv_sec;
double scaled_shrate = shrate;
// int avg_latency = 0;
// double latency_pc = 0.;
double submit_rate = 0.;
double submit_rate = share_time == 0. ? 0. : (double)submits*60. / share_time;
char shr_units[4] = {0};
char ghr_units[4] = {0};
char sess_hr_units[4] = {0};
char et_str[24];
char upt_str[24];
// if ( submits ) avg_latency = latency / submits;
if ( share_time != 0. )
{
submit_rate = (double)submits*60. / share_time;
// latency_pc = (double)latency / (share_time * 10.);
}
if ( ghrate > ref_rate_hi ) ref_rate_hi = ghrate;
if ( ghrate < ref_rate_lo ) ref_rate_lo = ghrate;
scale_hash_for_display( &scaled_shrate, shr_units );
scale_hash_for_display( &scaled_ghrate, ghr_units );
scale_hash_for_display( &shrate, shr_units );
scale_hash_for_display( &ghrate, ghr_units );
scale_hash_for_display( &sess_hrate, sess_hr_units );
sprintf_et( et_str, et.tv_sec );
@@ -981,8 +965,26 @@ void report_summary_log( bool force )
submit_rate, (double)submitted_share_count*60. /
( (double)uptime.tv_sec + (double)uptime.tv_usec / 1e6 ) );
applog2( LOG_INFO, "Hash rate %7.2f%sh/s %7.2f%sh/s (%.2f%sh/s)",
scaled_shrate, shr_units, sess_hrate, sess_hr_units,
scaled_ghrate, ghr_units );
shrate, shr_units, sess_hrate, sess_hr_units,
ghrate, ghr_units );
if ( accepted_share_count < submitted_share_count )
{
double lost_ghrate = uptime.tv_sec == 0. ? 0.
: diff_to_hash * last_targetdiff
* (double)(submitted_share_count - accepted_share_count )
/ (double)uptime.tv_sec;
double lost_shrate = share_time == 0. ? 0.
: diff_to_hash * last_targetdiff * (double)(submits - accepts )
/ share_time;
char lshr_units[4] = {0};
char lghr_units[4] = {0};
scale_hash_for_display( &lost_shrate, lshr_units );
scale_hash_for_display( &lost_ghrate, lghr_units );
applog2( LOG_INFO, "Lost hash rate %7.2f%sh/s %7.2f%sh/s",
lost_shrate, lshr_units, lost_ghrate, lghr_units );
}
applog2( LOG_INFO,"Submitted %6d %6d",
submits, submitted_share_count );
applog2( LOG_INFO,"Accepted %6d %6d",
@@ -998,7 +1000,7 @@ void report_summary_log( bool force )
solved_block_count );
}
bool lowdiff_debug = true;
bool lowdiff_debug = false;
static int share_result( int result, struct work *null_work,
const char *reason )
@@ -1068,7 +1070,7 @@ static int share_result( int result, struct work *null_work,
else
{
rejected_share_count++;
if ( strstr( reason, "Low diff " ) ) lowdiff_debug = true;
lowdiff_debug = true;
}
}
@@ -1145,7 +1147,7 @@ static int share_result( int result, struct work *null_work,
bres, share_time, latency );
if ( have_stratum && !opt_quiet )
applog2( LOG_NOTICE, "Diff %.3g (%.3g%), %sBlock %d, %sJob %s" CL_WHT,
applog2( LOG_NOTICE, "Diff %.5g (%.3g%), %sBlock %d, %sJob %s" CL_WHT,
my_stats.share_diff, share_ratio, bcol, stratum.block_height,
scol, my_stats.job_id );
@@ -1163,13 +1165,15 @@ static int share_result( int result, struct work *null_work,
for ( int i = 0; i < 8; i++ )
be32enc( str2 + i, str1[7 - i] );
bin2hex( str3, (unsigned char*)str2, 12 );
applog2( LOG_INFO, "Share diff: %g, Hash: %s...", my_stats.share_diff, str3 );
applog2( LOG_INFO, "Share diff: %.5g, Hash: %s...",
my_stats.share_diff, str3 );
diff_to_target( str1, my_stats.target_diff );
for ( int i = 0; i < 8; i++ )
be32enc( str2 + i, str1[7 - i] );
bin2hex( str3, (unsigned char*)str2, 12 );
applog2( LOG_INFO, "Target diff: %g, Targ: %s...", str3 );
applog2( LOG_INFO, "Target diff: %.5g, Targ: %s...",
my_stats.target_diff, str3 );
}
if ( unlikely( opt_reset_on_stale && stale ) )
@@ -1710,9 +1714,9 @@ if ( lowdiff_debug )
{
uint32_t* h = (uint32_t*)hash;
uint32_t* t = (uint32_t*)work->target;
applog(LOG_INFO,"Hash[7:0}: %08x %08x %08x %08x %08x %08x %08x %08x",
applog(LOG_INFO,"Hash[7:0]: %08x %08x %08x %08x %08x %08x %08x %08x",
h[7],h[6],h[5],h[4],h[3],h[2],h[1],h[0]);
applog(LOG_INFO,"Targ[7:0}: %08x %08x %08x %08x %08x %08x %08x %08x",
applog(LOG_INFO,"Targ[7:0]: %08x %08x %08x %08x %08x %08x %08x %08x",
t[7],t[6],t[5],t[4],t[3],t[2],t[1],t[0]);
}
return true;
@@ -1733,6 +1737,18 @@ bool submit_lane_solution( struct work *work, const void *hash,
if ( !opt_quiet )
applog( LOG_NOTICE, "%d submitted by thread %d, lane %d, job %s",
submitted_share_count, thr->id, lane, work->job_id );
if ( lowdiff_debug )
{
uint32_t* h = (uint32_t*)hash;
uint32_t* t = (uint32_t*)work->target;
applog(LOG_INFO,"Hash[7:0]: %08x %08x %08x %08x %08x %08x %08x %08x",
h[7],h[6],h[5],h[4],h[3],h[2],h[1],h[0]);
applog(LOG_INFO,"Targ[7:0]: %08x %08x %08x %08x %08x %08x %08x %08x",
t[7],t[6],t[5],t[4],t[3],t[2],t[1],t[0]);
}
return true;
}
else
@@ -2117,7 +2133,6 @@ static void *miner_thread( void *userdata )
int lo_freq, hi_freq;
linux_cpu_hilo_freq( &lo_freq, &hi_freq );
memcpy( &cpu_temp_time, &tv_end, sizeof(cpu_temp_time) );
if ( temp > hi_temp ) hi_temp = temp;
if ( use_colors && ( temp >= 70 ) )
{
if ( temp >= 80 )
@@ -2129,6 +2144,7 @@ static void *miner_thread( void *userdata )
sprintf( tempstr, "%d C", temp );
applog( LOG_INFO,"CPU temp: curr %s (max %d), Freq: %.3f/%.3f GHz",
tempstr, hi_temp, (float)lo_freq / 1e6, (float)hi_freq/ 1e6 );
if ( temp > hi_temp ) hi_temp = temp;
}
}
#endif
@@ -2483,7 +2499,7 @@ void std_stratum_gen_work( struct stratum_ctx *sctx, struct work *g_work )
if ( !opt_quiet )
{
applog2( LOG_INFO, "%s: %s", algo_names[opt_algo], short_url );
applog2( LOG_INFO, "Diff: Net %.3g, Stratum %.3g, Target %.3g",
applog2( LOG_INFO, "Diff: Net %.5g, Stratum %.5g, Target %.5g",
net_diff, stratum_diff, last_targetdiff );
if ( likely( hr > 0. ) )

View File

@@ -121,12 +121,9 @@ do { \
// Horizontal vector testing
#define mm256_allbits0( a ) _mm256_testz_si256( a, a )
#define mm256_allbits1( a ) _mm256_testc_si256( a, m256_neg1 )
//broken
//#define mm256_allbitsne( a ) _mm256_testnzc_si256( a, m256_neg1 )
#define mm256_anybits0( a ) !mm256_allbits1( a )
#define mm256_allbits0( a ) _mm256_testc_si256( a, m256_neg1 )
#define mm256_allbits1( a ) _mm256_testz_si256( a, a )
#define mm256_anybits0( a ) !mm256_allbits1( a )
#define mm256_anybits1( a ) !mm256_allbits0( a )