This commit is contained in:
Jay D Dee
2019-07-02 15:10:38 -04:00
parent 0d48d573ce
commit 0d769ee0fe
53 changed files with 1755 additions and 1170 deletions

View File

@@ -15,7 +15,7 @@ void blakehash_4way(void *state, const void *input)
memcpy( &ctx, &blake_4w_ctx, sizeof ctx );
blake256r14_4way( &ctx, input + (64<<2), 16 );
blake256r14_4way_close( &ctx, vhash );
mm128_dintrlv_4x32( state, state+32, state+64, state+96, vhash, 256 );
dintrlv_4x32( state, state+32, state+64, state+96, vhash, 256 );
}
int scanhash_blake_4way( struct work *work, uint32_t max_nonce,

View File

@@ -83,7 +83,7 @@ void blake2s_4way_hash( void *output, const void *input )
blake2s_4way_update( &ctx, input + (64<<2), 16 );
blake2s_4way_final( &ctx, vhash, BLAKE2S_OUTBYTES );
mm128_dintrlv_4x32( output, output+32, output+64, output+96,
dintrlv_4x32( output, output+32, output+64, output+96,
vhash, 256 );
}

View File

@@ -17,7 +17,7 @@ void blakecoin_4way_hash(void *state, const void *input)
blake256r8_4way( &ctx, input + (64<<2), 16 );
blake256r8_4way_close( &ctx, vhash );
mm128_dintrlv_4x32( state, state+32, state+64, state+96, vhash, 256 );
dintrlv_4x32( state, state+32, state+64, state+96, vhash, 256 );
}
int scanhash_blakecoin_4way( struct work *work, uint32_t max_nonce,

View File

@@ -23,7 +23,7 @@ void decred_hash_4way( void *state, const void *input )
memcpy( &ctx, &blake_mid, sizeof(blake_mid) );
blake256_4way( &ctx, tail, tail_len );
blake256_4way_close( &ctx, vhash );
mm128_dintrlv_4x32( state, state+32, state+64, state+96, vhash, 256 );
dintrlv_4x32( state, state+32, state+64, state+96, vhash, 256 );
}
int scanhash_decred_4way( struct work *work, uint32_t max_nonce,

View File

@@ -33,7 +33,7 @@ void myriad_4way_hash( void *output, const void *input )
myrgr_4way_ctx_holder ctx;
memcpy( &ctx, &myrgr_4way_ctx, sizeof(myrgr_4way_ctx) );
mm128_dintrlv_4x32( hash0, hash1, hash2, hash3, input, 640 );
dintrlv_4x32( hash0, hash1, hash2, hash3, input, 640 );
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 640 );
memcpy( &ctx.groestl, &myrgr_4way_ctx.groestl, sizeof(hashState_groestl) );
@@ -43,7 +43,7 @@ void myriad_4way_hash( void *output, const void *input )
memcpy( &ctx.groestl, &myrgr_4way_ctx.groestl, sizeof(hashState_groestl) );
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 640 );
mm128_intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
sha256_4way( &ctx.sha, vhash, 64 );
sha256_4way_close( &ctx.sha, output );
@@ -89,7 +89,7 @@ int scanhash_myriad_4way( struct work *work, uint32_t max_nonce,
for ( int lane = 0; lane < 4; lane++ )
if ( hash7[ lane ] <= Htarg )
{
mm128_extract_lane_4x32( lane_hash, hash, lane, 256 );
extr_lane_4x32( lane_hash, hash, lane, 256 );
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
{
pdata[19] = n + lane;

View File

@@ -89,7 +89,7 @@ int scanhash_jha_4way( struct work *work, uint32_t max_nonce,
uint32_t hash[8*4] __attribute__ ((aligned (64)));
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
uint32_t *hash7 = &(hash[25]);
uint32_t lane_hash[8];
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
@@ -143,7 +143,7 @@ int scanhash_jha_4way( struct work *work, uint32_t max_nonce,
// && fulltest( hash+(i<<3), ptarget ) )
for ( int i = 0; i < 4; i++ ) if ( !( (hash7[i] & mask ) == 0 ) )
{
mm256_extract_lane_4x64( lane_hash, hash, i, 256 );
mm256_extr_lane_4x64( lane_hash, hash, i, 256 );
if ( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
{
pdata[19] = n+i;

View File

@@ -21,8 +21,8 @@ int scanhash_keccak_4way( struct work *work, uint32_t max_nonce,
{
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
uint32_t hash[8*4] __attribute__ ((aligned (32)));
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
uint32_t *hash7 = &(hash[25]); // 3*8+1
uint32_t lane_hash[8];
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
uint32_t n = pdata[19];
@@ -41,7 +41,7 @@ int scanhash_keccak_4way( struct work *work, uint32_t max_nonce,
for ( int lane = 0; lane < 4; lane++ )
if ( ( ( hash7[ lane<<1 ] & 0xFFFFFF00 ) == 0 ) )
{
mm256_extract_lane_4x64( lane_hash, hash, lane, 256 );
mm256_extr_lane_4x64( lane_hash, hash, lane, 256 );
if ( fulltest( lane_hash, ptarget ) )
{
pdata[19] = n + lane;

View File

@@ -36,7 +36,7 @@ void lyra2h_4way_hash( void *state, const void *input )
blake256_4way( &ctx_blake, input + (64*4), 16 );
blake256_4way_close( &ctx_blake, vhash );
mm128_dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 256 );
dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 256 );
LYRA2Z( lyra2h_4way_matrix, state, 32, hash0, 32, hash0, 32,
16, 16, 16 );

View File

@@ -78,7 +78,7 @@ void lyra2rev2_4way_hash( void *state, const void *input )
cubehashInit( &ctx.cube, 256, 16, 32 );
cubehashUpdateDigest( &ctx.cube, (byte*) hash3, (const byte*) hash3, 32 );
mm128_intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 256 );
intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 256 );
bmw256_4way( &ctx.bmw, vhash, 32 );
bmw256_4way_close( &ctx.bmw, state );
@@ -90,7 +90,7 @@ int scanhash_lyra2rev2_4way( struct work *work, uint32_t max_nonce,
uint32_t hash[8*4] __attribute__ ((aligned (64)));
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
uint32_t *hash7 = &(hash[7<<2]);
uint32_t lane_hash[8];
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
@@ -116,7 +116,7 @@ int scanhash_lyra2rev2_4way( struct work *work, uint32_t max_nonce,
for ( int lane = 0; lane < 4; lane++ ) if ( hash7[lane] <= Htarg )
{
mm128_extract_lane_4x32( lane_hash, hash, lane, 256 );
extr_lane_4x32( lane_hash, hash, lane, 256 );
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
{
pdata[19] = n + lane;

View File

@@ -92,7 +92,7 @@ int scanhash_lyra2rev3_8way( struct work *work, uint32_t max_nonce,
uint32_t hash[8*8] __attribute__ ((aligned (64)));
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
uint32_t *hash7 = &(hash[7<<3]);
uint32_t lane_hash[8];
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
@@ -115,7 +115,7 @@ int scanhash_lyra2rev3_8way( struct work *work, uint32_t max_nonce,
for ( int lane = 0; lane < 8; lane++ ) if ( hash7[lane] <= Htarg )
{
mm256_extract_lane_8x32( lane_hash, hash, lane, 256 );
mm256_extr_lane_8x32( lane_hash, hash, lane, 256 );
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
{
pdata[19] = n + lane;
@@ -161,7 +161,7 @@ void lyra2rev3_4way_hash( void *state, const void *input )
blake256_4way( &ctx.blake, input, 80 );
blake256_4way_close( &ctx.blake, vhash );
mm128_dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 256 );
dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 256 );
LYRA2REV3( l2v3_wholeMatrix, hash0, 32, hash0, 32, hash0, 32, 1, 4, 4 );
LYRA2REV3( l2v3_wholeMatrix, hash1, 32, hash1, 32, hash1, 32, 1, 4, 4 );
@@ -181,7 +181,7 @@ void lyra2rev3_4way_hash( void *state, const void *input )
LYRA2REV3( l2v3_wholeMatrix, hash2, 32, hash2, 32, hash2, 32, 1, 4, 4 );
LYRA2REV3( l2v3_wholeMatrix, hash3, 32, hash3, 32, hash3, 32, 1, 4, 4 );
mm128_intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 256 );
intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 256 );
bmw256_4way( &ctx.bmw, vhash, 32 );
bmw256_4way_close( &ctx.bmw, state );
}
@@ -192,7 +192,7 @@ int scanhash_lyra2rev3_4way( struct work *work, uint32_t max_nonce,
uint32_t hash[8*4] __attribute__ ((aligned (64)));
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
uint32_t *hash7 = &(hash[7<<2]);
uint32_t lane_hash[8];
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
@@ -214,7 +214,7 @@ int scanhash_lyra2rev3_4way( struct work *work, uint32_t max_nonce,
for ( int lane = 0; lane < 4; lane++ ) if ( hash7[lane] <= Htarg )
{
mm128_extract_lane_4x32( lane_hash, hash, lane, 256 );
extr_lane_4x32( lane_hash, hash, lane, 256 );
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
{
pdata[19] = n + lane;

View File

@@ -36,7 +36,7 @@ void lyra2z_4way_hash( void *state, const void *input )
blake256_4way( &ctx_blake, input + (64*4), 16 );
blake256_4way_close( &ctx_blake, vhash );
mm128_dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 256 );
dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 256 );
LYRA2Z( lyra2z_4way_matrix, state , 32, hash0, 32, hash0, 32, 8, 8, 8 );
LYRA2Z( lyra2z_4way_matrix, state+32, 32, hash1, 32, hash1, 32, 8, 8, 8 );

View File

@@ -168,7 +168,7 @@ int scanhash_phi2_4way( struct work *work, uint32_t max_nonce,
uint32_t _ALIGN(128) edata[36];
uint32_t vdata[4][36] __attribute__ ((aligned (64)));
uint32_t *hash7 = &(hash[25]);
uint32_t lane_hash[8];
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t Htarg = ptarget[7];
@@ -217,7 +217,7 @@ int scanhash_phi2_4way( struct work *work, uint32_t max_nonce,
for ( int lane = 0; lane < 4; lane++ ) if ( hash7[ lane<<1 ] < Htarg )
{
mm256_extract_lane_4x64( lane_hash, hash, lane, 256 );
mm256_extr_lane_4x64( lane_hash, hash, lane, 256 );
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
{
pdata[19] = n + lane;

View File

@@ -207,6 +207,7 @@ int scanhash_m7m_hash( struct work* work, uint64_t max_nonce,
SHA512_Update( &ctx2.sha512, data_p64, 80 - M7_MIDSTATE_LEN );
SHA512_Final( (unsigned char*) (bhash[1]), &ctx2.sha512 );
sph_keccak512( &ctx2.keccak, data_p64, 80 - M7_MIDSTATE_LEN );
sph_keccak512_close( &ctx2.keccak, (void*)(bhash[2]) );
@@ -222,18 +223,18 @@ int scanhash_m7m_hash( struct work* work, uint64_t max_nonce,
sph_ripemd160( &ctx2.ripemd, data_p64, 80 - M7_MIDSTATE_LEN );
sph_ripemd160_close( &ctx2.ripemd, (void*)(bhash[6]) );
mpz_import(bns0, a, -1, p, -1, 0, bhash[0]);
mpz_import(bns0, a, -1, p, -1, 0, bhash[0]);
mpz_set(bns1, bns0);
mpz_set(product, bns0);
for ( i=1; i < 7; i++ )
mpz_set(product, bns0);
for ( i=1; i < 7; i++ )
{
mpz_import(bns0, a, -1, p, -1, 0, bhash[i]);
mpz_add(bns1, bns1, bns0);
mpz_mul(product, product, bns0);
mpz_import(bns0, a, -1, p, -1, 0, bhash[i]);
mpz_add(bns1, bns1, bns0);
mpz_mul(product, product, bns0);
}
mpz_mul(product, product, bns1);
mpz_mul(product, product, product);
mpz_mul(product, product, product);
bytes = mpz_sizeinbase(product, 256);
mpz_export((void *)bdata, NULL, -1, 1, 0, 0, product);
@@ -243,27 +244,27 @@ int scanhash_m7m_hash( struct work* work, uint64_t max_nonce,
digits=(int)((sqrt((double)(n/2))*(1.+EPS))/9000+75);
mp_bitcnt_t prec = (long int)(digits*BITS_PER_DIGIT+16);
mpf_set_prec_raw(magifpi, prec);
mpf_set_prec_raw(mptmp, prec);
mpf_set_prec_raw(mpt1, prec);
mpf_set_prec_raw(mpt2, prec);
mpf_set_prec_raw(magifpi, prec);
mpf_set_prec_raw(mptmp, prec);
mpf_set_prec_raw(mpt1, prec);
mpf_set_prec_raw(mpt2, prec);
usw_ = sw2_(n/2);
mpzscale = 1;
mpzscale = 1;
mpz_set_ui(magisw, usw_);
for ( i = 0; i < 5; i++ )
{
mpf_set_d(mpt1, 0.25*mpzscale);
mpf_sub(mpt1, mpt1, mpt2);
mpf_sub(mpt1, mpt1, mpt2);
mpf_abs(mpt1, mpt1);
mpf_div(magifpi, magifpi0, mpt1);
mpf_pow_ui(mptmp, mpten, digits >> 1);
mpf_mul(magifpi, magifpi, mptmp);
mpz_set_f(magipi, magifpi);
mpz_set_f(magipi, magifpi);
mpz_add(magipi,magipi,magisw);
mpz_add(product,product,magipi);
mpz_import(bns0, b, -1, p, -1, 0, (void*)(hash));
mpz_import(bns0, b, -1, p, -1, 0, (void*)(hash));
mpz_add(bns1, bns1, bns0);
mpz_mul(product,product,bns1);
mpz_cdiv_q (product, product, bns0);
@@ -275,18 +276,18 @@ int scanhash_m7m_hash( struct work* work, uint64_t max_nonce,
SHA256_Init( &ctxf_sha256 );
SHA256_Update( &ctxf_sha256, bdata, bytes );
SHA256_Final( (unsigned char*) hash, &ctxf_sha256 );
}
}
const unsigned char *hash_ = (const unsigned char *)hash;
const unsigned char *target_ = (const unsigned char *)ptarget;
for ( i = 31; i >= 0; i-- )
const unsigned char *hash_ = (const unsigned char *)hash;
const unsigned char *target_ = (const unsigned char *)ptarget;
for ( i = 31; i >= 0; i-- )
{
if ( hash_[i] != target_[i] )
{
rc = hash_[i] < target_[i];
break;
}
}
if ( hash_[i] != target_[i] )
{
rc = hash_[i] < target_[i];
break;
}
}
if ( unlikely(rc) )
{
if ( opt_debug )
@@ -299,15 +300,15 @@ int scanhash_m7m_hash( struct work* work, uint64_t max_nonce,
hash_str,
target_str);
}
work_set_target_ratio( work, hash );
pdata[19] = data[19];
goto out;
}
submit_solution( work, hash, mythr );
}
} while (n < max_nonce && !work_restart[thr_id].restart);
pdata[19] = n;
out:
// can this be skipped after finding a share? Seems to work ok.
//out:
mpf_set_prec_raw(magifpi, prec0);
mpf_set_prec_raw(magifpi0, prec0);
mpf_set_prec_raw(mptmp, prec0);

View File

@@ -70,7 +70,7 @@ int scanhash_nist5_4way( struct work *work, uint32_t max_nonce,
{
uint32_t hash[4*16] __attribute__ ((aligned (64)));
uint32_t *hash7 = &(hash[25]);
uint32_t lane_hash[8];
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
uint32_t endiandata[20] __attribute__((aligned(64)));
uint32_t *pdata = work->data;
@@ -122,7 +122,7 @@ int scanhash_nist5_4way( struct work *work, uint32_t max_nonce,
for ( int lane = 0; lane < 4; lane++ )
if ( ( hash7[ lane ] & mask ) == 0 )
{
mm256_extract_lane_4x64( lane_hash, hash, lane, 256 );
mm256_extr_lane_4x64( lane_hash, hash, lane, 256 );
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
{
pdata[19] = n + lane;

View File

@@ -575,7 +575,7 @@ int scanhash_hmq1725_4way( struct work *work, uint32_t max_nonce,
{
uint32_t hash[4*8] __attribute__ ((aligned (64)));
// uint32_t *hash7 = &(hash[25]);
// uint32_t lane_hash[8];
// uint32_t lane_hash[8] __attribute__ ((aligned (32)));
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;

View File

@@ -191,7 +191,7 @@ int scanhash_quark_4way( struct work *work, uint32_t max_nonce,
for ( int i = 0; i < 4; i++ )
if ( ( hash7[ i<<1 ] & 0xFFFFFF00 ) == 0 )
{
mm256_extract_lane_4x64( lane_hash, hash, i, 256 );
mm256_extr_lane_4x64( lane_hash, hash, i, 256 );
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
{
pdata[19] = n+i;

View File

@@ -118,7 +118,7 @@ int scanhash_lbry_8way( struct work *work, uint32_t max_nonce,
for ( int i = 0; i < 8; i++ ) if ( !( hash7[ i ] & mask ) )
{
// deinterleave hash for lane
mm256_extract_lane_8x32( lane_hash, hash, i, 256 );
mm256_extr_lane_8x32( lane_hash, hash, i, 256 );
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
{
pdata[27] = n + i;

View File

@@ -36,6 +36,7 @@ int scanhash_sha256q_8way( struct work *work, uint32_t max_nonce,
{
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
uint32_t hash[8*8] __attribute__ ((aligned (32)));
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t Htarg = ptarget[7];
@@ -79,8 +80,7 @@ int scanhash_sha256q_8way( struct work *work, uint32_t max_nonce,
if ( !( hash7[ lane ] & mask ) )
{
// deinterleave hash for lane
uint32_t lane_hash[8];
mm256_extract_lane_8x32( lane_hash, hash, lane, 256 );
mm256_extr_lane_8x32( lane_hash, hash, lane, 256 );
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
{
@@ -130,7 +130,7 @@ int scanhash_sha256q_4way( struct work *work, uint32_t max_nonce,
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
uint32_t hash[8*4] __attribute__ ((aligned (32)));
uint32_t *hash7 = &(hash[7<<2]);
uint32_t lane_hash[8];
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t Htarg = ptarget[7];
@@ -168,7 +168,7 @@ int scanhash_sha256q_4way( struct work *work, uint32_t max_nonce,
for ( int lane = 0; lane < 4; lane++ )
if ( !( hash7[ lane ] & mask ) )
{
mm128_extract_lane_4x32( lane_hash, hash, lane, 256 );
extr_lane_4x32( lane_hash, hash, lane, 256 );
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
{

View File

@@ -99,7 +99,7 @@ int scanhash_sha256t_11way( struct work *work, uint32_t max_nonce,
for ( i = 0; i < 8; i++ ) if ( !( hash7[ i ] & mask ) )
{
// deinterleave hash for lane
mm256_extract_lane_8x32( lane_hash, hashx, i, 256 );
mm256_extr_lane_8x32( lane_hash, hashx, i, 256 );
if ( fulltest( lane_hash, ptarget ) )
{
pdata[19] = n + i;
@@ -111,7 +111,7 @@ int scanhash_sha256t_11way( struct work *work, uint32_t max_nonce,
for( i = 0; i < 2; i++ ) if ( !(hash7[ 0] & mask ) )
{
mm64_extract_lane_2x32( lane_hash, hashy, i, 256 );
mm64_extr_lane_2x32( lane_hash, hashy, i, 256 );
if ( fulltest( lane_hash, ptarget ) )
{
pdata[19] = n + 8 + i;
@@ -204,7 +204,7 @@ int scanhash_sha256t_8way( struct work *work, uint32_t max_nonce,
if ( !( hash7[ lane ] & mask ) )
{
// deinterleave hash for lane
mm256_extract_lane_8x32( lane_hash, hash, lane, 256 );
mm256_extr_lane_8x32( lane_hash, hash, lane, 256 );
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
{
pdata[19] = n + lane;
@@ -287,7 +287,7 @@ int scanhash_sha256t_4way( struct work *work, uint32_t max_nonce,
for ( int lane = 0; lane < 4; lane++ )
if ( !( hash7[ lane ] & mask ) )
{
mm128_extract_lane_4x32( lane_hash, hash, lane, 256 );
extr_lane_4x32( lane_hash, hash, lane, 256 );
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
{
pdata[19] = n + lane;

View File

@@ -48,7 +48,7 @@ void skeinhash_4way( void *state, const void *input )
SHA256_Update( &ctx_sha256, (unsigned char*)hash3, 64 );
SHA256_Final( (unsigned char*)hash3, &ctx_sha256 );
mm128_intrlv_4x32( state, hash0, hash1, hash2, hash3, 256 );
intrlv_4x32( state, hash0, hash1, hash2, hash3, 256 );
#else
mm256_rintrlv_4x64_4x32( vhash32, vhash64, 512 );
@@ -63,7 +63,7 @@ int scanhash_skein_4way( struct work *work, uint32_t max_nonce,
{
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
uint32_t hash[8*4] __attribute__ ((aligned (64)));
uint32_t lane_hash[8];
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
uint32_t *hash7 = &(hash[7<<2]);
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
@@ -84,7 +84,7 @@ int scanhash_skein_4way( struct work *work, uint32_t max_nonce,
for ( int lane = 0; lane < 4; lane++ )
if ( hash7[ lane ] <= Htarg )
{
mm128_extract_lane_4x32( lane_hash, hash, lane, 256 );
extr_lane_4x32( lane_hash, hash, lane, 256 );
if ( fulltest( lane_hash, ptarget ) )
{
pdata[19] = n + lane;

View File

@@ -23,29 +23,41 @@ int scanhash_skein2_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t hash[8*4] __attribute__ ((aligned (64)));
uint32_t *hash7 = &(hash[25]);
uint32_t edata[20] __attribute__ ((aligned (64)));
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
uint32_t *hash7 = &(hash[25]);
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t Htarg = ptarget[7];
const uint32_t first_nonce = pdata[19];
uint32_t n = first_nonce;
__m256i *noncev = (__m256i*)vdata + 9; // aligned
// __m256i *noncev = (__m256i*)vdata + 9; // aligned
int thr_id = mythr->id; // thr_id arg is deprecated
uint32_t *noncep = vdata + 73; // 9*8 + 1
mm256_bswap_intrlv80_4x64( vdata, pdata );
swab32_array( edata, pdata, 20 );
mm256_intrlv_4x64( vdata, edata, edata, edata, edata, 640 );
// mm256_bswap_intrlv80_4x64( vdata, pdata );
do
{
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
be32enc( noncep, n );
be32enc( noncep+2, n+1 );
be32enc( noncep+4, n+2 );
be32enc( noncep+6, n+3 );
// *noncev = mm256_intrlv_blend_32( mm256_bswap_32(
// _mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
skein2hash_4way( hash, vdata );
for ( int lane = 0; lane < 4; lane++ )
if ( hash7[ lane<<1 ] <= Htarg )
{
uint32_t lane_hash[8];
mm256_extract_lane_4x64( lane_hash, hash, lane, 256 );
mm256_extr_lane_4x64( lane_hash, hash, lane, 256 );
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
{
pdata[19] = n + lane;

View File

@@ -171,18 +171,14 @@ int scanhash_c11_4way( struct work *work, uint32_t max_nonce,
uint32_t n = pdata[19];
const uint32_t first_nonce = pdata[19];
int thr_id = mythr->id; // thr_id arg is deprecated
uint32_t *noncep = vdata + 73; // 9*8 + 1
__m256i *noncev = (__m256i*)vdata + 9; // aligned
const uint32_t Htarg = ptarget[7];
uint64_t htmax[] = { 0, 0xF, 0xFF,
0xFFF, 0xFFFF, 0x10000000 };
uint32_t masks[] = { 0xFFFFFFFF, 0xFFFFFFF0, 0xFFFFFF00,
0xFFFFF000, 0xFFFF0000, 0 };
// big endian encode 0..18 uint32_t, 64 bits at a time
swab32_array( endiandata, pdata, 20 );
uint64_t *edata = (uint64_t*)endiandata;
mm256_intrlv_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
mm256_bswap_intrlv80_4x64( vdata, pdata );
for (int m=0; m < 6; m++)
if (Htarg <= htmax[m])
@@ -190,10 +186,8 @@ int scanhash_c11_4way( struct work *work, uint32_t max_nonce,
uint32_t mask = masks[m];
do
{
be32enc( noncep, n );
be32enc( noncep+2, n+1 );
be32enc( noncep+4, n+2 );
be32enc( noncep+6, n+3 );
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
c11_4way_hash( hash, vdata );
pdata[19] = n;

View File

@@ -64,13 +64,12 @@ int scanhash_tribus_4way( struct work *work, uint32_t max_nonce,
{
uint32_t hash[4*8] __attribute__ ((aligned (64)));
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
uint32_t _ALIGN(128) endiandata[20];
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
const uint32_t Htarg = ptarget[7];
uint32_t n = pdata[19];
uint32_t *noncep = vdata + 73; // 9*8 + 1
__m256i *noncev = (__m256i*)vdata + 9; // aligned
int thr_id = mythr->id; // thr_id arg is deprecated
uint64_t htmax[] = { 0,
@@ -87,14 +86,7 @@ int scanhash_tribus_4way( struct work *work, uint32_t max_nonce,
0xFFFF0000,
0 };
// we need bigendian data...
for ( int i = 0; i < 20; i++ )
{
be32enc( &endiandata[i], pdata[i] );
}
uint64_t *edata = (uint64_t*)endiandata;
mm256_intrlv_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
mm256_bswap_intrlv80_4x64( vdata, pdata );
// precalc midstate
// doing it one way then then interleaving would be faster but too
@@ -108,10 +100,8 @@ int scanhash_tribus_4way( struct work *work, uint32_t max_nonce,
{
uint32_t mask = masks[m];
do {
be32enc( noncep, n );
be32enc( noncep+2, n+1 );
be32enc( noncep+4, n+2 );
be32enc( noncep+6, n+3 );
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
tribus_hash_4way( hash, vdata );

View File

@@ -170,18 +170,14 @@ int scanhash_x11_4way( struct work *work, uint32_t max_nonce,
uint32_t n = pdata[19];
const uint32_t first_nonce = pdata[19];
int thr_id = mythr->id; // thr_id arg is deprecated
uint32_t *noncep = vdata + 73; // 9*8 + 1
__m256i *noncev = (__m256i*)vdata + 9; // aligned
const uint32_t Htarg = ptarget[7];
uint64_t htmax[] = { 0, 0xF, 0xFF,
0xFFF, 0xFFFF, 0x10000000 };
uint32_t masks[] = { 0xFFFFFFFF, 0xFFFFFFF0, 0xFFFFFF00,
0xFFFFF000, 0xFFFF0000, 0 };
// big endian encode 0..18 uint32_t, 64 bits at a time
swab32_array( endiandata, pdata, 20 );
uint64_t *edata = (uint64_t*)endiandata;
mm256_intrlv_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
mm256_bswap_intrlv80_4x64( vdata, pdata );
for (int m=0; m < 6; m++)
if (Htarg <= htmax[m])
@@ -189,10 +185,8 @@ int scanhash_x11_4way( struct work *work, uint32_t max_nonce,
uint32_t mask = masks[m];
do
{
be32enc( noncep, n );
be32enc( noncep+2, n+1 );
be32enc( noncep+4, n+2 );
be32enc( noncep+6, n+3 );
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
x11_4way_hash( hash, vdata );
pdata[19] = n;

View File

@@ -171,24 +171,19 @@ int scanhash_x11gost_4way( struct work *work, uint32_t max_nonce,
{
uint32_t hash[4*8] __attribute__ ((aligned (64)));
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
uint32_t endiandata[20] __attribute__((aligned(64)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
uint32_t n = pdata[19];
const uint32_t first_nonce = pdata[19];
int thr_id = mythr->id; // thr_id arg is deprecated
uint32_t *noncep = vdata + 73; // 9*8 + 1
__m256i *noncev = (__m256i*)vdata + 9; // aligned
const uint32_t Htarg = ptarget[7];
uint64_t htmax[] = { 0, 0xF, 0xFF,
0xFFF, 0xFFFF, 0x10000000 };
uint32_t masks[] = { 0xFFFFFFFF, 0xFFFFFFF0, 0xFFFFFF00,
0xFFFFF000, 0xFFFF0000, 0 };
// big endian encode 0..18 uint32_t, 64 bits at a time
swab32_array( endiandata, pdata, 20 );
uint64_t *edata = (uint64_t*)endiandata;
mm256_intrlv_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
mm256_bswap_intrlv80_4x64( vdata, pdata );
for (int m=0; m < 6; m++)
if (Htarg <= htmax[m])
@@ -196,10 +191,8 @@ int scanhash_x11gost_4way( struct work *work, uint32_t max_nonce,
uint32_t mask = masks[m];
do
{
be32enc( noncep, n );
be32enc( noncep+2, n+1 );
be32enc( noncep+4, n+2 );
be32enc( noncep+6, n+3 );
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
x11gost_4way_hash( hash, vdata );
pdata[19] = n;

View File

@@ -78,29 +78,23 @@ int scanhash_skunk_4way( struct work *work, uint32_t max_nonce,
{
uint32_t hash[4*8] __attribute__ ((aligned (64)));
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
uint32_t endiandata[20] __attribute__((aligned(64)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
uint32_t n = first_nonce;
uint32_t *noncep = vdata + 73; // 9*8 + 1
__m256i *noncev = (__m256i*)vdata + 9; // aligned
const uint32_t Htarg = ptarget[7];
int thr_id = mythr->id; // thr_id arg is deprecated
volatile uint8_t *restart = &(work_restart[thr_id].restart);
if ( opt_benchmark )
((uint32_t*)ptarget)[7] = 0x0cff;
for ( int k = 0; k < 19; k++ )
be32enc( &endiandata[k], pdata[k] );
uint64_t *edata = (uint64_t*)endiandata;
mm256_intrlv_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
mm256_bswap_intrlv80_4x64( vdata, pdata );
do
{
be32enc( noncep, n );
be32enc( noncep+2, n+1 );
be32enc( noncep+4, n+2 );
be32enc( noncep+6, n+3 );
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
skunk_4way_hash( hash, vdata );
pdata[19] = n;

View File

@@ -189,12 +189,11 @@ int scanhash_x13_4way( struct work *work, uint32_t max_nonce,
{
uint32_t hash[4*8] __attribute__ ((aligned (64)));
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
uint32_t endiandata[20] __attribute__((aligned(64)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
uint32_t n = pdata[19];
const uint32_t first_nonce = pdata[19];
uint32_t *noncep = vdata + 73; // 9*8 + 1
__m256i *noncev = (__m256i*)vdata + 9; // aligned
int thr_id = mythr->id; // thr_id arg is deprecated
const uint32_t Htarg = ptarget[7];
uint64_t htmax[] = { 0, 0xF, 0xFF,
@@ -202,11 +201,7 @@ int scanhash_x13_4way( struct work *work, uint32_t max_nonce,
uint32_t masks[] = { 0xFFFFFFFF, 0xFFFFFFF0, 0xFFFFFF00,
0xFFFFF000, 0xFFFF0000, 0 };
// big endian encode 0..18 uint32_t, 64 bits at a time
swab32_array( endiandata, pdata, 20 );
uint64_t *edata = (uint64_t*)endiandata;
mm256_intrlv_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
mm256_bswap_intrlv80_4x64( vdata, pdata );
for ( int m=0; m < 6; m++ )
if ( Htarg <= htmax[m] )
@@ -214,10 +209,8 @@ int scanhash_x13_4way( struct work *work, uint32_t max_nonce,
uint32_t mask = masks[m];
do
{
be32enc( noncep, n );
be32enc( noncep+2, n+1 );
be32enc( noncep+4, n+2 );
be32enc( noncep+6, n+3 );
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
x13_4way_hash( hash, vdata );
pdata[19] = n;

View File

@@ -166,7 +166,7 @@ void x13sm3_4way_hash( void *state, const void *input )
update_final_echo( &ctx.echo, (BitSequence *)hash3,
(const BitSequence *) hash3, 512 );
mm128_intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
// SM3 parallel 32 bit
uint32_t sm3_vhash[32*4] __attribute__ ((aligned (64)));
@@ -182,7 +182,7 @@ void x13sm3_4way_hash( void *state, const void *input )
sm3_4way( &ctx.sm3, vhash, 64 );
sm3_4way_close( &ctx.sm3, sm3_vhash );
mm128_dintrlv_4x32( hash0, hash1, hash2, hash3, sm3_vhash, 512 );
dintrlv_4x32( hash0, hash1, hash2, hash3, sm3_vhash, 512 );
// Hamsi parallel 4x32x2
mm256_intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
@@ -214,12 +214,11 @@ int scanhash_x13sm3_4way( struct work *work, uint32_t max_nonce,
{
uint32_t hash[4*8] __attribute__ ((aligned (64)));
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
uint32_t endiandata[20] __attribute__((aligned(64)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
uint32_t n = pdata[19];
const uint32_t first_nonce = pdata[19];
uint32_t *noncep = vdata + 73; // 9*8 + 1
__m256i *noncev = (__m256i*)vdata + 9; // aligned
int thr_id = mythr->id; // thr_id arg is deprecated
const uint32_t Htarg = ptarget[7];
uint64_t htmax[] = { 0, 0xF, 0xFF,
@@ -227,11 +226,7 @@ int scanhash_x13sm3_4way( struct work *work, uint32_t max_nonce,
uint32_t masks[] = { 0xFFFFFFFF, 0xFFFFFFF0, 0xFFFFFF00,
0xFFFFF000, 0xFFFF0000, 0 };
// big endian encode 0..18 uint32_t, 64 bits at a time
swab32_array( endiandata, pdata, 20 );
uint64_t *edata = (uint64_t*)endiandata;
mm256_intrlv_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
mm256_bswap_intrlv80_4x64( vdata, pdata );
blake512_4way_init( &x13sm3_ctx_mid );
blake512_4way( &x13sm3_ctx_mid, vdata, 64 );
@@ -242,10 +237,8 @@ int scanhash_x13sm3_4way( struct work *work, uint32_t max_nonce,
uint32_t mask = masks[m];
do
{
be32enc( noncep, n );
be32enc( noncep+2, n+1 );
be32enc( noncep+4, n+2 );
be32enc( noncep+6, n+3 );
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
x13sm3_4way_hash( hash, vdata );
pdata[19] = n;

View File

@@ -43,7 +43,7 @@ void polytimos_4way_hash( void *output, const void *input )
shabal512_4way_init( &ctx.shabal );
shabal512_4way( &ctx.shabal, vhash32, 64 );
shabal512_4way_close( &ctx.shabal, vhash32 );
mm128_dintrlv_4x32( hash0, hash1, hash2, hash3, vhash32, 512 );
dintrlv_4x32( hash0, hash1, hash2, hash3, vhash32, 512 );
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence *)hash0,

View File

@@ -54,10 +54,10 @@ void veltor_4way_hash( void *output, const void *input )
sph_shavite512( &ctx.shavite, hash3, 64 );
sph_shavite512_close( &ctx.shavite, hash3 );
mm128_intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
shabal512_4way( &ctx.shabal, vhash, 64 );
shabal512_4way_close( &ctx.shabal, vhash );
mm128_dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 512 );
dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 512 );
sph_gost512( &ctx.gost, hash0, 64 );
sph_gost512_close( &ctx.gost, hash0 );
@@ -82,31 +82,24 @@ int scanhash_veltor_4way( struct work *work, uint32_t max_nonce,
{
uint32_t hash[4*8] __attribute__ ((aligned (64)));
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
uint32_t endiandata[20] __attribute__((aligned(64)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t Htarg = ptarget[7];
const uint32_t first_nonce = pdata[19];
uint32_t n = first_nonce;
uint32_t *noncep = vdata + 73; // 9*8 + 1
__m256i *noncev = (__m256i*)vdata + 9; // aligned
int thr_id = mythr->id; // thr_id arg is deprecated
volatile uint8_t *restart = &(work_restart[thr_id].restart);
if ( opt_benchmark )
ptarget[7] = 0x0cff;
for ( int i=0; i < 19; i++ )
{
be32enc( &endiandata[i], pdata[i] );
}
uint64_t *edata = (uint64_t*)endiandata;
mm256_intrlv_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
mm256_bswap_intrlv80_4x64( vdata, pdata );
do
{
be32enc( noncep, n );
be32enc( noncep+2, n+1 );
be32enc( noncep+4, n+2 );
be32enc( noncep+6, n+3 );
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
veltor_4way_hash( hash, vdata );
pdata[19] = n;

View File

@@ -183,10 +183,9 @@ void x14_4way_hash( void *state, const void *input )
sph_fugue512_close( &ctx.fugue, hash3 );
// 14 Shabal, parallel 32 bit
mm128_intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
shabal512_4way( &ctx.shabal, vhash, 64 );
shabal512_4way_close( &ctx.shabal, state );
}
int scanhash_x14_4way( struct work *work, uint32_t max_nonce,
@@ -194,12 +193,11 @@ int scanhash_x14_4way( struct work *work, uint32_t max_nonce,
{
uint32_t hash[4*16] __attribute__ ((aligned (64)));
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
uint32_t endiandata[20] __attribute__((aligned(64)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
uint32_t n = pdata[19];
const uint32_t first_nonce = pdata[19];
uint32_t *noncep = vdata + 73; // 9*8 + 1
__m256i *noncev = (__m256i*)vdata + 9; // aligned
const uint32_t Htarg = ptarget[7];
int thr_id = mythr->id; // thr_id arg is deprecated
uint64_t htmax[] = { 0, 0xF, 0xFF,
@@ -207,11 +205,7 @@ int scanhash_x14_4way( struct work *work, uint32_t max_nonce,
uint32_t masks[] = { 0xFFFFFFFF, 0xFFFFFFF0, 0xFFFFFF00,
0xFFFFF000, 0xFFFF0000, 0 };
// big endian encode 0..18 uint32_t, 64 bits at a time
swab32_array( endiandata, pdata, 20 );
uint64_t *edata = (uint64_t*)endiandata;
mm256_intrlv_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
mm256_bswap_intrlv80_4x64( vdata, pdata );
for ( int m=0; m < 6; m++ )
if ( Htarg <= htmax[m] )
@@ -219,10 +213,8 @@ int scanhash_x14_4way( struct work *work, uint32_t max_nonce,
uint32_t mask = masks[m];
do
{
be32enc( noncep, n );
be32enc( noncep+2, n+1 );
be32enc( noncep+4, n+2 );
be32enc( noncep+6, n+3 );
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
x14_4way_hash( hash, vdata );
pdata[19] = n;
@@ -234,7 +226,7 @@ int scanhash_x14_4way( struct work *work, uint32_t max_nonce,
{
// deinterleave hash for lane
uint32_t lane_hash[8];
mm128_extract_lane_4x32( lane_hash, hash, lane, 256 );
extr_lane_4x32( lane_hash, hash, lane, 256 );
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
{

View File

@@ -186,10 +186,10 @@ void x15_4way_hash( void *state, const void *input )
sph_fugue512_close( &ctx.fugue, hash3 );
// 14 Shabal, parallel 32 bit
mm128_intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
shabal512_4way( &ctx.shabal, vhash, 64 );
shabal512_4way_close( &ctx.shabal, vhash );
mm128_dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 512 );
dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 512 );
// 15 Whirlpool
sph_whirlpool( &ctx.whirlpool, hash0, 64 );
@@ -218,12 +218,11 @@ int scanhash_x15_4way( struct work *work, uint32_t max_nonce,
{
uint32_t hash[4*8] __attribute__ ((aligned (64)));
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
uint32_t endiandata[20] __attribute__((aligned(64)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
uint32_t n = pdata[19];
const uint32_t first_nonce = pdata[19];
uint32_t *noncep = vdata + 73; // 9*8 + 1
__m256i *noncev = (__m256i*)vdata + 9; // aligned
const uint32_t Htarg = ptarget[7];
int thr_id = mythr->id; // thr_id arg is deprecated
uint64_t htmax[] = { 0, 0xF, 0xFF,
@@ -231,11 +230,7 @@ int scanhash_x15_4way( struct work *work, uint32_t max_nonce,
uint32_t masks[] = { 0xFFFFFFFF, 0xFFFFFFF0, 0xFFFFFF00,
0xFFFFF000, 0xFFFF0000, 0 };
// big endian encode 0..18 uint32_t, 64 bits at a time
swab32_array( endiandata, pdata, 20 );
uint64_t *edata = (uint64_t*)endiandata;
mm256_intrlv_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
mm256_bswap_intrlv80_4x64( vdata, pdata );
for ( int m=0; m < 6; m++ )
if ( Htarg <= htmax[m] )
@@ -243,10 +238,8 @@ int scanhash_x15_4way( struct work *work, uint32_t max_nonce,
uint32_t mask = masks[m];
do
{
be32enc( noncep, n );
be32enc( noncep+2, n+1 );
be32enc( noncep+4, n+2 );
be32enc( noncep+6, n+3 );
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
x15_4way_hash( hash, vdata );
pdata[19] = n;

View File

@@ -248,11 +248,11 @@ void x16r_4way_hash( void* output, const void* input )
sph_fugue512_close( &ctx.fugue, hash3 );
break;
case SHABAL:
mm128_intrlv_4x32( vhash, in0, in1, in2, in3, size<<3 );
intrlv_4x32( vhash, in0, in1, in2, in3, size<<3 );
shabal512_4way_init( &ctx.shabal );
shabal512_4way( &ctx.shabal, vhash, size );
shabal512_4way_close( &ctx.shabal, vhash );
mm128_dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 512 );
dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 512 );
break;
case WHIRLPOOL:
sph_whirlpool_init( &ctx.whirlpool );

View File

@@ -390,7 +390,7 @@ void sonoa_4way_hash( void *state, const void *input )
sph_fugue512( &ctx.fugue, hash3, 64 );
sph_fugue512_close( &ctx.fugue, hash3 );
mm128_intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
shabal512_4way_init( &ctx.shabal );
shabal512_4way( &ctx.shabal, vhash, 64 );
@@ -438,7 +438,7 @@ void sonoa_4way_hash( void *state, const void *input )
shabal512_4way( &ctx.shabal, vhashB, 64 );
shabal512_4way_close( &ctx.shabal, vhash );
mm128_dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 512 );
dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 512 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
@@ -522,13 +522,13 @@ void sonoa_4way_hash( void *state, const void *input )
sph_fugue512( &ctx.fugue, hash3, 64 );
sph_fugue512_close( &ctx.fugue, hash3 );
mm128_intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
shabal512_4way_init( &ctx.shabal );
shabal512_4way( &ctx.shabal, vhash, 64 );
shabal512_4way_close( &ctx.shabal, vhash );
mm128_dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 512 );
dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 512 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash0, 64 );
@@ -635,13 +635,13 @@ void sonoa_4way_hash( void *state, const void *input )
sph_fugue512( &ctx.fugue, hash3, 64 );
sph_fugue512_close( &ctx.fugue, hash3 );
mm128_intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
shabal512_4way_init( &ctx.shabal );
shabal512_4way( &ctx.shabal, vhash, 64 );
shabal512_4way_close( &ctx.shabal, vhash );
mm128_dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 512 );
dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 512 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash0, 64 );
@@ -769,13 +769,13 @@ void sonoa_4way_hash( void *state, const void *input )
sph_fugue512( &ctx.fugue, hash3, 64 );
sph_fugue512_close( &ctx.fugue, hash3 );
mm128_intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
shabal512_4way_init( &ctx.shabal );
shabal512_4way( &ctx.shabal, vhash, 64 );
shabal512_4way_close( &ctx.shabal, vhash );
mm128_dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 512 );
dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 512 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash0, 64 );
@@ -807,9 +807,9 @@ int scanhash_sonoa_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t hash[4*8] __attribute__ ((aligned (64)));
uint32_t *hash7 = &(hash[7<<2]);
uint32_t lane_hash[8];
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
uint32_t *hash7 = &(hash[7<<2]);
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
uint32_t n = pdata[19];
@@ -837,7 +837,7 @@ int scanhash_sonoa_4way( struct work *work, uint32_t max_nonce,
for ( int lane = 0; lane < 4; lane++ )
if ( ( ( hash7[ lane ] & mask ) == 0 ) )
{
mm128_extract_lane_4x32( lane_hash, hash, lane, 256 );
extr_lane_4x32( lane_hash, hash, lane, 256 );
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
{
pdata[19] = n + lane;

View File

@@ -124,8 +124,8 @@ void x17_4way_hash( void *state, const void *input )
simd_2way_init( &ctx.simd, 512 );
simd_2way_update_close( &ctx.simd, vhashB, vhashB, 512 );
mm256_dintrlv_2x128( hash0, hash1, vhashA, 512 );
mm256_dintrlv_2x128( hash2, hash3, vhashB, 512 );
mm256_dintrlv_2x128_512( hash0, hash1, vhashA );
mm256_dintrlv_2x128_512( hash2, hash3, vhashB );
// 11 Echo serial
init_echo( &ctx.echo, 512 );
@@ -165,13 +165,13 @@ void x17_4way_hash( void *state, const void *input )
sph_fugue512_close( &ctx.fugue, hash3 );
// 14 Shabal, parallel 4 way 32 bit
mm128_intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
shabal512_4way_init( &ctx.shabal );
shabal512_4way( &ctx.shabal, vhash, 64 );
shabal512_4way_close( &ctx.shabal, vhash );
mm128_dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 512 );
dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 512 );
// 15 Whirlpool serial
sph_whirlpool_init( &ctx.whirlpool );
@@ -206,9 +206,9 @@ int scanhash_x17_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t hash[4*8] __attribute__ ((aligned (64)));
uint32_t *hash7 = &(hash[7<<2]);
uint32_t lane_hash[8];
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
uint32_t *hash7 = &(hash[7<<2]);
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
uint32_t n = pdata[19];
@@ -235,7 +235,7 @@ int scanhash_x17_4way( struct work *work, uint32_t max_nonce,
for ( int lane = 0; lane < 4; lane++ )
if ( ( hash7[ lane ] & mask ) == 0 )
{
mm128_extract_lane_4x32( lane_hash, hash, lane, 256 );
extr_lane_4x32( lane_hash, hash, lane, 256 );
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
{
pdata[19] = n + lane;

View File

@@ -161,13 +161,13 @@ void xevan_4way_hash( void *output, const void *input )
sph_fugue512_close( &ctx.fugue, hash3 );
// Parallel 4way 32 bit
mm128_intrlv_4x32( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );
intrlv_4x32( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );
shabal512_4way_init( &ctx.shabal );
shabal512_4way( &ctx.shabal, vhash, dataLen );
shabal512_4way_close( &ctx.shabal, vhash );
mm128_dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, dataLen<<3 );
dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, dataLen<<3 );
// Serial
sph_whirlpool_init( &ctx.whirlpool );
@@ -295,13 +295,13 @@ void xevan_4way_hash( void *output, const void *input )
sph_fugue512( &ctx.fugue, hash3, dataLen );
sph_fugue512_close( &ctx.fugue, hash3 );
mm128_intrlv_4x32( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );
intrlv_4x32( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );
shabal512_4way_init( &ctx.shabal );
shabal512_4way( &ctx.shabal, vhash, dataLen );
shabal512_4way_close( &ctx.shabal, vhash );
mm128_dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, dataLen<<3 );
dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, dataLen<<3 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash0, dataLen );
@@ -333,9 +333,9 @@ int scanhash_xevan_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t hash[4*8] __attribute__ ((aligned (64)));
uint32_t *hash7 = &(hash[7<<2]);
uint32_t lane_hash[8];
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
uint32_t *hash7 = &(hash[7<<2]);
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
int thr_id = mythr->id; // thr_id arg is deprecated
@@ -357,7 +357,7 @@ int scanhash_xevan_4way( struct work *work, uint32_t max_nonce,
for ( int lane = 0; lane < 4; lane++ )
if ( hash7[ lane ] <= Htarg )
{
mm128_extract_lane_4x32( lane_hash, hash, lane, 256 );
extr_lane_4x32( lane_hash, hash, lane, 256 );
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
{
pdata[19] = n + lane;