mirror of
https://github.com/JayDDee/cpuminer-opt.git
synced 2025-09-17 23:44:27 +00:00
v3.9.5
This commit is contained in:
@@ -69,7 +69,7 @@ void c11_4way_hash( void *state, const void *input )
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
|
||||
// Serial
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
// 3 Groestl
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||
@@ -81,7 +81,7 @@ void c11_4way_hash( void *state, const void *input )
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
||||
|
||||
// 4way
|
||||
mm256_interleave_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
mm256_intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
|
||||
// 4 JH
|
||||
jh512_4way( &ctx.jh, vhash, 64 );
|
||||
@@ -96,16 +96,16 @@ void c11_4way_hash( void *state, const void *input )
|
||||
skein512_4way_close( &ctx.skein, vhash );
|
||||
|
||||
// Serial
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
// 7 Luffa
|
||||
mm256_interleave_2x128( vhash, hash0, hash1, 512 );
|
||||
mm256_interleave_2x128( vhashB, hash2, hash3, 512 );
|
||||
mm256_intrlv_2x128( vhash, hash0, hash1, 512 );
|
||||
mm256_intrlv_2x128( vhashB, hash2, hash3, 512 );
|
||||
luffa_2way_update_close( &ctx.luffa, vhash, vhash, 64 );
|
||||
luffa_2way_init( &ctx.luffa, 512 );
|
||||
luffa_2way_update_close( &ctx.luffa, vhashB, vhashB, 64 );
|
||||
mm256_deinterleave_2x128( hash0, hash1, vhash, 512 );
|
||||
mm256_deinterleave_2x128( hash2, hash3, vhashB, 512 );
|
||||
mm256_dintrlv_2x128( hash0, hash1, vhash, 512 );
|
||||
mm256_dintrlv_2x128( hash2, hash3, vhashB, 512 );
|
||||
|
||||
// 8 Cubehash
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash0, (const byte*) hash0, 64 );
|
||||
@@ -133,13 +133,13 @@ void c11_4way_hash( void *state, const void *input )
|
||||
sph_shavite512_close( &ctx.shavite, hash3 );
|
||||
|
||||
// 10 Simd
|
||||
mm256_interleave_2x128( vhash, hash0, hash1, 512 );
|
||||
mm256_interleave_2x128( vhashB, hash2, hash3, 512 );
|
||||
mm256_intrlv_2x128( vhash, hash0, hash1, 512 );
|
||||
mm256_intrlv_2x128( vhashB, hash2, hash3, 512 );
|
||||
simd_2way_update_close( &ctx.simd, vhash, vhash, 512 );
|
||||
simd_2way_init( &ctx.simd, 512 );
|
||||
simd_2way_update_close( &ctx.simd, vhashB, vhashB, 512 );
|
||||
mm256_deinterleave_2x128( hash0, hash1, vhash, 512 );
|
||||
mm256_deinterleave_2x128( hash2, hash3, vhashB, 512 );
|
||||
mm256_dintrlv_2x128( hash0, hash1, vhash, 512 );
|
||||
mm256_dintrlv_2x128( hash2, hash3, vhashB, 512 );
|
||||
|
||||
// 11 Echo
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash0,
|
||||
@@ -160,8 +160,8 @@ void c11_4way_hash( void *state, const void *input )
|
||||
memcpy( state+96, hash3, 32 );
|
||||
}
|
||||
|
||||
int scanhash_c11_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
int scanhash_c11_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[4*8] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
||||
@@ -170,8 +170,7 @@ int scanhash_c11_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[19];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t *nonces = work->nonces;
|
||||
int num_found = 0;
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
uint32_t *noncep = vdata + 73; // 9*8 + 1
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint64_t htmax[] = { 0, 0xF, 0xFF,
|
||||
@@ -183,7 +182,7 @@ int scanhash_c11_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
swab32_array( endiandata, pdata, 20 );
|
||||
|
||||
uint64_t *edata = (uint64_t*)endiandata;
|
||||
mm256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||
mm256_intrlv_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||
|
||||
for (int m=0; m < 6; m++)
|
||||
if (Htarg <= htmax[m])
|
||||
@@ -201,20 +200,18 @@ int scanhash_c11_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( ( ( (hash+(i<<3))[7] & mask ) == 0 )
|
||||
&& fulltest( hash+(i<<3), ptarget ) )
|
||||
&& fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n+i;
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
||||
}
|
||||
n += 4;
|
||||
} while ( ( num_found == 0 ) && ( n < max_nonce )
|
||||
&& !work_restart[thr_id].restart );
|
||||
} while ( ( n < max_nonce ) && !work_restart[thr_id].restart );
|
||||
break;
|
||||
}
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return num_found;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -14,8 +14,8 @@ bool register_c11_algo( algo_gate_t* gate );
|
||||
|
||||
void c11_4way_hash( void *state, const void *input );
|
||||
|
||||
int scanhash_c11_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
int scanhash_c11_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
void init_c11_4way_ctx();
|
||||
|
||||
@@ -23,8 +23,8 @@ void init_c11_4way_ctx();
|
||||
|
||||
void c11_hash( void *state, const void *input );
|
||||
|
||||
int scanhash_c11( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
int scanhash_c11( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
void init_c11_ctx();
|
||||
|
||||
|
||||
@@ -137,8 +137,8 @@ void c11_hash( void *output, const void *input )
|
||||
memcpy(output, hash+64, 32);
|
||||
}
|
||||
|
||||
int scanhash_c11( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
int scanhash_c11( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t endiandata[20] __attribute__((aligned(64)));
|
||||
uint32_t hash[8] __attribute__((aligned(64)));
|
||||
@@ -147,6 +147,7 @@ int scanhash_c11( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint32_t nonce = first_nonce;
|
||||
int thr_id = mythr->id;
|
||||
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
||||
|
||||
if (opt_benchmark)
|
||||
|
||||
@@ -44,12 +44,13 @@ extern void freshhash(void* output, const void* input, uint32_t len)
|
||||
memcpy(output, hash, 32);
|
||||
}
|
||||
|
||||
int scanhash_fresh(int thr_id, struct work *work,
|
||||
uint32_t max_nonce, uint64_t *hashes_done)
|
||||
int scanhash_fresh( struct work *work,
|
||||
uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr)
|
||||
{
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t len = 80;
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
uint32_t n = pdata[19] - 1;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
|
||||
@@ -87,18 +87,18 @@ void timetravel_4way_hash(void *output, const void *input)
|
||||
blake512_4way( &ctx.blake, vhashA, dataLen );
|
||||
blake512_4way_close( &ctx.blake, vhashB );
|
||||
if ( i == 7 )
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3,
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
|
||||
vhashB, dataLen<<3 );
|
||||
break;
|
||||
case 1:
|
||||
bmw512_4way( &ctx.bmw, vhashA, dataLen );
|
||||
bmw512_4way_close( &ctx.bmw, vhashB );
|
||||
if ( i == 7 )
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3,
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
|
||||
vhashB, dataLen<<3 );
|
||||
break;
|
||||
case 2:
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3,
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
|
||||
vhashA, dataLen<<3 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash0,
|
||||
(char*)hash0, dataLen<<3 );
|
||||
@@ -112,46 +112,46 @@ void timetravel_4way_hash(void *output, const void *input)
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash3,
|
||||
(char*)hash3, dataLen<<3 );
|
||||
if ( i != 7 )
|
||||
mm256_interleave_4x64( vhashB,
|
||||
mm256_intrlv_4x64( vhashB,
|
||||
hash0, hash1, hash2, hash3, dataLen<<3 );
|
||||
break;
|
||||
case 3:
|
||||
skein512_4way( &ctx.skein, vhashA, dataLen );
|
||||
skein512_4way_close( &ctx.skein, vhashB );
|
||||
if ( i == 7 )
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3,
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
|
||||
vhashB, dataLen<<3 );
|
||||
break;
|
||||
case 4:
|
||||
jh512_4way( &ctx.jh, vhashA, dataLen );
|
||||
jh512_4way_close( &ctx.jh, vhashB );
|
||||
if ( i == 7 )
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3,
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
|
||||
vhashB, dataLen<<3 );
|
||||
break;
|
||||
case 5:
|
||||
keccak512_4way( &ctx.keccak, vhashA, dataLen );
|
||||
keccak512_4way_close( &ctx.keccak, vhashB );
|
||||
if ( i == 7 )
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3,
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
|
||||
vhashB, dataLen<<3 );
|
||||
break;
|
||||
case 6:
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3,
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
|
||||
vhashA, dataLen<<3 );
|
||||
mm256_interleave_2x128( vhashA, hash0, hash1, dataLen<<3 );
|
||||
mm256_intrlv_2x128( vhashA, hash0, hash1, dataLen<<3 );
|
||||
luffa_2way_update_close( &ctx.luffa, vhashA, vhashA, dataLen );
|
||||
mm256_deinterleave_2x128( hash0, hash1, vhashA, dataLen<<3 );
|
||||
mm256_interleave_2x128( vhashA, hash2, hash3, dataLen<<3 );
|
||||
mm256_dintrlv_2x128( hash0, hash1, vhashA, dataLen<<3 );
|
||||
mm256_intrlv_2x128( vhashA, hash2, hash3, dataLen<<3 );
|
||||
luffa_2way_init( &ctx.luffa, 512 );
|
||||
luffa_2way_update_close( &ctx.luffa, vhashA, vhashA, dataLen );
|
||||
mm256_deinterleave_2x128( hash2, hash3, vhashA, dataLen<<3 );
|
||||
mm256_dintrlv_2x128( hash2, hash3, vhashA, dataLen<<3 );
|
||||
if ( i != 7 )
|
||||
mm256_interleave_4x64( vhashB,
|
||||
mm256_intrlv_4x64( vhashB,
|
||||
hash0, hash1, hash2, hash3, dataLen<<3 );
|
||||
break;
|
||||
case 7:
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3,
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
|
||||
vhashA, dataLen<<3 );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash0,
|
||||
(const byte*)hash0, dataLen );
|
||||
@@ -165,7 +165,7 @@ void timetravel_4way_hash(void *output, const void *input)
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash3,
|
||||
(const byte*)hash3, dataLen );
|
||||
if ( i != 7 )
|
||||
mm256_interleave_4x64( vhashB,
|
||||
mm256_intrlv_4x64( vhashB,
|
||||
hash0, hash1, hash2, hash3, dataLen<<3 );
|
||||
break;
|
||||
default:
|
||||
@@ -180,8 +180,8 @@ void timetravel_4way_hash(void *output, const void *input)
|
||||
memcpy( output+96, hash3, 32 );
|
||||
}
|
||||
|
||||
int scanhash_timetravel_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
int scanhash_timetravel_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[4*8] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
||||
@@ -190,10 +190,9 @@ int scanhash_timetravel_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[19];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t *nonces = work->nonces;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep = vdata + 73; // 9*8 + 1
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
||||
int i;
|
||||
|
||||
@@ -216,7 +215,7 @@ int scanhash_timetravel_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
}
|
||||
|
||||
uint64_t *edata = (uint64_t*)endiandata;
|
||||
mm256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||
mm256_intrlv_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||
|
||||
do
|
||||
{
|
||||
@@ -229,17 +228,17 @@ int scanhash_timetravel_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
pdata[19] = n;
|
||||
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
|
||||
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget )
|
||||
&& !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n+i;
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
||||
}
|
||||
n += 4;
|
||||
} while ( ( num_found == 0 ) && ( n < max_nonce ) && !(*restart) );
|
||||
} while ( ( n < max_nonce ) && !(*restart) );
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return num_found;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -22,8 +22,8 @@ bool register_timetravel_algo( algo_gate_t* gate );
|
||||
|
||||
void timetravel_4way_hash( void *state, const void *input );
|
||||
|
||||
int scanhash_timetravel_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
int scanhash_timetravel_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
void init_tt8_4way_ctx();
|
||||
|
||||
@@ -31,8 +31,8 @@ void init_tt8_4way_ctx();
|
||||
|
||||
void timetravel_hash( void *state, const void *input );
|
||||
|
||||
int scanhash_timetravel( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
int scanhash_timetravel( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
void init_tt8_ctx();
|
||||
|
||||
|
||||
@@ -210,14 +210,14 @@ void timetravel_hash(void *output, const void *input)
|
||||
memcpy(output, &hash[16 * (TT8_FUNC_COUNT - 1)], 32);
|
||||
}
|
||||
|
||||
int scanhash_timetravel( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
int scanhash_timetravel( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t _ALIGN(64) hash[8];
|
||||
uint32_t _ALIGN(64) endiandata[20];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t nonce = first_nonce;
|
||||
|
||||
@@ -93,18 +93,18 @@ void timetravel10_4way_hash(void *output, const void *input)
|
||||
blake512_4way( &ctx.blake, vhashA, dataLen );
|
||||
blake512_4way_close( &ctx.blake, vhashB );
|
||||
if ( i == 9 )
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3,
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
|
||||
vhashB, dataLen<<3 );
|
||||
break;
|
||||
case 1:
|
||||
bmw512_4way( &ctx.bmw, vhashA, dataLen );
|
||||
bmw512_4way_close( &ctx.bmw, vhashB );
|
||||
if ( i == 9 )
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3,
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
|
||||
vhashB, dataLen<<3 );
|
||||
break;
|
||||
case 2:
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3,
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
|
||||
vhashA, dataLen<<3 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash0,
|
||||
(char*)hash0, dataLen<<3 );
|
||||
@@ -118,46 +118,46 @@ void timetravel10_4way_hash(void *output, const void *input)
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash3,
|
||||
(char*)hash3, dataLen<<3 );
|
||||
if ( i != 9 )
|
||||
mm256_interleave_4x64( vhashB,
|
||||
mm256_intrlv_4x64( vhashB,
|
||||
hash0, hash1, hash2, hash3, dataLen<<3 );
|
||||
break;
|
||||
case 3:
|
||||
skein512_4way( &ctx.skein, vhashA, dataLen );
|
||||
skein512_4way_close( &ctx.skein, vhashB );
|
||||
if ( i == 9 )
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3,
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
|
||||
vhashB, dataLen<<3 );
|
||||
break;
|
||||
case 4:
|
||||
jh512_4way( &ctx.jh, vhashA, dataLen );
|
||||
jh512_4way_close( &ctx.jh, vhashB );
|
||||
if ( i == 9 )
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3,
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
|
||||
vhashB, dataLen<<3 );
|
||||
break;
|
||||
case 5:
|
||||
keccak512_4way( &ctx.keccak, vhashA, dataLen );
|
||||
keccak512_4way_close( &ctx.keccak, vhashB );
|
||||
if ( i == 9 )
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3,
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
|
||||
vhashB, dataLen<<3 );
|
||||
break;
|
||||
case 6:
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3,
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
|
||||
vhashA, dataLen<<3 );
|
||||
mm256_interleave_2x128( vhashA, hash0, hash1, dataLen<<3 );
|
||||
mm256_intrlv_2x128( vhashA, hash0, hash1, dataLen<<3 );
|
||||
luffa_2way_update_close( &ctx.luffa, vhashA, vhashA, dataLen );
|
||||
mm256_deinterleave_2x128( hash0, hash1, vhashA, dataLen<<3 );
|
||||
mm256_interleave_2x128( vhashA, hash2, hash3, dataLen<<3 );
|
||||
mm256_dintrlv_2x128( hash0, hash1, vhashA, dataLen<<3 );
|
||||
mm256_intrlv_2x128( vhashA, hash2, hash3, dataLen<<3 );
|
||||
luffa_2way_init( &ctx.luffa, 512 );
|
||||
luffa_2way_update_close( &ctx.luffa, vhashA, vhashA, dataLen );
|
||||
mm256_deinterleave_2x128( hash2, hash3, vhashA, dataLen<<3 );
|
||||
mm256_dintrlv_2x128( hash2, hash3, vhashA, dataLen<<3 );
|
||||
if ( i != 9 )
|
||||
mm256_interleave_4x64( vhashB,
|
||||
mm256_intrlv_4x64( vhashB,
|
||||
hash0, hash1, hash2, hash3, dataLen<<3 );
|
||||
break;
|
||||
case 7:
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3,
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
|
||||
vhashA, dataLen<<3 );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash0,
|
||||
(const byte*)hash0, dataLen );
|
||||
@@ -171,11 +171,11 @@ void timetravel10_4way_hash(void *output, const void *input)
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash3,
|
||||
(const byte*)hash3, dataLen );
|
||||
if ( i != 9 )
|
||||
mm256_interleave_4x64( vhashB,
|
||||
mm256_intrlv_4x64( vhashB,
|
||||
hash0, hash1, hash2, hash3, dataLen<<3 );
|
||||
break;
|
||||
case 8:
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3,
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
|
||||
vhashA, dataLen<<3 );
|
||||
sph_shavite512( &ctx.shavite, hash0, dataLen );
|
||||
sph_shavite512_close( &ctx.shavite, hash0 );
|
||||
@@ -189,21 +189,21 @@ void timetravel10_4way_hash(void *output, const void *input)
|
||||
sph_shavite512( &ctx.shavite, hash3, dataLen );
|
||||
sph_shavite512_close( &ctx.shavite, hash3 );
|
||||
if ( i != 9 )
|
||||
mm256_interleave_4x64( vhashB,
|
||||
mm256_intrlv_4x64( vhashB,
|
||||
hash0, hash1, hash2, hash3, dataLen<<3 );
|
||||
break;
|
||||
case 9:
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3,
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
|
||||
vhashA, dataLen<<3 );
|
||||
mm256_interleave_2x128( vhashA, hash0, hash1, dataLen<<3 );
|
||||
mm256_intrlv_2x128( vhashA, hash0, hash1, dataLen<<3 );
|
||||
simd_2way_update_close( &ctx.simd, vhashA, vhashA, dataLen<<3 );
|
||||
mm256_deinterleave_2x128( hash0, hash1, vhashA, dataLen<<3 );
|
||||
mm256_interleave_2x128( vhashA, hash2, hash3, dataLen<<3 );
|
||||
mm256_dintrlv_2x128( hash0, hash1, vhashA, dataLen<<3 );
|
||||
mm256_intrlv_2x128( vhashA, hash2, hash3, dataLen<<3 );
|
||||
simd_2way_init( &ctx.simd, 512 );
|
||||
simd_2way_update_close( &ctx.simd, vhashA, vhashA, dataLen<<3 );
|
||||
mm256_deinterleave_2x128( hash2, hash3, vhashA, dataLen<<3 );
|
||||
mm256_dintrlv_2x128( hash2, hash3, vhashA, dataLen<<3 );
|
||||
if ( i != 9 )
|
||||
mm256_interleave_4x64( vhashB,
|
||||
mm256_intrlv_4x64( vhashB,
|
||||
hash0, hash1, hash2, hash3, dataLen<<3 );
|
||||
break;
|
||||
default:
|
||||
@@ -218,8 +218,8 @@ void timetravel10_4way_hash(void *output, const void *input)
|
||||
memcpy( output+96, hash3, 32 );
|
||||
}
|
||||
|
||||
int scanhash_timetravel10_4way( int thr_id, struct work *work,
|
||||
uint32_t max_nonce, uint64_t *hashes_done )
|
||||
int scanhash_timetravel10_4way( struct work *work,
|
||||
uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[4*8] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
||||
@@ -228,9 +228,8 @@ int scanhash_timetravel10_4way( int thr_id, struct work *work,
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[19];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t *nonces = work->nonces;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep = vdata + 73; // 9*8 + 1
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
||||
int i;
|
||||
@@ -254,7 +253,7 @@ int scanhash_timetravel10_4way( int thr_id, struct work *work,
|
||||
}
|
||||
|
||||
uint64_t *edata = (uint64_t*)endiandata;
|
||||
mm256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||
mm256_intrlv_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||
|
||||
do
|
||||
{
|
||||
@@ -267,16 +266,16 @@ int scanhash_timetravel10_4way( int thr_id, struct work *work,
|
||||
pdata[19] = n;
|
||||
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
|
||||
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget )
|
||||
&& !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n+i;
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
||||
}
|
||||
n += 4;
|
||||
} while ( ( num_found == 0 ) && ( n < max_nonce ) && !(*restart) );
|
||||
} while ( ( n < max_nonce ) && !(*restart) );
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return num_found;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -21,8 +21,8 @@ bool register_timetravel10_algo( algo_gate_t* gate );
|
||||
|
||||
void timetravel10_4way_hash( void *state, const void *input );
|
||||
|
||||
int scanhash_timetravel10_4way( int thr_id, struct work *work,
|
||||
uint32_t max_nonce, uint64_t *hashes_done );
|
||||
int scanhash_timetravel10_4way( struct work *work,
|
||||
uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
void init_tt10_4way_ctx();
|
||||
|
||||
@@ -30,8 +30,8 @@ void init_tt10_4way_ctx();
|
||||
|
||||
void timetravel10_hash( void *state, const void *input );
|
||||
|
||||
int scanhash_timetravel10( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
int scanhash_timetravel10( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
void init_tt10_ctx();
|
||||
|
||||
|
||||
@@ -242,13 +242,14 @@ void timetravel10_hash(void *output, const void *input)
|
||||
memcpy(output, &hash[16 * (TT10_FUNC_COUNT - 1)], 32);
|
||||
}
|
||||
|
||||
int scanhash_timetravel10( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
int scanhash_timetravel10( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t _ALIGN(64) hash[8];
|
||||
uint32_t _ALIGN(64) endiandata[20];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
|
||||
@@ -37,7 +37,7 @@ void tribus_hash_4way(void *state, const void *input)
|
||||
keccak512_4way( &ctx_keccak, vhash, 64 );
|
||||
keccak512_4way_close( &ctx_keccak, vhash );
|
||||
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
// hash echo serially
|
||||
init_echo( &ctx_echo, 512 );
|
||||
@@ -59,7 +59,8 @@ void tribus_hash_4way(void *state, const void *input)
|
||||
memcpy( state+96, hash3, 32 );
|
||||
}
|
||||
|
||||
int scanhash_tribus_4way(int thr_id, struct work *work, uint32_t max_nonce, uint64_t *hashes_done)
|
||||
int scanhash_tribus_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr)
|
||||
{
|
||||
uint32_t hash[4*8] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
@@ -69,9 +70,8 @@ int scanhash_tribus_4way(int thr_id, struct work *work, uint32_t max_nonce, uint
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint32_t n = pdata[19];
|
||||
uint32_t *nonces = work->nonces;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep = vdata + 73; // 9*8 + 1
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
uint64_t htmax[] = { 0,
|
||||
0xF,
|
||||
@@ -94,7 +94,7 @@ int scanhash_tribus_4way(int thr_id, struct work *work, uint32_t max_nonce, uint
|
||||
}
|
||||
|
||||
uint64_t *edata = (uint64_t*)endiandata;
|
||||
mm256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||
mm256_intrlv_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||
|
||||
// precalc midstate
|
||||
// doing it one way then then interleaving would be faster but too
|
||||
@@ -119,21 +119,19 @@ int scanhash_tribus_4way(int thr_id, struct work *work, uint32_t max_nonce, uint
|
||||
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( ( !( (hash+(i<<3))[7] & mask ) )
|
||||
&& fulltest( hash+(i<<3), ptarget ) )
|
||||
&& fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n+i;
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
||||
}
|
||||
n += 4;
|
||||
} while ( (num_found == 0) && ( n < max_nonce )
|
||||
&& !work_restart[thr_id].restart);
|
||||
} while ( ( n < max_nonce ) && !work_restart[thr_id].restart);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return num_found;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -14,15 +14,15 @@
|
||||
|
||||
void tribus_hash_4way( void *state, const void *input );
|
||||
|
||||
int scanhash_tribus_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
int scanhash_tribus_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
#else
|
||||
|
||||
void tribus_hash( void *state, const void *input );
|
||||
|
||||
int scanhash_tribus( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
int scanhash_tribus( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
bool tribus_thread_init();
|
||||
|
||||
|
||||
@@ -60,7 +60,8 @@ void tribus_hash(void *state, const void *input)
|
||||
memcpy(state, hash, 32);
|
||||
}
|
||||
|
||||
int scanhash_tribus(int thr_id, struct work *work, uint32_t max_nonce, uint64_t *hashes_done)
|
||||
int scanhash_tribus( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t _ALIGN(128) hash32[8];
|
||||
uint32_t _ALIGN(128) endiandata[20];
|
||||
@@ -69,6 +70,7 @@ int scanhash_tribus(int thr_id, struct work *work, uint32_t max_nonce, uint64_t
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint32_t n = pdata[19] - 1;
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
uint64_t htmax[] = {
|
||||
0,
|
||||
|
||||
@@ -69,7 +69,7 @@ void x11_4way_hash( void *state, const void *input )
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
|
||||
// Serial
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
// 3 Groestl
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||
@@ -81,7 +81,7 @@ void x11_4way_hash( void *state, const void *input )
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
||||
|
||||
// 4way
|
||||
mm256_interleave_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
mm256_intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
|
||||
// 4 Skein
|
||||
skein512_4way( &ctx.skein, vhash, 64 );
|
||||
@@ -95,16 +95,16 @@ void x11_4way_hash( void *state, const void *input )
|
||||
keccak512_4way( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_close( &ctx.keccak, vhash );
|
||||
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
// 7 Luffa parallel 2 way 128 bit
|
||||
mm256_interleave_2x128( vhash, hash0, hash1, 512 );
|
||||
mm256_interleave_2x128( vhashB, hash2, hash3, 512 );
|
||||
mm256_intrlv_2x128( vhash, hash0, hash1, 512 );
|
||||
mm256_intrlv_2x128( vhashB, hash2, hash3, 512 );
|
||||
luffa_2way_update_close( &ctx.luffa, vhash, vhash, 64 );
|
||||
luffa_2way_init( &ctx.luffa, 512 );
|
||||
luffa_2way_update_close( &ctx.luffa, vhashB, vhashB, 64 );
|
||||
mm256_deinterleave_2x128( hash0, hash1, vhash, 512 );
|
||||
mm256_deinterleave_2x128( hash2, hash3, vhashB, 512 );
|
||||
mm256_dintrlv_2x128( hash0, hash1, vhash, 512 );
|
||||
mm256_dintrlv_2x128( hash2, hash3, vhashB, 512 );
|
||||
|
||||
// 8 Cubehash
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash0, (const byte*) hash0, 64 );
|
||||
@@ -132,13 +132,13 @@ void x11_4way_hash( void *state, const void *input )
|
||||
sph_shavite512_close( &ctx.shavite, hash3 );
|
||||
|
||||
// 10 Simd
|
||||
mm256_interleave_2x128( vhash, hash0, hash1, 512 );
|
||||
mm256_interleave_2x128( vhashB, hash2, hash3, 512 );
|
||||
mm256_intrlv_2x128( vhash, hash0, hash1, 512 );
|
||||
mm256_intrlv_2x128( vhashB, hash2, hash3, 512 );
|
||||
simd_2way_update_close( &ctx.simd, vhash, vhash, 512 );
|
||||
simd_2way_init( &ctx.simd, 512 );
|
||||
simd_2way_update_close( &ctx.simd, vhashB, vhashB, 512 );
|
||||
mm256_deinterleave_2x128( hash0, hash1, vhash, 512 );
|
||||
mm256_deinterleave_2x128( hash2, hash3, vhashB, 512 );
|
||||
mm256_dintrlv_2x128( hash0, hash1, vhash, 512 );
|
||||
mm256_dintrlv_2x128( hash2, hash3, vhashB, 512 );
|
||||
|
||||
// 11 Echo
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash0,
|
||||
@@ -159,8 +159,8 @@ void x11_4way_hash( void *state, const void *input )
|
||||
memcpy( state+96, hash3, 32 );
|
||||
}
|
||||
|
||||
int scanhash_x11_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
int scanhash_x11_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[4*8] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
||||
@@ -169,8 +169,7 @@ int scanhash_x11_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[19];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t *nonces = work->nonces;
|
||||
int num_found = 0;
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
uint32_t *noncep = vdata + 73; // 9*8 + 1
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint64_t htmax[] = { 0, 0xF, 0xFF,
|
||||
@@ -182,7 +181,7 @@ int scanhash_x11_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
swab32_array( endiandata, pdata, 20 );
|
||||
|
||||
uint64_t *edata = (uint64_t*)endiandata;
|
||||
mm256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||
mm256_intrlv_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||
|
||||
for (int m=0; m < 6; m++)
|
||||
if (Htarg <= htmax[m])
|
||||
@@ -200,20 +199,18 @@ int scanhash_x11_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( ( ( (hash+(i<<3))[7] & mask ) == 0 )
|
||||
&& fulltest( hash+(i<<3), ptarget ) )
|
||||
&& fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n+i;
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
||||
}
|
||||
n += 4;
|
||||
} while ( ( num_found == 0 ) && ( n < max_nonce )
|
||||
&& !work_restart[thr_id].restart );
|
||||
} while ( ( n < max_nonce ) && !work_restart[thr_id].restart );
|
||||
break;
|
||||
}
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return num_found;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -14,8 +14,8 @@ bool register_x11_algo( algo_gate_t* gate );
|
||||
|
||||
void x11_4way_hash( void *state, const void *input );
|
||||
|
||||
int scanhash_x11_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
int scanhash_x11_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
void init_x11_4way_ctx();
|
||||
|
||||
@@ -23,8 +23,8 @@ void init_x11_4way_ctx();
|
||||
|
||||
void x11_hash( void *state, const void *input );
|
||||
|
||||
int scanhash_x11( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
int scanhash_x11( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
void init_x11_ctx();
|
||||
|
||||
|
||||
@@ -133,8 +133,8 @@ void x11_hash( void *state, const void *input )
|
||||
memcpy( state, hash+64, 32 );
|
||||
}
|
||||
|
||||
int scanhash_x11( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
int scanhash_x11( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t endiandata[20] __attribute__((aligned(64)));
|
||||
uint32_t hash64[8] __attribute__((aligned(64)));
|
||||
@@ -142,6 +142,7 @@ int scanhash_x11( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[19] - 1;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
int thr_id = mythr->id;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint64_t htmax[] = {
|
||||
0,
|
||||
|
||||
@@ -87,18 +87,18 @@ void x11evo_4way_hash( void *state, const void *input )
|
||||
case 0:
|
||||
blake512_4way( &ctx.blake, input, 80 );
|
||||
blake512_4way_close( &ctx.blake, vhash );
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3,
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
|
||||
vhash, 64<<3 );
|
||||
break;
|
||||
case 1:
|
||||
bmw512_4way( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
if ( i >= len-1 )
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3,
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
|
||||
vhash, 64<<3 );
|
||||
break;
|
||||
case 2:
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3,
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
|
||||
vhash, 64<<3 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash0,
|
||||
(char*)hash0, 512 );
|
||||
@@ -112,46 +112,46 @@ void x11evo_4way_hash( void *state, const void *input )
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash3,
|
||||
(char*)hash3, 512 );
|
||||
if ( i < len-1 )
|
||||
mm256_interleave_4x64( vhash,
|
||||
mm256_intrlv_4x64( vhash,
|
||||
hash0, hash1, hash2, hash3, 64<<3 );
|
||||
break;
|
||||
case 3:
|
||||
skein512_4way( &ctx.skein, vhash, 64 );
|
||||
skein512_4way_close( &ctx.skein, vhash );
|
||||
if ( i >= len-1 )
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3,
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
|
||||
vhash, 64<<3 );
|
||||
break;
|
||||
case 4:
|
||||
jh512_4way( &ctx.jh, vhash, 64 );
|
||||
jh512_4way_close( &ctx.jh, vhash );
|
||||
if ( i >= len-1 )
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3,
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
|
||||
vhash, 64<<3 );
|
||||
break;
|
||||
case 5:
|
||||
keccak512_4way( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_close( &ctx.keccak, vhash );
|
||||
if ( i >= len-1 )
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3,
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
|
||||
vhash, 64<<3 );
|
||||
break;
|
||||
case 6:
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3,
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
|
||||
vhash, 64<<3 );
|
||||
mm256_interleave_2x128( vhash, hash0, hash1, 64<<3 );
|
||||
mm256_intrlv_2x128( vhash, hash0, hash1, 64<<3 );
|
||||
luffa_2way_update_close( &ctx.luffa, vhash, vhash, 64 );
|
||||
mm256_deinterleave_2x128( hash0, hash1, vhash, 64<<3 );
|
||||
mm256_interleave_2x128( vhash, hash2, hash3, 64<<3 );
|
||||
mm256_dintrlv_2x128( hash0, hash1, vhash, 64<<3 );
|
||||
mm256_intrlv_2x128( vhash, hash2, hash3, 64<<3 );
|
||||
luffa_2way_init( &ctx.luffa, 512 );
|
||||
luffa_2way_update_close( &ctx.luffa, vhash, vhash, 64 );
|
||||
mm256_deinterleave_2x128( hash2, hash3, vhash, 64<<3 );
|
||||
mm256_dintrlv_2x128( hash2, hash3, vhash, 64<<3 );
|
||||
if ( i < len-1 )
|
||||
mm256_interleave_4x64( vhash,
|
||||
mm256_intrlv_4x64( vhash,
|
||||
hash0, hash1, hash2, hash3, 64<<3 );
|
||||
break;
|
||||
case 7:
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3,
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
|
||||
vhash, 64<<3 );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash0,
|
||||
(const byte*) hash0, 64 );
|
||||
@@ -165,11 +165,11 @@ void x11evo_4way_hash( void *state, const void *input )
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash3,
|
||||
(const byte*) hash3, 64 );
|
||||
if ( i < len-1 )
|
||||
mm256_interleave_4x64( vhash,
|
||||
mm256_intrlv_4x64( vhash,
|
||||
hash0, hash1, hash2, hash3, 64<<3 );
|
||||
break;
|
||||
case 8:
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3,
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
|
||||
vhash, 64<<3 );
|
||||
sph_shavite512( &ctx.shavite, hash0, 64 );
|
||||
sph_shavite512_close( &ctx.shavite, hash0 );
|
||||
@@ -186,25 +186,25 @@ void x11evo_4way_hash( void *state, const void *input )
|
||||
sph_shavite512( &ctx.shavite, hash3, 64 );
|
||||
sph_shavite512_close( &ctx.shavite, hash3 );
|
||||
if ( i < len-1 )
|
||||
mm256_interleave_4x64( vhash,
|
||||
mm256_intrlv_4x64( vhash,
|
||||
hash0, hash1, hash2, hash3, 64<<3 );
|
||||
break;
|
||||
case 9:
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3,
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
|
||||
vhash, 64<<3 );
|
||||
mm256_interleave_2x128( vhash, hash0, hash1, 64<<3 );
|
||||
mm256_intrlv_2x128( vhash, hash0, hash1, 64<<3 );
|
||||
simd_2way_update_close( &ctx.simd, vhash, vhash, 64<<3 );
|
||||
mm256_deinterleave_2x128( hash0, hash1, vhash, 64<<3 );
|
||||
mm256_interleave_2x128( vhash, hash2, hash3, 64<<3 );
|
||||
mm256_dintrlv_2x128( hash0, hash1, vhash, 64<<3 );
|
||||
mm256_intrlv_2x128( vhash, hash2, hash3, 64<<3 );
|
||||
simd_2way_init( &ctx.simd, 512 );
|
||||
simd_2way_update_close( &ctx.simd, vhash, vhash, 64<<3 );
|
||||
mm256_deinterleave_2x128( hash2, hash3, vhash, 64<<3 );
|
||||
mm256_dintrlv_2x128( hash2, hash3, vhash, 64<<3 );
|
||||
if ( i < len-1 )
|
||||
mm256_interleave_4x64( vhash,
|
||||
mm256_intrlv_4x64( vhash,
|
||||
hash0, hash1, hash2, hash3, 64<<3 );
|
||||
break;
|
||||
case 10:
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3,
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
|
||||
vhash, 64<<3 );
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash0,
|
||||
(const BitSequence *) hash0, 512 );
|
||||
@@ -218,7 +218,7 @@ void x11evo_4way_hash( void *state, const void *input )
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash3,
|
||||
(const BitSequence *) hash3, 512 );
|
||||
if ( i < len-1 )
|
||||
mm256_interleave_4x64( vhash,
|
||||
mm256_intrlv_4x64( vhash,
|
||||
hash0, hash1, hash2, hash3, 64<<3 );
|
||||
break;
|
||||
}
|
||||
@@ -232,8 +232,8 @@ void x11evo_4way_hash( void *state, const void *input )
|
||||
|
||||
//static const uint32_t diff1targ = 0x0000ffff;
|
||||
|
||||
int scanhash_x11evo_4way( int thr_id, struct work* work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
int scanhash_x11evo_4way( struct work* work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[4*8] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
||||
@@ -242,8 +242,7 @@ int scanhash_x11evo_4way( int thr_id, struct work* work, uint32_t max_nonce,
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[19];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t *nonces = work->nonces;
|
||||
int num_found = 0;
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
uint32_t *noncep = vdata + 73; // 9*8 + 1
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
|
||||
@@ -270,7 +269,7 @@ int scanhash_x11evo_4way( int thr_id, struct work* work, uint32_t max_nonce,
|
||||
}
|
||||
|
||||
uint64_t *edata = (uint64_t*)endiandata;
|
||||
mm256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||
mm256_intrlv_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||
|
||||
do
|
||||
{
|
||||
@@ -284,18 +283,16 @@ int scanhash_x11evo_4way( int thr_id, struct work* work, uint32_t max_nonce,
|
||||
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( ( ( (hash+(i<<3))[7] & hmask ) == 0 )
|
||||
&& fulltest( hash+(i<<3), ptarget ) )
|
||||
&& fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n+i;
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
||||
}
|
||||
n += 4;
|
||||
} while ( ( num_found == 0 ) && ( n < max_nonce )
|
||||
&& !work_restart[thr_id].restart );
|
||||
} while ( ( n < max_nonce ) && !work_restart[thr_id].restart );
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return num_found;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -19,8 +19,8 @@ bool register_x11evo_algo( algo_gate_t* gate );
|
||||
|
||||
void x11evo_4way_hash( void *state, const void *input );
|
||||
|
||||
int scanhash_x11evo_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
int scanhash_x11evo_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
void init_x11evo_4way_ctx();
|
||||
|
||||
@@ -28,8 +28,8 @@ void init_x11evo_4way_ctx();
|
||||
|
||||
void x11evo_hash( void *state, const void *input );
|
||||
|
||||
int scanhash_x11evo( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
int scanhash_x11evo( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
void init_x11evo_ctx();
|
||||
|
||||
|
||||
@@ -157,8 +157,8 @@ void x11evo_hash( void *state, const void *input )
|
||||
|
||||
//static const uint32_t diff1targ = 0x0000ffff;
|
||||
|
||||
int scanhash_x11evo( int thr_id, struct work* work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
int scanhash_x11evo( struct work* work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t endiandata[20] __attribute__((aligned(64)));
|
||||
uint32_t hash64[8] __attribute__((aligned(64)));
|
||||
@@ -166,6 +166,7 @@ int scanhash_x11evo( int thr_id, struct work* work, uint32_t max_nonce,
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[19] - 1;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
|
||||
swab32_array( endiandata, pdata, 20 );
|
||||
|
||||
@@ -70,7 +70,7 @@ void x11gost_4way_hash( void *state, const void *input )
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
|
||||
// Serial
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||
memcpy( &ctx.groestl, &x11gost_4way_ctx.groestl,
|
||||
@@ -84,7 +84,7 @@ void x11gost_4way_hash( void *state, const void *input )
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
||||
|
||||
// 4way
|
||||
mm256_interleave_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
mm256_intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
|
||||
skein512_4way( &ctx.skein, vhash, 64 );
|
||||
skein512_4way_close( &ctx.skein, vhash );
|
||||
@@ -96,7 +96,7 @@ void x11gost_4way_hash( void *state, const void *input )
|
||||
keccak512_4way_close( &ctx.keccak, vhash );
|
||||
|
||||
// Serial
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
sph_gost512( &ctx.gost, hash0, 64 );
|
||||
sph_gost512_close( &ctx.gost, hash0 );
|
||||
@@ -110,13 +110,13 @@ void x11gost_4way_hash( void *state, const void *input )
|
||||
sph_gost512( &ctx.gost, hash3, 64 );
|
||||
sph_gost512_close( &ctx.gost, hash3 );
|
||||
|
||||
mm256_interleave_2x128( vhash, hash0, hash1, 512 );
|
||||
mm256_intrlv_2x128( vhash, hash0, hash1, 512 );
|
||||
luffa_2way_update_close( &ctx.luffa, vhash, vhash, 64 );
|
||||
mm256_deinterleave_2x128( hash0, hash1, vhash, 512 );
|
||||
mm256_interleave_2x128( vhash, hash2, hash3, 512 );
|
||||
mm256_dintrlv_2x128( hash0, hash1, vhash, 512 );
|
||||
mm256_intrlv_2x128( vhash, hash2, hash3, 512 );
|
||||
luffa_2way_init( &ctx.luffa, 512 );
|
||||
luffa_2way_update_close( &ctx.luffa, vhash, vhash, 64 );
|
||||
mm256_deinterleave_2x128( hash2, hash3, vhash, 512 );
|
||||
mm256_dintrlv_2x128( hash2, hash3, vhash, 512 );
|
||||
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash0, (const byte*) hash0, 64 );
|
||||
memcpy( &ctx.cube, &x11gost_4way_ctx.cube, sizeof(cubehashParam) );
|
||||
@@ -141,12 +141,12 @@ void x11gost_4way_hash( void *state, const void *input )
|
||||
sph_shavite512( &ctx.shavite, hash3, 64 );
|
||||
sph_shavite512_close( &ctx.shavite, hash3 );
|
||||
|
||||
mm256_interleave_2x128( vhash, hash0, hash1, 512 );
|
||||
mm256_intrlv_2x128( vhash, hash0, hash1, 512 );
|
||||
simd_2way_update_close( &ctx.simd, vhash, vhash, 512 );
|
||||
mm256_deinterleave_2x128( hash0, hash1, vhash, 512 );
|
||||
mm256_interleave_2x128( vhash, hash2, hash3, 512 );
|
||||
mm256_dintrlv_2x128( hash0, hash1, vhash, 512 );
|
||||
mm256_intrlv_2x128( vhash, hash2, hash3, 512 );
|
||||
simd_2way_update_close( &ctx.simd, vhash, vhash, 512 );
|
||||
mm256_deinterleave_2x128( hash2, hash3, vhash, 512 );
|
||||
mm256_dintrlv_2x128( hash2, hash3, vhash, 512 );
|
||||
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash0,
|
||||
(const BitSequence *) hash0, 512 );
|
||||
@@ -166,8 +166,8 @@ void x11gost_4way_hash( void *state, const void *input )
|
||||
memcpy( state+96, hash3, 32 );
|
||||
}
|
||||
|
||||
int scanhash_x11gost_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
int scanhash_x11gost_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[4*8] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
||||
@@ -176,8 +176,7 @@ int scanhash_x11gost_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[19];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t *nonces = work->nonces;
|
||||
int num_found = 0;
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
uint32_t *noncep = vdata + 73; // 9*8 + 1
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint64_t htmax[] = { 0, 0xF, 0xFF,
|
||||
@@ -189,7 +188,7 @@ int scanhash_x11gost_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
swab32_array( endiandata, pdata, 20 );
|
||||
|
||||
uint64_t *edata = (uint64_t*)endiandata;
|
||||
mm256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||
mm256_intrlv_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||
|
||||
for (int m=0; m < 6; m++)
|
||||
if (Htarg <= htmax[m])
|
||||
@@ -207,20 +206,18 @@ int scanhash_x11gost_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( ( ( (hash+(i<<3))[7] & mask ) == 0 )
|
||||
&& fulltest( hash+(i<<3), ptarget ) )
|
||||
&& fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n+i;
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
||||
}
|
||||
n += 4;
|
||||
} while ( ( num_found == 0 ) && ( n < max_nonce )
|
||||
&& !work_restart[thr_id].restart );
|
||||
} while ( ( n < max_nonce ) && !work_restart[thr_id].restart );
|
||||
break;
|
||||
}
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return num_found;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -14,8 +14,8 @@ bool register_x11gost_algo( algo_gate_t* gate );
|
||||
|
||||
void x11gost_4way_hash( void *state, const void *input );
|
||||
|
||||
int scanhash_x11gost_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
int scanhash_x11gost_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
void init_x11gost_4way_ctx();
|
||||
|
||||
@@ -23,8 +23,8 @@ void init_x11gost_4way_ctx();
|
||||
|
||||
void x11gost_hash( void *state, const void *input );
|
||||
|
||||
int scanhash_x11gost( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
int scanhash_x11gost( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
void init_x11gost_ctx();
|
||||
|
||||
|
||||
@@ -135,14 +135,15 @@ void x11gost_hash(void *output, const void *input)
|
||||
memcpy(output, hashA, 32);
|
||||
}
|
||||
|
||||
int scanhash_x11gost( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done)
|
||||
int scanhash_x11gost( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t _ALIGN(64) endiandata[20];
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
uint32_t nonce = first_nonce;
|
||||
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user