mirror of
https://github.com/JayDDee/cpuminer-opt.git
synced 2025-09-17 23:44:27 +00:00
v3.9.5.4
This commit is contained in:
@@ -69,7 +69,7 @@ void c11_4way_hash( void *state, const void *input )
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
|
||||
// Serial
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
// 3 Groestl
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||
@@ -81,7 +81,7 @@ void c11_4way_hash( void *state, const void *input )
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
||||
|
||||
// 4way
|
||||
mm256_intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
|
||||
// 4 JH
|
||||
jh512_4way( &ctx.jh, vhash, 64 );
|
||||
@@ -96,16 +96,16 @@ void c11_4way_hash( void *state, const void *input )
|
||||
skein512_4way_close( &ctx.skein, vhash );
|
||||
|
||||
// Serial
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
// 7 Luffa
|
||||
mm256_intrlv_2x128( vhash, hash0, hash1, 512 );
|
||||
mm256_intrlv_2x128( vhashB, hash2, hash3, 512 );
|
||||
intrlv_2x128( vhash, hash0, hash1, 512 );
|
||||
intrlv_2x128( vhashB, hash2, hash3, 512 );
|
||||
luffa_2way_update_close( &ctx.luffa, vhash, vhash, 64 );
|
||||
luffa_2way_init( &ctx.luffa, 512 );
|
||||
luffa_2way_update_close( &ctx.luffa, vhashB, vhashB, 64 );
|
||||
mm256_dintrlv_2x128( hash0, hash1, vhash, 512 );
|
||||
mm256_dintrlv_2x128( hash2, hash3, vhashB, 512 );
|
||||
dintrlv_2x128( hash0, hash1, vhash, 512 );
|
||||
dintrlv_2x128( hash2, hash3, vhashB, 512 );
|
||||
|
||||
// 8 Cubehash
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash0, (const byte*) hash0, 64 );
|
||||
@@ -133,13 +133,13 @@ void c11_4way_hash( void *state, const void *input )
|
||||
sph_shavite512_close( &ctx.shavite, hash3 );
|
||||
|
||||
// 10 Simd
|
||||
mm256_intrlv_2x128( vhash, hash0, hash1, 512 );
|
||||
mm256_intrlv_2x128( vhashB, hash2, hash3, 512 );
|
||||
intrlv_2x128( vhash, hash0, hash1, 512 );
|
||||
intrlv_2x128( vhashB, hash2, hash3, 512 );
|
||||
simd_2way_update_close( &ctx.simd, vhash, vhash, 512 );
|
||||
simd_2way_init( &ctx.simd, 512 );
|
||||
simd_2way_update_close( &ctx.simd, vhashB, vhashB, 512 );
|
||||
mm256_dintrlv_2x128( hash0, hash1, vhash, 512 );
|
||||
mm256_dintrlv_2x128( hash2, hash3, vhashB, 512 );
|
||||
dintrlv_2x128( hash0, hash1, vhash, 512 );
|
||||
dintrlv_2x128( hash2, hash3, vhashB, 512 );
|
||||
|
||||
// 11 Echo
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash0,
|
||||
@@ -165,7 +165,6 @@ int scanhash_c11_4way( struct work *work, uint32_t max_nonce,
|
||||
{
|
||||
uint32_t hash[4*8] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
||||
uint32_t edata[20] __attribute__((aligned(64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[19];
|
||||
@@ -178,9 +177,7 @@ int scanhash_c11_4way( struct work *work, uint32_t max_nonce,
|
||||
uint32_t masks[] = { 0xFFFFFFFF, 0xFFFFFFF0, 0xFFFFFF00,
|
||||
0xFFFFF000, 0xFFFF0000, 0 };
|
||||
|
||||
swab32_array( edata, pdata, 20 );
|
||||
mm256_intrlv_4x64( vdata, edata, edata, edata, edata, 640 );
|
||||
// mm256_bswap_intrlv80_4x64( vdata, pdata );
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
|
||||
for (int m=0; m < 6; m++)
|
||||
if (Htarg <= htmax[m])
|
||||
|
||||
@@ -87,19 +87,16 @@ void timetravel_4way_hash(void *output, const void *input)
|
||||
blake512_4way( &ctx.blake, vhashA, dataLen );
|
||||
blake512_4way_close( &ctx.blake, vhashB );
|
||||
if ( i == 7 )
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
|
||||
vhashB, dataLen<<3 );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhashB, dataLen<<3 );
|
||||
break;
|
||||
case 1:
|
||||
bmw512_4way( &ctx.bmw, vhashA, dataLen );
|
||||
bmw512_4way_close( &ctx.bmw, vhashB );
|
||||
if ( i == 7 )
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
|
||||
vhashB, dataLen<<3 );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhashB, dataLen<<3 );
|
||||
break;
|
||||
case 2:
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
|
||||
vhashA, dataLen<<3 );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhashA, dataLen<<3 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash0,
|
||||
(char*)hash0, dataLen<<3 );
|
||||
reinit_groestl( &ctx.groestl );
|
||||
@@ -112,47 +109,40 @@ void timetravel_4way_hash(void *output, const void *input)
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash3,
|
||||
(char*)hash3, dataLen<<3 );
|
||||
if ( i != 7 )
|
||||
mm256_intrlv_4x64( vhashB,
|
||||
hash0, hash1, hash2, hash3, dataLen<<3 );
|
||||
intrlv_4x64( vhashB, hash0, hash1, hash2, hash3, dataLen<<3 );
|
||||
break;
|
||||
case 3:
|
||||
skein512_4way( &ctx.skein, vhashA, dataLen );
|
||||
skein512_4way_close( &ctx.skein, vhashB );
|
||||
if ( i == 7 )
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
|
||||
vhashB, dataLen<<3 );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhashB, dataLen<<3 );
|
||||
break;
|
||||
case 4:
|
||||
jh512_4way( &ctx.jh, vhashA, dataLen );
|
||||
jh512_4way_close( &ctx.jh, vhashB );
|
||||
if ( i == 7 )
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
|
||||
vhashB, dataLen<<3 );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhashB, dataLen<<3 );
|
||||
break;
|
||||
case 5:
|
||||
keccak512_4way( &ctx.keccak, vhashA, dataLen );
|
||||
keccak512_4way_close( &ctx.keccak, vhashB );
|
||||
if ( i == 7 )
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
|
||||
vhashB, dataLen<<3 );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhashB, dataLen<<3 );
|
||||
break;
|
||||
case 6:
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
|
||||
vhashA, dataLen<<3 );
|
||||
mm256_intrlv_2x128( vhashA, hash0, hash1, dataLen<<3 );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhashA, dataLen<<3 );
|
||||
intrlv_2x128( vhashA, hash0, hash1, dataLen<<3 );
|
||||
luffa_2way_update_close( &ctx.luffa, vhashA, vhashA, dataLen );
|
||||
mm256_dintrlv_2x128( hash0, hash1, vhashA, dataLen<<3 );
|
||||
mm256_intrlv_2x128( vhashA, hash2, hash3, dataLen<<3 );
|
||||
dintrlv_2x128( hash0, hash1, vhashA, dataLen<<3 );
|
||||
intrlv_2x128( vhashA, hash2, hash3, dataLen<<3 );
|
||||
luffa_2way_init( &ctx.luffa, 512 );
|
||||
luffa_2way_update_close( &ctx.luffa, vhashA, vhashA, dataLen );
|
||||
mm256_dintrlv_2x128( hash2, hash3, vhashA, dataLen<<3 );
|
||||
dintrlv_2x128( hash2, hash3, vhashA, dataLen<<3 );
|
||||
if ( i != 7 )
|
||||
mm256_intrlv_4x64( vhashB,
|
||||
hash0, hash1, hash2, hash3, dataLen<<3 );
|
||||
intrlv_4x64( vhashB, hash0, hash1, hash2, hash3, dataLen<<3 );
|
||||
break;
|
||||
case 7:
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
|
||||
vhashA, dataLen<<3 );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhashA, dataLen<<3 );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash0,
|
||||
(const byte*)hash0, dataLen );
|
||||
memcpy( &ctx.cube, &tt8_4way_ctx.cube, sizeof(cubehashParam) );
|
||||
@@ -165,8 +155,7 @@ void timetravel_4way_hash(void *output, const void *input)
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash3,
|
||||
(const byte*)hash3, dataLen );
|
||||
if ( i != 7 )
|
||||
mm256_intrlv_4x64( vhashB,
|
||||
hash0, hash1, hash2, hash3, dataLen<<3 );
|
||||
intrlv_4x64( vhashB, hash0, hash1, hash2, hash3, dataLen<<3 );
|
||||
break;
|
||||
default:
|
||||
applog(LOG_ERR,"SWERR: timetravel invalid permutation");
|
||||
@@ -215,7 +204,7 @@ int scanhash_timetravel_4way( struct work *work, uint32_t max_nonce,
|
||||
}
|
||||
|
||||
uint64_t *edata = (uint64_t*)endiandata;
|
||||
mm256_intrlv_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||
intrlv_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||
|
||||
do
|
||||
{
|
||||
|
||||
@@ -93,19 +93,16 @@ void timetravel10_4way_hash(void *output, const void *input)
|
||||
blake512_4way( &ctx.blake, vhashA, dataLen );
|
||||
blake512_4way_close( &ctx.blake, vhashB );
|
||||
if ( i == 9 )
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
|
||||
vhashB, dataLen<<3 );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhashB, dataLen<<3 );
|
||||
break;
|
||||
case 1:
|
||||
bmw512_4way( &ctx.bmw, vhashA, dataLen );
|
||||
bmw512_4way_close( &ctx.bmw, vhashB );
|
||||
if ( i == 9 )
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
|
||||
vhashB, dataLen<<3 );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhashB, dataLen<<3 );
|
||||
break;
|
||||
case 2:
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
|
||||
vhashA, dataLen<<3 );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhashA, dataLen<<3 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash0,
|
||||
(char*)hash0, dataLen<<3 );
|
||||
reinit_groestl( &ctx.groestl );
|
||||
@@ -118,46 +115,40 @@ void timetravel10_4way_hash(void *output, const void *input)
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash3,
|
||||
(char*)hash3, dataLen<<3 );
|
||||
if ( i != 9 )
|
||||
mm256_intrlv_4x64( vhashB,
|
||||
hash0, hash1, hash2, hash3, dataLen<<3 );
|
||||
intrlv_4x64( vhashB, hash0, hash1, hash2, hash3, dataLen<<3 );
|
||||
break;
|
||||
case 3:
|
||||
skein512_4way( &ctx.skein, vhashA, dataLen );
|
||||
skein512_4way_close( &ctx.skein, vhashB );
|
||||
if ( i == 9 )
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
|
||||
vhashB, dataLen<<3 );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhashB, dataLen<<3 );
|
||||
break;
|
||||
case 4:
|
||||
jh512_4way( &ctx.jh, vhashA, dataLen );
|
||||
jh512_4way_close( &ctx.jh, vhashB );
|
||||
if ( i == 9 )
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
|
||||
vhashB, dataLen<<3 );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhashB, dataLen<<3 );
|
||||
break;
|
||||
case 5:
|
||||
keccak512_4way( &ctx.keccak, vhashA, dataLen );
|
||||
keccak512_4way_close( &ctx.keccak, vhashB );
|
||||
if ( i == 9 )
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
|
||||
vhashB, dataLen<<3 );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhashB, dataLen<<3 );
|
||||
break;
|
||||
case 6:
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
|
||||
vhashA, dataLen<<3 );
|
||||
mm256_intrlv_2x128( vhashA, hash0, hash1, dataLen<<3 );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhashA, dataLen<<3 );
|
||||
intrlv_2x128( vhashA, hash0, hash1, dataLen<<3 );
|
||||
luffa_2way_update_close( &ctx.luffa, vhashA, vhashA, dataLen );
|
||||
mm256_dintrlv_2x128( hash0, hash1, vhashA, dataLen<<3 );
|
||||
mm256_intrlv_2x128( vhashA, hash2, hash3, dataLen<<3 );
|
||||
dintrlv_2x128( hash0, hash1, vhashA, dataLen<<3 );
|
||||
intrlv_2x128( vhashA, hash2, hash3, dataLen<<3 );
|
||||
luffa_2way_init( &ctx.luffa, 512 );
|
||||
luffa_2way_update_close( &ctx.luffa, vhashA, vhashA, dataLen );
|
||||
mm256_dintrlv_2x128( hash2, hash3, vhashA, dataLen<<3 );
|
||||
dintrlv_2x128( hash2, hash3, vhashA, dataLen<<3 );
|
||||
if ( i != 9 )
|
||||
mm256_intrlv_4x64( vhashB,
|
||||
hash0, hash1, hash2, hash3, dataLen<<3 );
|
||||
intrlv_4x64( vhashB, hash0, hash1, hash2, hash3, dataLen<<3 );
|
||||
break;
|
||||
case 7:
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3,
|
||||
vhashA, dataLen<<3 );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash0,
|
||||
(const byte*)hash0, dataLen );
|
||||
@@ -171,12 +162,10 @@ void timetravel10_4way_hash(void *output, const void *input)
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash3,
|
||||
(const byte*)hash3, dataLen );
|
||||
if ( i != 9 )
|
||||
mm256_intrlv_4x64( vhashB,
|
||||
hash0, hash1, hash2, hash3, dataLen<<3 );
|
||||
intrlv_4x64( vhashB, hash0, hash1, hash2, hash3, dataLen<<3 );
|
||||
break;
|
||||
case 8:
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
|
||||
vhashA, dataLen<<3 );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhashA, dataLen<<3 );
|
||||
sph_shavite512( &ctx.shavite, hash0, dataLen );
|
||||
sph_shavite512_close( &ctx.shavite, hash0 );
|
||||
memcpy( &ctx.shavite, &tt10_4way_ctx.shavite, sizeof ctx.shavite );
|
||||
@@ -189,22 +178,19 @@ void timetravel10_4way_hash(void *output, const void *input)
|
||||
sph_shavite512( &ctx.shavite, hash3, dataLen );
|
||||
sph_shavite512_close( &ctx.shavite, hash3 );
|
||||
if ( i != 9 )
|
||||
mm256_intrlv_4x64( vhashB,
|
||||
hash0, hash1, hash2, hash3, dataLen<<3 );
|
||||
intrlv_4x64( vhashB, hash0, hash1, hash2, hash3, dataLen<<3 );
|
||||
break;
|
||||
case 9:
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
|
||||
vhashA, dataLen<<3 );
|
||||
mm256_intrlv_2x128( vhashA, hash0, hash1, dataLen<<3 );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhashA, dataLen<<3 );
|
||||
intrlv_2x128( vhashA, hash0, hash1, dataLen<<3 );
|
||||
simd_2way_update_close( &ctx.simd, vhashA, vhashA, dataLen<<3 );
|
||||
mm256_dintrlv_2x128( hash0, hash1, vhashA, dataLen<<3 );
|
||||
mm256_intrlv_2x128( vhashA, hash2, hash3, dataLen<<3 );
|
||||
dintrlv_2x128( hash0, hash1, vhashA, dataLen<<3 );
|
||||
intrlv_2x128( vhashA, hash2, hash3, dataLen<<3 );
|
||||
simd_2way_init( &ctx.simd, 512 );
|
||||
simd_2way_update_close( &ctx.simd, vhashA, vhashA, dataLen<<3 );
|
||||
mm256_dintrlv_2x128( hash2, hash3, vhashA, dataLen<<3 );
|
||||
dintrlv_2x128( hash2, hash3, vhashA, dataLen<<3 );
|
||||
if ( i != 9 )
|
||||
mm256_intrlv_4x64( vhashB,
|
||||
hash0, hash1, hash2, hash3, dataLen<<3 );
|
||||
intrlv_4x64( vhashB, hash0, hash1, hash2, hash3, dataLen<<3 );
|
||||
break;
|
||||
default:
|
||||
applog(LOG_ERR,"SWERR: timetravel invalid permutation");
|
||||
@@ -253,7 +239,7 @@ int scanhash_timetravel10_4way( struct work *work,
|
||||
}
|
||||
|
||||
uint64_t *edata = (uint64_t*)endiandata;
|
||||
mm256_intrlv_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||
intrlv_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||
|
||||
do
|
||||
{
|
||||
|
||||
@@ -37,7 +37,7 @@ void tribus_hash_4way(void *state, const void *input)
|
||||
keccak512_4way( &ctx_keccak, vhash, 64 );
|
||||
keccak512_4way_close( &ctx_keccak, vhash );
|
||||
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
// hash echo serially
|
||||
init_echo( &ctx_echo, 512 );
|
||||
@@ -64,7 +64,6 @@ int scanhash_tribus_4way( struct work *work, uint32_t max_nonce,
|
||||
{
|
||||
uint32_t hash[4*8] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t edata[20] __attribute__ ((aligned (64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
@@ -87,9 +86,7 @@ int scanhash_tribus_4way( struct work *work, uint32_t max_nonce,
|
||||
0xFFFF0000,
|
||||
0 };
|
||||
|
||||
swab32_array( edata, pdata, 20 );
|
||||
mm256_intrlv_4x64( vdata, edata, edata, edata, edata, 640 );
|
||||
// mm256_bswap_intrlv80_4x64( vdata, pdata );
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
|
||||
// precalc midstate
|
||||
// doing it one way then interleaving would be faster but too
|
||||
|
||||
@@ -69,7 +69,7 @@ void x11_4way_hash( void *state, const void *input )
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
|
||||
// Serial
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
// 3 Groestl
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||
@@ -81,7 +81,7 @@ void x11_4way_hash( void *state, const void *input )
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
||||
|
||||
// 4way
|
||||
mm256_intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
|
||||
// 4 Skein
|
||||
skein512_4way( &ctx.skein, vhash, 64 );
|
||||
@@ -95,16 +95,16 @@ void x11_4way_hash( void *state, const void *input )
|
||||
keccak512_4way( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_close( &ctx.keccak, vhash );
|
||||
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
// 7 Luffa parallel 2 way 128 bit
|
||||
mm256_intrlv_2x128( vhash, hash0, hash1, 512 );
|
||||
mm256_intrlv_2x128( vhashB, hash2, hash3, 512 );
|
||||
intrlv_2x128( vhash, hash0, hash1, 512 );
|
||||
intrlv_2x128( vhashB, hash2, hash3, 512 );
|
||||
luffa_2way_update_close( &ctx.luffa, vhash, vhash, 64 );
|
||||
luffa_2way_init( &ctx.luffa, 512 );
|
||||
luffa_2way_update_close( &ctx.luffa, vhashB, vhashB, 64 );
|
||||
mm256_dintrlv_2x128( hash0, hash1, vhash, 512 );
|
||||
mm256_dintrlv_2x128( hash2, hash3, vhashB, 512 );
|
||||
dintrlv_2x128( hash0, hash1, vhash, 512 );
|
||||
dintrlv_2x128( hash2, hash3, vhashB, 512 );
|
||||
|
||||
// 8 Cubehash
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash0, (const byte*) hash0, 64 );
|
||||
@@ -132,13 +132,13 @@ void x11_4way_hash( void *state, const void *input )
|
||||
sph_shavite512_close( &ctx.shavite, hash3 );
|
||||
|
||||
// 10 Simd
|
||||
mm256_intrlv_2x128( vhash, hash0, hash1, 512 );
|
||||
mm256_intrlv_2x128( vhashB, hash2, hash3, 512 );
|
||||
intrlv_2x128( vhash, hash0, hash1, 512 );
|
||||
intrlv_2x128( vhashB, hash2, hash3, 512 );
|
||||
simd_2way_update_close( &ctx.simd, vhash, vhash, 512 );
|
||||
simd_2way_init( &ctx.simd, 512 );
|
||||
simd_2way_update_close( &ctx.simd, vhashB, vhashB, 512 );
|
||||
mm256_dintrlv_2x128( hash0, hash1, vhash, 512 );
|
||||
mm256_dintrlv_2x128( hash2, hash3, vhashB, 512 );
|
||||
dintrlv_2x128( hash0, hash1, vhash, 512 );
|
||||
dintrlv_2x128( hash2, hash3, vhashB, 512 );
|
||||
|
||||
// 11 Echo
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash0,
|
||||
@@ -164,7 +164,6 @@ int scanhash_x11_4way( struct work *work, uint32_t max_nonce,
|
||||
{
|
||||
uint32_t hash[4*8] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
||||
uint32_t edata[20] __attribute__((aligned(64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[19];
|
||||
@@ -177,9 +176,7 @@ int scanhash_x11_4way( struct work *work, uint32_t max_nonce,
|
||||
uint32_t masks[] = { 0xFFFFFFFF, 0xFFFFFFF0, 0xFFFFFF00,
|
||||
0xFFFFF000, 0xFFFF0000, 0 };
|
||||
|
||||
swab32_array( edata, pdata, 20 );
|
||||
mm256_intrlv_4x64( vdata, edata, edata, edata, edata, 640 );
|
||||
// mm256_bswap_intrlv80_4x64( vdata, pdata );
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
|
||||
for (int m=0; m < 6; m++)
|
||||
if (Htarg <= htmax[m])
|
||||
|
||||
@@ -87,19 +87,16 @@ void x11evo_4way_hash( void *state, const void *input )
|
||||
case 0:
|
||||
blake512_4way( &ctx.blake, input, 80 );
|
||||
blake512_4way_close( &ctx.blake, vhash );
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
|
||||
vhash, 64<<3 );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 64<<3 );
|
||||
break;
|
||||
case 1:
|
||||
bmw512_4way( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
if ( i >= len-1 )
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
|
||||
vhash, 64<<3 );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 64<<3 );
|
||||
break;
|
||||
case 2:
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
|
||||
vhash, 64<<3 );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 64<<3 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash0,
|
||||
(char*)hash0, 512 );
|
||||
reinit_groestl( &ctx.groestl );
|
||||
@@ -112,47 +109,40 @@ void x11evo_4way_hash( void *state, const void *input )
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash3,
|
||||
(char*)hash3, 512 );
|
||||
if ( i < len-1 )
|
||||
mm256_intrlv_4x64( vhash,
|
||||
hash0, hash1, hash2, hash3, 64<<3 );
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 64<<3 );
|
||||
break;
|
||||
case 3:
|
||||
skein512_4way( &ctx.skein, vhash, 64 );
|
||||
skein512_4way_close( &ctx.skein, vhash );
|
||||
if ( i >= len-1 )
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
|
||||
vhash, 64<<3 );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 64<<3 );
|
||||
break;
|
||||
case 4:
|
||||
jh512_4way( &ctx.jh, vhash, 64 );
|
||||
jh512_4way_close( &ctx.jh, vhash );
|
||||
if ( i >= len-1 )
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
|
||||
vhash, 64<<3 );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 64<<3 );
|
||||
break;
|
||||
case 5:
|
||||
keccak512_4way( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_close( &ctx.keccak, vhash );
|
||||
if ( i >= len-1 )
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
|
||||
vhash, 64<<3 );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 64<<3 );
|
||||
break;
|
||||
case 6:
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
|
||||
vhash, 64<<3 );
|
||||
mm256_intrlv_2x128( vhash, hash0, hash1, 64<<3 );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 64<<3 );
|
||||
intrlv_2x128( vhash, hash0, hash1, 64<<3 );
|
||||
luffa_2way_update_close( &ctx.luffa, vhash, vhash, 64 );
|
||||
mm256_dintrlv_2x128( hash0, hash1, vhash, 64<<3 );
|
||||
mm256_intrlv_2x128( vhash, hash2, hash3, 64<<3 );
|
||||
dintrlv_2x128( hash0, hash1, vhash, 64<<3 );
|
||||
intrlv_2x128( vhash, hash2, hash3, 64<<3 );
|
||||
luffa_2way_init( &ctx.luffa, 512 );
|
||||
luffa_2way_update_close( &ctx.luffa, vhash, vhash, 64 );
|
||||
mm256_dintrlv_2x128( hash2, hash3, vhash, 64<<3 );
|
||||
dintrlv_2x128( hash2, hash3, vhash, 64<<3 );
|
||||
if ( i < len-1 )
|
||||
mm256_intrlv_4x64( vhash,
|
||||
hash0, hash1, hash2, hash3, 64<<3 );
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 64<<3 );
|
||||
break;
|
||||
case 7:
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
|
||||
vhash, 64<<3 );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 64<<3 );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash0,
|
||||
(const byte*) hash0, 64 );
|
||||
memcpy( &ctx.cube, &x11evo_4way_ctx.cube, sizeof(cubehashParam) );
|
||||
@@ -165,12 +155,10 @@ void x11evo_4way_hash( void *state, const void *input )
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash3,
|
||||
(const byte*) hash3, 64 );
|
||||
if ( i < len-1 )
|
||||
mm256_intrlv_4x64( vhash,
|
||||
hash0, hash1, hash2, hash3, 64<<3 );
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 64<<3 );
|
||||
break;
|
||||
case 8:
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
|
||||
vhash, 64<<3 );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 64<<3 );
|
||||
sph_shavite512( &ctx.shavite, hash0, 64 );
|
||||
sph_shavite512_close( &ctx.shavite, hash0 );
|
||||
memcpy( &ctx.shavite, &x11evo_4way_ctx.shavite,
|
||||
@@ -186,26 +174,22 @@ void x11evo_4way_hash( void *state, const void *input )
|
||||
sph_shavite512( &ctx.shavite, hash3, 64 );
|
||||
sph_shavite512_close( &ctx.shavite, hash3 );
|
||||
if ( i < len-1 )
|
||||
mm256_intrlv_4x64( vhash,
|
||||
hash0, hash1, hash2, hash3, 64<<3 );
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 64<<3 );
|
||||
break;
|
||||
case 9:
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
|
||||
vhash, 64<<3 );
|
||||
mm256_intrlv_2x128( vhash, hash0, hash1, 64<<3 );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 64<<3 );
|
||||
intrlv_2x128( vhash, hash0, hash1, 64<<3 );
|
||||
simd_2way_update_close( &ctx.simd, vhash, vhash, 64<<3 );
|
||||
mm256_dintrlv_2x128( hash0, hash1, vhash, 64<<3 );
|
||||
mm256_intrlv_2x128( vhash, hash2, hash3, 64<<3 );
|
||||
dintrlv_2x128( hash0, hash1, vhash, 64<<3 );
|
||||
intrlv_2x128( vhash, hash2, hash3, 64<<3 );
|
||||
simd_2way_init( &ctx.simd, 512 );
|
||||
simd_2way_update_close( &ctx.simd, vhash, vhash, 64<<3 );
|
||||
mm256_dintrlv_2x128( hash2, hash3, vhash, 64<<3 );
|
||||
dintrlv_2x128( hash2, hash3, vhash, 64<<3 );
|
||||
if ( i < len-1 )
|
||||
mm256_intrlv_4x64( vhash,
|
||||
hash0, hash1, hash2, hash3, 64<<3 );
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 64<<3 );
|
||||
break;
|
||||
case 10:
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
|
||||
vhash, 64<<3 );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 64<<3 );
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash0,
|
||||
(const BitSequence *) hash0, 512 );
|
||||
memcpy( &ctx.echo, &x11evo_4way_ctx.echo, sizeof(hashState_echo) );
|
||||
@@ -218,8 +202,7 @@ void x11evo_4way_hash( void *state, const void *input )
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash3,
|
||||
(const BitSequence *) hash3, 512 );
|
||||
if ( i < len-1 )
|
||||
mm256_intrlv_4x64( vhash,
|
||||
hash0, hash1, hash2, hash3, 64<<3 );
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 64<<3 );
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -269,7 +252,7 @@ int scanhash_x11evo_4way( struct work* work, uint32_t max_nonce,
|
||||
}
|
||||
|
||||
uint64_t *edata = (uint64_t*)endiandata;
|
||||
mm256_intrlv_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||
intrlv_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||
|
||||
do
|
||||
{
|
||||
|
||||
@@ -70,7 +70,7 @@ void x11gost_4way_hash( void *state, const void *input )
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
|
||||
// Serial
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||
memcpy( &ctx.groestl, &x11gost_4way_ctx.groestl,
|
||||
@@ -84,7 +84,7 @@ void x11gost_4way_hash( void *state, const void *input )
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
||||
|
||||
// 4way
|
||||
mm256_intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
|
||||
skein512_4way( &ctx.skein, vhash, 64 );
|
||||
skein512_4way_close( &ctx.skein, vhash );
|
||||
@@ -96,7 +96,7 @@ void x11gost_4way_hash( void *state, const void *input )
|
||||
keccak512_4way_close( &ctx.keccak, vhash );
|
||||
|
||||
// Serial
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
sph_gost512( &ctx.gost, hash0, 64 );
|
||||
sph_gost512_close( &ctx.gost, hash0 );
|
||||
@@ -110,13 +110,13 @@ void x11gost_4way_hash( void *state, const void *input )
|
||||
sph_gost512( &ctx.gost, hash3, 64 );
|
||||
sph_gost512_close( &ctx.gost, hash3 );
|
||||
|
||||
mm256_intrlv_2x128( vhash, hash0, hash1, 512 );
|
||||
intrlv_2x128( vhash, hash0, hash1, 512 );
|
||||
luffa_2way_update_close( &ctx.luffa, vhash, vhash, 64 );
|
||||
mm256_dintrlv_2x128( hash0, hash1, vhash, 512 );
|
||||
mm256_intrlv_2x128( vhash, hash2, hash3, 512 );
|
||||
dintrlv_2x128( hash0, hash1, vhash, 512 );
|
||||
intrlv_2x128( vhash, hash2, hash3, 512 );
|
||||
luffa_2way_init( &ctx.luffa, 512 );
|
||||
luffa_2way_update_close( &ctx.luffa, vhash, vhash, 64 );
|
||||
mm256_dintrlv_2x128( hash2, hash3, vhash, 512 );
|
||||
dintrlv_2x128( hash2, hash3, vhash, 512 );
|
||||
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash0, (const byte*) hash0, 64 );
|
||||
memcpy( &ctx.cube, &x11gost_4way_ctx.cube, sizeof(cubehashParam) );
|
||||
@@ -141,12 +141,12 @@ void x11gost_4way_hash( void *state, const void *input )
|
||||
sph_shavite512( &ctx.shavite, hash3, 64 );
|
||||
sph_shavite512_close( &ctx.shavite, hash3 );
|
||||
|
||||
mm256_intrlv_2x128( vhash, hash0, hash1, 512 );
|
||||
intrlv_2x128( vhash, hash0, hash1, 512 );
|
||||
simd_2way_update_close( &ctx.simd, vhash, vhash, 512 );
|
||||
mm256_dintrlv_2x128( hash0, hash1, vhash, 512 );
|
||||
mm256_intrlv_2x128( vhash, hash2, hash3, 512 );
|
||||
dintrlv_2x128( hash0, hash1, vhash, 512 );
|
||||
intrlv_2x128( vhash, hash2, hash3, 512 );
|
||||
simd_2way_update_close( &ctx.simd, vhash, vhash, 512 );
|
||||
mm256_dintrlv_2x128( hash2, hash3, vhash, 512 );
|
||||
dintrlv_2x128( hash2, hash3, vhash, 512 );
|
||||
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash0,
|
||||
(const BitSequence *) hash0, 512 );
|
||||
@@ -171,7 +171,6 @@ int scanhash_x11gost_4way( struct work *work, uint32_t max_nonce,
|
||||
{
|
||||
uint32_t hash[4*8] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
||||
uint32_t edata[20] __attribute__ ((aligned (64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[19];
|
||||
@@ -184,9 +183,7 @@ int scanhash_x11gost_4way( struct work *work, uint32_t max_nonce,
|
||||
uint32_t masks[] = { 0xFFFFFFFF, 0xFFFFFFF0, 0xFFFFFF00,
|
||||
0xFFFFF000, 0xFFFF0000, 0 };
|
||||
|
||||
swab32_array( edata, pdata, 20 );
|
||||
mm256_intrlv_4x64( vdata, edata, edata, edata, edata, 640 );
|
||||
// mm256_bswap_intrlv80_4x64( vdata, pdata );
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
|
||||
for (int m=0; m < 6; m++)
|
||||
if (Htarg <= htmax[m])
|
||||
|
||||
Reference in New Issue
Block a user