This commit is contained in:
Jay D Dee
2019-07-15 17:00:26 -04:00
parent e625ed5420
commit e2d5762ef2
63 changed files with 1973 additions and 2980 deletions

View File

@@ -69,7 +69,7 @@ void c11_4way_hash( void *state, const void *input )
bmw512_4way_close( &ctx.bmw, vhash );
// Serial
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
// 3 Groestl
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
@@ -81,7 +81,7 @@ void c11_4way_hash( void *state, const void *input )
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
// 4way
mm256_intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
// 4 JH
jh512_4way( &ctx.jh, vhash, 64 );
@@ -96,16 +96,16 @@ void c11_4way_hash( void *state, const void *input )
skein512_4way_close( &ctx.skein, vhash );
// Serial
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
// 7 Luffa
mm256_intrlv_2x128( vhash, hash0, hash1, 512 );
mm256_intrlv_2x128( vhashB, hash2, hash3, 512 );
intrlv_2x128( vhash, hash0, hash1, 512 );
intrlv_2x128( vhashB, hash2, hash3, 512 );
luffa_2way_update_close( &ctx.luffa, vhash, vhash, 64 );
luffa_2way_init( &ctx.luffa, 512 );
luffa_2way_update_close( &ctx.luffa, vhashB, vhashB, 64 );
mm256_dintrlv_2x128( hash0, hash1, vhash, 512 );
mm256_dintrlv_2x128( hash2, hash3, vhashB, 512 );
dintrlv_2x128( hash0, hash1, vhash, 512 );
dintrlv_2x128( hash2, hash3, vhashB, 512 );
// 8 Cubehash
cubehashUpdateDigest( &ctx.cube, (byte*)hash0, (const byte*) hash0, 64 );
@@ -133,13 +133,13 @@ void c11_4way_hash( void *state, const void *input )
sph_shavite512_close( &ctx.shavite, hash3 );
// 10 Simd
mm256_intrlv_2x128( vhash, hash0, hash1, 512 );
mm256_intrlv_2x128( vhashB, hash2, hash3, 512 );
intrlv_2x128( vhash, hash0, hash1, 512 );
intrlv_2x128( vhashB, hash2, hash3, 512 );
simd_2way_update_close( &ctx.simd, vhash, vhash, 512 );
simd_2way_init( &ctx.simd, 512 );
simd_2way_update_close( &ctx.simd, vhashB, vhashB, 512 );
mm256_dintrlv_2x128( hash0, hash1, vhash, 512 );
mm256_dintrlv_2x128( hash2, hash3, vhashB, 512 );
dintrlv_2x128( hash0, hash1, vhash, 512 );
dintrlv_2x128( hash2, hash3, vhashB, 512 );
// 11 Echo
update_final_echo( &ctx.echo, (BitSequence *)hash0,
@@ -165,7 +165,6 @@ int scanhash_c11_4way( struct work *work, uint32_t max_nonce,
{
uint32_t hash[4*8] __attribute__ ((aligned (64)));
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
uint32_t edata[20] __attribute__((aligned(64)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
uint32_t n = pdata[19];
@@ -178,9 +177,7 @@ int scanhash_c11_4way( struct work *work, uint32_t max_nonce,
uint32_t masks[] = { 0xFFFFFFFF, 0xFFFFFFF0, 0xFFFFFF00,
0xFFFFF000, 0xFFFF0000, 0 };
swab32_array( edata, pdata, 20 );
mm256_intrlv_4x64( vdata, edata, edata, edata, edata, 640 );
// mm256_bswap_intrlv80_4x64( vdata, pdata );
mm256_bswap32_intrlv80_4x64( vdata, pdata );
for (int m=0; m < 6; m++)
if (Htarg <= htmax[m])

View File

@@ -87,19 +87,16 @@ void timetravel_4way_hash(void *output, const void *input)
blake512_4way( &ctx.blake, vhashA, dataLen );
blake512_4way_close( &ctx.blake, vhashB );
if ( i == 7 )
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
vhashB, dataLen<<3 );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhashB, dataLen<<3 );
break;
case 1:
bmw512_4way( &ctx.bmw, vhashA, dataLen );
bmw512_4way_close( &ctx.bmw, vhashB );
if ( i == 7 )
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
vhashB, dataLen<<3 );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhashB, dataLen<<3 );
break;
case 2:
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
vhashA, dataLen<<3 );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhashA, dataLen<<3 );
update_and_final_groestl( &ctx.groestl, (char*)hash0,
(char*)hash0, dataLen<<3 );
reinit_groestl( &ctx.groestl );
@@ -112,47 +109,40 @@ void timetravel_4way_hash(void *output, const void *input)
update_and_final_groestl( &ctx.groestl, (char*)hash3,
(char*)hash3, dataLen<<3 );
if ( i != 7 )
mm256_intrlv_4x64( vhashB,
hash0, hash1, hash2, hash3, dataLen<<3 );
intrlv_4x64( vhashB, hash0, hash1, hash2, hash3, dataLen<<3 );
break;
case 3:
skein512_4way( &ctx.skein, vhashA, dataLen );
skein512_4way_close( &ctx.skein, vhashB );
if ( i == 7 )
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
vhashB, dataLen<<3 );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhashB, dataLen<<3 );
break;
case 4:
jh512_4way( &ctx.jh, vhashA, dataLen );
jh512_4way_close( &ctx.jh, vhashB );
if ( i == 7 )
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
vhashB, dataLen<<3 );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhashB, dataLen<<3 );
break;
case 5:
keccak512_4way( &ctx.keccak, vhashA, dataLen );
keccak512_4way_close( &ctx.keccak, vhashB );
if ( i == 7 )
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
vhashB, dataLen<<3 );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhashB, dataLen<<3 );
break;
case 6:
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
vhashA, dataLen<<3 );
mm256_intrlv_2x128( vhashA, hash0, hash1, dataLen<<3 );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhashA, dataLen<<3 );
intrlv_2x128( vhashA, hash0, hash1, dataLen<<3 );
luffa_2way_update_close( &ctx.luffa, vhashA, vhashA, dataLen );
mm256_dintrlv_2x128( hash0, hash1, vhashA, dataLen<<3 );
mm256_intrlv_2x128( vhashA, hash2, hash3, dataLen<<3 );
dintrlv_2x128( hash0, hash1, vhashA, dataLen<<3 );
intrlv_2x128( vhashA, hash2, hash3, dataLen<<3 );
luffa_2way_init( &ctx.luffa, 512 );
luffa_2way_update_close( &ctx.luffa, vhashA, vhashA, dataLen );
mm256_dintrlv_2x128( hash2, hash3, vhashA, dataLen<<3 );
dintrlv_2x128( hash2, hash3, vhashA, dataLen<<3 );
if ( i != 7 )
mm256_intrlv_4x64( vhashB,
hash0, hash1, hash2, hash3, dataLen<<3 );
intrlv_4x64( vhashB, hash0, hash1, hash2, hash3, dataLen<<3 );
break;
case 7:
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
vhashA, dataLen<<3 );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhashA, dataLen<<3 );
cubehashUpdateDigest( &ctx.cube, (byte*)hash0,
(const byte*)hash0, dataLen );
memcpy( &ctx.cube, &tt8_4way_ctx.cube, sizeof(cubehashParam) );
@@ -165,8 +155,7 @@ void timetravel_4way_hash(void *output, const void *input)
cubehashUpdateDigest( &ctx.cube, (byte*)hash3,
(const byte*)hash3, dataLen );
if ( i != 7 )
mm256_intrlv_4x64( vhashB,
hash0, hash1, hash2, hash3, dataLen<<3 );
intrlv_4x64( vhashB, hash0, hash1, hash2, hash3, dataLen<<3 );
break;
default:
applog(LOG_ERR,"SWERR: timetravel invalid permutation");
@@ -215,7 +204,7 @@ int scanhash_timetravel_4way( struct work *work, uint32_t max_nonce,
}
uint64_t *edata = (uint64_t*)endiandata;
mm256_intrlv_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
intrlv_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
do
{

View File

@@ -93,19 +93,16 @@ void timetravel10_4way_hash(void *output, const void *input)
blake512_4way( &ctx.blake, vhashA, dataLen );
blake512_4way_close( &ctx.blake, vhashB );
if ( i == 9 )
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
vhashB, dataLen<<3 );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhashB, dataLen<<3 );
break;
case 1:
bmw512_4way( &ctx.bmw, vhashA, dataLen );
bmw512_4way_close( &ctx.bmw, vhashB );
if ( i == 9 )
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
vhashB, dataLen<<3 );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhashB, dataLen<<3 );
break;
case 2:
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
vhashA, dataLen<<3 );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhashA, dataLen<<3 );
update_and_final_groestl( &ctx.groestl, (char*)hash0,
(char*)hash0, dataLen<<3 );
reinit_groestl( &ctx.groestl );
@@ -118,46 +115,40 @@ void timetravel10_4way_hash(void *output, const void *input)
update_and_final_groestl( &ctx.groestl, (char*)hash3,
(char*)hash3, dataLen<<3 );
if ( i != 9 )
mm256_intrlv_4x64( vhashB,
hash0, hash1, hash2, hash3, dataLen<<3 );
intrlv_4x64( vhashB, hash0, hash1, hash2, hash3, dataLen<<3 );
break;
case 3:
skein512_4way( &ctx.skein, vhashA, dataLen );
skein512_4way_close( &ctx.skein, vhashB );
if ( i == 9 )
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
vhashB, dataLen<<3 );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhashB, dataLen<<3 );
break;
case 4:
jh512_4way( &ctx.jh, vhashA, dataLen );
jh512_4way_close( &ctx.jh, vhashB );
if ( i == 9 )
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
vhashB, dataLen<<3 );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhashB, dataLen<<3 );
break;
case 5:
keccak512_4way( &ctx.keccak, vhashA, dataLen );
keccak512_4way_close( &ctx.keccak, vhashB );
if ( i == 9 )
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
vhashB, dataLen<<3 );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhashB, dataLen<<3 );
break;
case 6:
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
vhashA, dataLen<<3 );
mm256_intrlv_2x128( vhashA, hash0, hash1, dataLen<<3 );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhashA, dataLen<<3 );
intrlv_2x128( vhashA, hash0, hash1, dataLen<<3 );
luffa_2way_update_close( &ctx.luffa, vhashA, vhashA, dataLen );
mm256_dintrlv_2x128( hash0, hash1, vhashA, dataLen<<3 );
mm256_intrlv_2x128( vhashA, hash2, hash3, dataLen<<3 );
dintrlv_2x128( hash0, hash1, vhashA, dataLen<<3 );
intrlv_2x128( vhashA, hash2, hash3, dataLen<<3 );
luffa_2way_init( &ctx.luffa, 512 );
luffa_2way_update_close( &ctx.luffa, vhashA, vhashA, dataLen );
mm256_dintrlv_2x128( hash2, hash3, vhashA, dataLen<<3 );
dintrlv_2x128( hash2, hash3, vhashA, dataLen<<3 );
if ( i != 9 )
mm256_intrlv_4x64( vhashB,
hash0, hash1, hash2, hash3, dataLen<<3 );
intrlv_4x64( vhashB, hash0, hash1, hash2, hash3, dataLen<<3 );
break;
case 7:
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
dintrlv_4x64( hash0, hash1, hash2, hash3,
vhashA, dataLen<<3 );
cubehashUpdateDigest( &ctx.cube, (byte*)hash0,
(const byte*)hash0, dataLen );
@@ -171,12 +162,10 @@ void timetravel10_4way_hash(void *output, const void *input)
cubehashUpdateDigest( &ctx.cube, (byte*)hash3,
(const byte*)hash3, dataLen );
if ( i != 9 )
mm256_intrlv_4x64( vhashB,
hash0, hash1, hash2, hash3, dataLen<<3 );
intrlv_4x64( vhashB, hash0, hash1, hash2, hash3, dataLen<<3 );
break;
case 8:
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
vhashA, dataLen<<3 );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhashA, dataLen<<3 );
sph_shavite512( &ctx.shavite, hash0, dataLen );
sph_shavite512_close( &ctx.shavite, hash0 );
memcpy( &ctx.shavite, &tt10_4way_ctx.shavite, sizeof ctx.shavite );
@@ -189,22 +178,19 @@ void timetravel10_4way_hash(void *output, const void *input)
sph_shavite512( &ctx.shavite, hash3, dataLen );
sph_shavite512_close( &ctx.shavite, hash3 );
if ( i != 9 )
mm256_intrlv_4x64( vhashB,
hash0, hash1, hash2, hash3, dataLen<<3 );
intrlv_4x64( vhashB, hash0, hash1, hash2, hash3, dataLen<<3 );
break;
case 9:
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
vhashA, dataLen<<3 );
mm256_intrlv_2x128( vhashA, hash0, hash1, dataLen<<3 );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhashA, dataLen<<3 );
intrlv_2x128( vhashA, hash0, hash1, dataLen<<3 );
simd_2way_update_close( &ctx.simd, vhashA, vhashA, dataLen<<3 );
mm256_dintrlv_2x128( hash0, hash1, vhashA, dataLen<<3 );
mm256_intrlv_2x128( vhashA, hash2, hash3, dataLen<<3 );
dintrlv_2x128( hash0, hash1, vhashA, dataLen<<3 );
intrlv_2x128( vhashA, hash2, hash3, dataLen<<3 );
simd_2way_init( &ctx.simd, 512 );
simd_2way_update_close( &ctx.simd, vhashA, vhashA, dataLen<<3 );
mm256_dintrlv_2x128( hash2, hash3, vhashA, dataLen<<3 );
dintrlv_2x128( hash2, hash3, vhashA, dataLen<<3 );
if ( i != 9 )
mm256_intrlv_4x64( vhashB,
hash0, hash1, hash2, hash3, dataLen<<3 );
intrlv_4x64( vhashB, hash0, hash1, hash2, hash3, dataLen<<3 );
break;
default:
applog(LOG_ERR,"SWERR: timetravel invalid permutation");
@@ -253,7 +239,7 @@ int scanhash_timetravel10_4way( struct work *work,
}
uint64_t *edata = (uint64_t*)endiandata;
mm256_intrlv_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
intrlv_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
do
{

View File

@@ -37,7 +37,7 @@ void tribus_hash_4way(void *state, const void *input)
keccak512_4way( &ctx_keccak, vhash, 64 );
keccak512_4way_close( &ctx_keccak, vhash );
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
// hash echo serially
init_echo( &ctx_echo, 512 );
@@ -64,7 +64,6 @@ int scanhash_tribus_4way( struct work *work, uint32_t max_nonce,
{
uint32_t hash[4*8] __attribute__ ((aligned (64)));
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
uint32_t edata[20] __attribute__ ((aligned (64)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
@@ -87,9 +86,7 @@ int scanhash_tribus_4way( struct work *work, uint32_t max_nonce,
0xFFFF0000,
0 };
swab32_array( edata, pdata, 20 );
mm256_intrlv_4x64( vdata, edata, edata, edata, edata, 640 );
// mm256_bswap_intrlv80_4x64( vdata, pdata );
mm256_bswap32_intrlv80_4x64( vdata, pdata );
// precalc midstate
// doing it one way then interleaving would be faster but too

View File

@@ -69,7 +69,7 @@ void x11_4way_hash( void *state, const void *input )
bmw512_4way_close( &ctx.bmw, vhash );
// Serial
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
// 3 Groestl
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
@@ -81,7 +81,7 @@ void x11_4way_hash( void *state, const void *input )
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
// 4way
mm256_intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
// 4 Skein
skein512_4way( &ctx.skein, vhash, 64 );
@@ -95,16 +95,16 @@ void x11_4way_hash( void *state, const void *input )
keccak512_4way( &ctx.keccak, vhash, 64 );
keccak512_4way_close( &ctx.keccak, vhash );
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
// 7 Luffa parallel 2 way 128 bit
mm256_intrlv_2x128( vhash, hash0, hash1, 512 );
mm256_intrlv_2x128( vhashB, hash2, hash3, 512 );
intrlv_2x128( vhash, hash0, hash1, 512 );
intrlv_2x128( vhashB, hash2, hash3, 512 );
luffa_2way_update_close( &ctx.luffa, vhash, vhash, 64 );
luffa_2way_init( &ctx.luffa, 512 );
luffa_2way_update_close( &ctx.luffa, vhashB, vhashB, 64 );
mm256_dintrlv_2x128( hash0, hash1, vhash, 512 );
mm256_dintrlv_2x128( hash2, hash3, vhashB, 512 );
dintrlv_2x128( hash0, hash1, vhash, 512 );
dintrlv_2x128( hash2, hash3, vhashB, 512 );
// 8 Cubehash
cubehashUpdateDigest( &ctx.cube, (byte*)hash0, (const byte*) hash0, 64 );
@@ -132,13 +132,13 @@ void x11_4way_hash( void *state, const void *input )
sph_shavite512_close( &ctx.shavite, hash3 );
// 10 Simd
mm256_intrlv_2x128( vhash, hash0, hash1, 512 );
mm256_intrlv_2x128( vhashB, hash2, hash3, 512 );
intrlv_2x128( vhash, hash0, hash1, 512 );
intrlv_2x128( vhashB, hash2, hash3, 512 );
simd_2way_update_close( &ctx.simd, vhash, vhash, 512 );
simd_2way_init( &ctx.simd, 512 );
simd_2way_update_close( &ctx.simd, vhashB, vhashB, 512 );
mm256_dintrlv_2x128( hash0, hash1, vhash, 512 );
mm256_dintrlv_2x128( hash2, hash3, vhashB, 512 );
dintrlv_2x128( hash0, hash1, vhash, 512 );
dintrlv_2x128( hash2, hash3, vhashB, 512 );
// 11 Echo
update_final_echo( &ctx.echo, (BitSequence *)hash0,
@@ -164,7 +164,6 @@ int scanhash_x11_4way( struct work *work, uint32_t max_nonce,
{
uint32_t hash[4*8] __attribute__ ((aligned (64)));
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
uint32_t edata[20] __attribute__((aligned(64)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
uint32_t n = pdata[19];
@@ -177,9 +176,7 @@ int scanhash_x11_4way( struct work *work, uint32_t max_nonce,
uint32_t masks[] = { 0xFFFFFFFF, 0xFFFFFFF0, 0xFFFFFF00,
0xFFFFF000, 0xFFFF0000, 0 };
swab32_array( edata, pdata, 20 );
mm256_intrlv_4x64( vdata, edata, edata, edata, edata, 640 );
// mm256_bswap_intrlv80_4x64( vdata, pdata );
mm256_bswap32_intrlv80_4x64( vdata, pdata );
for (int m=0; m < 6; m++)
if (Htarg <= htmax[m])

View File

@@ -87,19 +87,16 @@ void x11evo_4way_hash( void *state, const void *input )
case 0:
blake512_4way( &ctx.blake, input, 80 );
blake512_4way_close( &ctx.blake, vhash );
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
vhash, 64<<3 );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 64<<3 );
break;
case 1:
bmw512_4way( &ctx.bmw, vhash, 64 );
bmw512_4way_close( &ctx.bmw, vhash );
if ( i >= len-1 )
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
vhash, 64<<3 );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 64<<3 );
break;
case 2:
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
vhash, 64<<3 );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 64<<3 );
update_and_final_groestl( &ctx.groestl, (char*)hash0,
(char*)hash0, 512 );
reinit_groestl( &ctx.groestl );
@@ -112,47 +109,40 @@ void x11evo_4way_hash( void *state, const void *input )
update_and_final_groestl( &ctx.groestl, (char*)hash3,
(char*)hash3, 512 );
if ( i < len-1 )
mm256_intrlv_4x64( vhash,
hash0, hash1, hash2, hash3, 64<<3 );
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 64<<3 );
break;
case 3:
skein512_4way( &ctx.skein, vhash, 64 );
skein512_4way_close( &ctx.skein, vhash );
if ( i >= len-1 )
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
vhash, 64<<3 );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 64<<3 );
break;
case 4:
jh512_4way( &ctx.jh, vhash, 64 );
jh512_4way_close( &ctx.jh, vhash );
if ( i >= len-1 )
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
vhash, 64<<3 );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 64<<3 );
break;
case 5:
keccak512_4way( &ctx.keccak, vhash, 64 );
keccak512_4way_close( &ctx.keccak, vhash );
if ( i >= len-1 )
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
vhash, 64<<3 );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 64<<3 );
break;
case 6:
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
vhash, 64<<3 );
mm256_intrlv_2x128( vhash, hash0, hash1, 64<<3 );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 64<<3 );
intrlv_2x128( vhash, hash0, hash1, 64<<3 );
luffa_2way_update_close( &ctx.luffa, vhash, vhash, 64 );
mm256_dintrlv_2x128( hash0, hash1, vhash, 64<<3 );
mm256_intrlv_2x128( vhash, hash2, hash3, 64<<3 );
dintrlv_2x128( hash0, hash1, vhash, 64<<3 );
intrlv_2x128( vhash, hash2, hash3, 64<<3 );
luffa_2way_init( &ctx.luffa, 512 );
luffa_2way_update_close( &ctx.luffa, vhash, vhash, 64 );
mm256_dintrlv_2x128( hash2, hash3, vhash, 64<<3 );
dintrlv_2x128( hash2, hash3, vhash, 64<<3 );
if ( i < len-1 )
mm256_intrlv_4x64( vhash,
hash0, hash1, hash2, hash3, 64<<3 );
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 64<<3 );
break;
case 7:
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
vhash, 64<<3 );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 64<<3 );
cubehashUpdateDigest( &ctx.cube, (byte*)hash0,
(const byte*) hash0, 64 );
memcpy( &ctx.cube, &x11evo_4way_ctx.cube, sizeof(cubehashParam) );
@@ -165,12 +155,10 @@ void x11evo_4way_hash( void *state, const void *input )
cubehashUpdateDigest( &ctx.cube, (byte*)hash3,
(const byte*) hash3, 64 );
if ( i < len-1 )
mm256_intrlv_4x64( vhash,
hash0, hash1, hash2, hash3, 64<<3 );
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 64<<3 );
break;
case 8:
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
vhash, 64<<3 );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 64<<3 );
sph_shavite512( &ctx.shavite, hash0, 64 );
sph_shavite512_close( &ctx.shavite, hash0 );
memcpy( &ctx.shavite, &x11evo_4way_ctx.shavite,
@@ -186,26 +174,22 @@ void x11evo_4way_hash( void *state, const void *input )
sph_shavite512( &ctx.shavite, hash3, 64 );
sph_shavite512_close( &ctx.shavite, hash3 );
if ( i < len-1 )
mm256_intrlv_4x64( vhash,
hash0, hash1, hash2, hash3, 64<<3 );
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 64<<3 );
break;
case 9:
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
vhash, 64<<3 );
mm256_intrlv_2x128( vhash, hash0, hash1, 64<<3 );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 64<<3 );
intrlv_2x128( vhash, hash0, hash1, 64<<3 );
simd_2way_update_close( &ctx.simd, vhash, vhash, 64<<3 );
mm256_dintrlv_2x128( hash0, hash1, vhash, 64<<3 );
mm256_intrlv_2x128( vhash, hash2, hash3, 64<<3 );
dintrlv_2x128( hash0, hash1, vhash, 64<<3 );
intrlv_2x128( vhash, hash2, hash3, 64<<3 );
simd_2way_init( &ctx.simd, 512 );
simd_2way_update_close( &ctx.simd, vhash, vhash, 64<<3 );
mm256_dintrlv_2x128( hash2, hash3, vhash, 64<<3 );
dintrlv_2x128( hash2, hash3, vhash, 64<<3 );
if ( i < len-1 )
mm256_intrlv_4x64( vhash,
hash0, hash1, hash2, hash3, 64<<3 );
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 64<<3 );
break;
case 10:
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3,
vhash, 64<<3 );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 64<<3 );
update_final_echo( &ctx.echo, (BitSequence *)hash0,
(const BitSequence *) hash0, 512 );
memcpy( &ctx.echo, &x11evo_4way_ctx.echo, sizeof(hashState_echo) );
@@ -218,8 +202,7 @@ void x11evo_4way_hash( void *state, const void *input )
update_final_echo( &ctx.echo, (BitSequence *)hash3,
(const BitSequence *) hash3, 512 );
if ( i < len-1 )
mm256_intrlv_4x64( vhash,
hash0, hash1, hash2, hash3, 64<<3 );
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 64<<3 );
break;
}
}
@@ -269,7 +252,7 @@ int scanhash_x11evo_4way( struct work* work, uint32_t max_nonce,
}
uint64_t *edata = (uint64_t*)endiandata;
mm256_intrlv_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
intrlv_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
do
{

View File

@@ -70,7 +70,7 @@ void x11gost_4way_hash( void *state, const void *input )
bmw512_4way_close( &ctx.bmw, vhash );
// Serial
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
memcpy( &ctx.groestl, &x11gost_4way_ctx.groestl,
@@ -84,7 +84,7 @@ void x11gost_4way_hash( void *state, const void *input )
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
// 4way
mm256_intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
skein512_4way( &ctx.skein, vhash, 64 );
skein512_4way_close( &ctx.skein, vhash );
@@ -96,7 +96,7 @@ void x11gost_4way_hash( void *state, const void *input )
keccak512_4way_close( &ctx.keccak, vhash );
// Serial
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
sph_gost512( &ctx.gost, hash0, 64 );
sph_gost512_close( &ctx.gost, hash0 );
@@ -110,13 +110,13 @@ void x11gost_4way_hash( void *state, const void *input )
sph_gost512( &ctx.gost, hash3, 64 );
sph_gost512_close( &ctx.gost, hash3 );
mm256_intrlv_2x128( vhash, hash0, hash1, 512 );
intrlv_2x128( vhash, hash0, hash1, 512 );
luffa_2way_update_close( &ctx.luffa, vhash, vhash, 64 );
mm256_dintrlv_2x128( hash0, hash1, vhash, 512 );
mm256_intrlv_2x128( vhash, hash2, hash3, 512 );
dintrlv_2x128( hash0, hash1, vhash, 512 );
intrlv_2x128( vhash, hash2, hash3, 512 );
luffa_2way_init( &ctx.luffa, 512 );
luffa_2way_update_close( &ctx.luffa, vhash, vhash, 64 );
mm256_dintrlv_2x128( hash2, hash3, vhash, 512 );
dintrlv_2x128( hash2, hash3, vhash, 512 );
cubehashUpdateDigest( &ctx.cube, (byte*)hash0, (const byte*) hash0, 64 );
memcpy( &ctx.cube, &x11gost_4way_ctx.cube, sizeof(cubehashParam) );
@@ -141,12 +141,12 @@ void x11gost_4way_hash( void *state, const void *input )
sph_shavite512( &ctx.shavite, hash3, 64 );
sph_shavite512_close( &ctx.shavite, hash3 );
mm256_intrlv_2x128( vhash, hash0, hash1, 512 );
intrlv_2x128( vhash, hash0, hash1, 512 );
simd_2way_update_close( &ctx.simd, vhash, vhash, 512 );
mm256_dintrlv_2x128( hash0, hash1, vhash, 512 );
mm256_intrlv_2x128( vhash, hash2, hash3, 512 );
dintrlv_2x128( hash0, hash1, vhash, 512 );
intrlv_2x128( vhash, hash2, hash3, 512 );
simd_2way_update_close( &ctx.simd, vhash, vhash, 512 );
mm256_dintrlv_2x128( hash2, hash3, vhash, 512 );
dintrlv_2x128( hash2, hash3, vhash, 512 );
update_final_echo( &ctx.echo, (BitSequence *)hash0,
(const BitSequence *) hash0, 512 );
@@ -171,7 +171,6 @@ int scanhash_x11gost_4way( struct work *work, uint32_t max_nonce,
{
uint32_t hash[4*8] __attribute__ ((aligned (64)));
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
uint32_t edata[20] __attribute__ ((aligned (64)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
uint32_t n = pdata[19];
@@ -184,9 +183,7 @@ int scanhash_x11gost_4way( struct work *work, uint32_t max_nonce,
uint32_t masks[] = { 0xFFFFFFFF, 0xFFFFFFF0, 0xFFFFFF00,
0xFFFFF000, 0xFFFF0000, 0 };
swab32_array( edata, pdata, 20 );
mm256_intrlv_4x64( vdata, edata, edata, edata, edata, 640 );
// mm256_bswap_intrlv80_4x64( vdata, pdata );
mm256_bswap32_intrlv80_4x64( vdata, pdata );
for (int m=0; m < 6; m++)
if (Htarg <= htmax[m])