mirror of
https://github.com/JayDDee/cpuminer-opt.git
synced 2025-09-17 23:44:27 +00:00
v3.9.5.4
This commit is contained in:
@@ -39,7 +39,7 @@ void polytimos_4way_hash( void *output, const void *input )
|
||||
|
||||
// Need to convert from 64 bit interleaved to 32 bit interleaved.
|
||||
uint32_t vhash32[16*4];
|
||||
mm256_rintrlv_4x64_4x32( vhash32, vhash, 512 );
|
||||
rintrlv_4x64_4x32( vhash32, vhash, 512 );
|
||||
shabal512_4way_init( &ctx.shabal );
|
||||
shabal512_4way( &ctx.shabal, vhash32, 64 );
|
||||
shabal512_4way_close( &ctx.shabal, vhash32 );
|
||||
@@ -58,15 +58,15 @@ void polytimos_4way_hash( void *output, const void *input )
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash3,
|
||||
(const BitSequence *) hash3, 512 );
|
||||
|
||||
mm256_intrlv_2x128( vhash, hash0, hash1, 512 );
|
||||
intrlv_2x128( vhash, hash0, hash1, 512 );
|
||||
luffa_2way_init( &ctx.luffa, 512 );
|
||||
luffa_2way_update_close( &ctx.luffa, vhash, vhash, 64 );
|
||||
mm256_dintrlv_2x128( hash0, hash1, vhash, 512 );
|
||||
mm256_intrlv_2x128( vhash, hash2, hash3, 512 );
|
||||
dintrlv_2x128( hash0, hash1, vhash, 512 );
|
||||
intrlv_2x128( vhash, hash2, hash3, 512 );
|
||||
luffa_2way_init( &ctx.luffa, 512 );
|
||||
luffa_2way_init( &ctx.luffa, 512 );
|
||||
luffa_2way_update_close( &ctx.luffa, vhash, vhash, 64 );
|
||||
mm256_dintrlv_2x128( hash2, hash3, vhash, 512 );
|
||||
dintrlv_2x128( hash2, hash3, vhash, 512 );
|
||||
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, hash0, 64 );
|
||||
@@ -105,7 +105,6 @@ int scanhash_polytimos_4way( struct work *work, uint32_t max_nonce,
|
||||
{
|
||||
uint32_t hash[4*8] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
||||
uint32_t edata[20] __attribute__ ((aligned (64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
@@ -118,9 +117,7 @@ int scanhash_polytimos_4way( struct work *work, uint32_t max_nonce,
|
||||
if ( opt_benchmark )
|
||||
ptarget[7] = 0x0cff;
|
||||
|
||||
swab32_array( edata, pdata, 20 );
|
||||
mm256_intrlv_4x64( vdata, edata, edata, edata, edata, 640 );
|
||||
// mm256_bswap_intrlv80_4x64( vdata, pdata );
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
do {
|
||||
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
|
||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
|
||||
|
@@ -40,7 +40,7 @@ void veltor_4way_hash( void *output, const void *input )
|
||||
|
||||
skein512_4way( &ctx.skein, input, 80 );
|
||||
skein512_4way_close( &ctx.skein, vhash );
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
sph_shavite512( &ctx.shavite, hash0, 64 );
|
||||
sph_shavite512_close( &ctx.shavite, hash0 );
|
||||
@@ -82,7 +82,6 @@ int scanhash_veltor_4way( struct work *work, uint32_t max_nonce,
|
||||
{
|
||||
uint32_t hash[4*8] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
||||
uint32_t edata[20] __attribute__ ((aligned (64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
@@ -95,9 +94,7 @@ int scanhash_veltor_4way( struct work *work, uint32_t max_nonce,
|
||||
if ( opt_benchmark )
|
||||
ptarget[7] = 0x0cff;
|
||||
|
||||
swab32_array( edata, pdata, 20 );
|
||||
mm256_intrlv_4x64( vdata, edata, edata, edata, edata, 640 );
|
||||
// mm256_bswap_intrlv80_4x64( vdata, pdata );
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
|
||||
do
|
||||
{
|
||||
|
@@ -78,7 +78,7 @@ void x14_4way_hash( void *state, const void *input )
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
|
||||
// Serial
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
// 3 Groestl
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||
@@ -90,7 +90,7 @@ void x14_4way_hash( void *state, const void *input )
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
||||
|
||||
// Parallel 4way
|
||||
mm256_intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
|
||||
// 4 Skein
|
||||
skein512_4way( &ctx.skein, vhash, 64 );
|
||||
@@ -105,16 +105,16 @@ void x14_4way_hash( void *state, const void *input )
|
||||
keccak512_4way_close( &ctx.keccak, vhash );
|
||||
|
||||
// Serial
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
// 7 Luffa
|
||||
mm256_intrlv_2x128( vhash, hash0, hash1, 512 );
|
||||
intrlv_2x128( vhash, hash0, hash1, 512 );
|
||||
luffa_2way_update_close( &ctx.luffa, vhash, vhash, 64 );
|
||||
mm256_dintrlv_2x128( hash0, hash1, vhash, 512 );
|
||||
mm256_intrlv_2x128( vhash, hash2, hash3, 512 );
|
||||
dintrlv_2x128( hash0, hash1, vhash, 512 );
|
||||
intrlv_2x128( vhash, hash2, hash3, 512 );
|
||||
luffa_2way_init( &ctx.luffa, 512 );
|
||||
luffa_2way_update_close( &ctx.luffa, vhash, vhash, 64 );
|
||||
mm256_dintrlv_2x128( hash2, hash3, vhash, 512 );
|
||||
dintrlv_2x128( hash2, hash3, vhash, 512 );
|
||||
|
||||
// 8 Cubehash
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash0, (const byte*) hash0, 64 );
|
||||
@@ -142,13 +142,13 @@ void x14_4way_hash( void *state, const void *input )
|
||||
sph_shavite512_close( &ctx.shavite, hash3 );
|
||||
|
||||
// 10 Simd
|
||||
mm256_intrlv_2x128( vhash, hash0, hash1, 512 );
|
||||
intrlv_2x128( vhash, hash0, hash1, 512 );
|
||||
simd_2way_update_close( &ctx.simd, vhash, vhash, 512 );
|
||||
mm256_dintrlv_2x128( hash0, hash1, vhash, 512 );
|
||||
mm256_intrlv_2x128( vhash, hash2, hash3, 512 );
|
||||
dintrlv_2x128( hash0, hash1, vhash, 512 );
|
||||
intrlv_2x128( vhash, hash2, hash3, 512 );
|
||||
simd_2way_init( &ctx.simd, 512 );
|
||||
simd_2way_update_close( &ctx.simd, vhash, vhash, 512 );
|
||||
mm256_dintrlv_2x128( hash2, hash3, vhash, 512 );
|
||||
dintrlv_2x128( hash2, hash3, vhash, 512 );
|
||||
|
||||
// 11 Echo
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash0,
|
||||
@@ -164,10 +164,10 @@ void x14_4way_hash( void *state, const void *input )
|
||||
(const BitSequence *) hash3, 512 );
|
||||
|
||||
// 12 Hamsi parallel 4way 32 bit
|
||||
mm256_intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
hamsi512_4way( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_4way_close( &ctx.hamsi, vhash );
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
// 13 Fugue serial
|
||||
sph_fugue512( &ctx.fugue, hash0, 64 );
|
||||
@@ -193,7 +193,6 @@ int scanhash_x14_4way( struct work *work, uint32_t max_nonce,
|
||||
{
|
||||
uint32_t hash[4*16] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
||||
uint32_t edata[20] __attribute__ ((aligned (64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[19];
|
||||
@@ -206,9 +205,7 @@ int scanhash_x14_4way( struct work *work, uint32_t max_nonce,
|
||||
uint32_t masks[] = { 0xFFFFFFFF, 0xFFFFFFF0, 0xFFFFFF00,
|
||||
0xFFFFF000, 0xFFFF0000, 0 };
|
||||
|
||||
swab32_array( edata, pdata, 20 );
|
||||
mm256_intrlv_4x64( vdata, edata, edata, edata, edata, 640 );
|
||||
// mm256_bswap_intrlv80_4x64( vdata, pdata );
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
|
||||
for ( int m=0; m < 6; m++ )
|
||||
if ( Htarg <= htmax[m] )
|
||||
|
Reference in New Issue
Block a user