This commit is contained in:
Jay D Dee
2019-07-15 17:00:26 -04:00
parent e625ed5420
commit e2d5762ef2
63 changed files with 1973 additions and 2980 deletions

View File

@@ -44,11 +44,11 @@ void allium_4way_hash( void *state, const void *input )
blake256_4way( &ctx.blake, input + (64<<2), 16 );
blake256_4way_close( &ctx.blake, vhash32 );
mm256_rintrlv_4x32_4x64( vhash64, vhash32, 256 );
rintrlv_4x32_4x64( vhash64, vhash32, 256 );
keccak256_4way( &ctx.keccak, vhash64, 32 );
keccak256_4way_close( &ctx.keccak, vhash64 );
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash64, 256 );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash64, 256 );
LYRA2RE( hash0, 32, hash0, 32, hash0, 32, 1, 8, 8 );
LYRA2RE( hash1, 32, hash1, 32, hash1, 32, 1, 8, 8 );
@@ -68,12 +68,12 @@ void allium_4way_hash( void *state, const void *input )
LYRA2RE( hash2, 32, hash2, 32, hash2, 32, 1, 8, 8 );
LYRA2RE( hash3, 32, hash3, 32, hash3, 32, 1, 8, 8 );
mm256_intrlv_4x64( vhash64, hash0, hash1, hash2, hash3, 256 );
intrlv_4x64( vhash64, hash0, hash1, hash2, hash3, 256 );
skein256_4way( &ctx.skein, vhash64, 32 );
skein256_4way_close( &ctx.skein, vhash64 );
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash64, 256 );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash64, 256 );
update_and_final_groestl256( &ctx.groestl, state, hash0, 256 );
memcpy( &ctx.groestl, &allium_4way_ctx.groestl,
@@ -103,7 +103,7 @@ int scanhash_allium_4way( struct work *work, uint32_t max_nonce,
if ( opt_benchmark )
( (uint32_t*)ptarget )[7] = 0x0000ff;
mm128_bswap_intrlv80_4x32( vdata, pdata );
mm128_bswap32_intrlv80_4x32( vdata, pdata );
blake256_4way_init( &allium_4way_ctx.blake );
blake256_4way( &allium_4way_ctx.blake, vdata, 64 );

View File

@@ -64,7 +64,7 @@ int scanhash_lyra2h_4way( struct work *work, uint32_t max_nonce,
if ( opt_benchmark )
ptarget[7] = 0x0000ff;
mm128_bswap_intrlv80_4x32( vdata, pdata );
mm128_bswap32_intrlv80_4x32( vdata, pdata );
lyra2h_4way_midstate( vdata );
do {

View File

@@ -42,12 +42,12 @@ void lyra2rev2_4way_hash( void *state, const void *input )
blake256_4way( &ctx.blake, input + (64<<2), 16 );
blake256_4way_close( &ctx.blake, vhash );
mm256_rintrlv_4x32_4x64( vhash64, vhash, 256 );
rintrlv_4x32_4x64( vhash64, vhash, 256 );
keccak256_4way( &ctx.keccak, vhash64, 32 );
keccak256_4way_close( &ctx.keccak, vhash64 );
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash64, 256 );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash64, 256 );
cubehashUpdateDigest( &ctx.cube, (byte*) hash0, (const byte*) hash0, 32 );
cubehashInit( &ctx.cube, 256, 16, 32 );
@@ -62,12 +62,12 @@ void lyra2rev2_4way_hash( void *state, const void *input )
LYRA2REV2( l2v2_wholeMatrix, hash2, 32, hash2, 32, hash2, 32, 1, 4, 4 );
LYRA2REV2( l2v2_wholeMatrix, hash3, 32, hash3, 32, hash3, 32, 1, 4, 4 );
mm256_intrlv_4x64( vhash64, hash0, hash1, hash2, hash3, 256 );
intrlv_4x64( vhash64, hash0, hash1, hash2, hash3, 256 );
skein256_4way( &ctx.skein, vhash64, 32 );
skein256_4way_close( &ctx.skein, vhash64 );
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash64, 256 );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash64, 256 );
cubehashInit( &ctx.cube, 256, 16, 32 );
cubehashUpdateDigest( &ctx.cube, (byte*) hash0, (const byte*) hash0, 32 );
@@ -102,7 +102,7 @@ int scanhash_lyra2rev2_4way( struct work *work, uint32_t max_nonce,
if ( opt_benchmark )
( (uint32_t*)ptarget )[7] = 0x0000ff;
mm128_bswap_intrlv80_4x32( vdata, pdata );
mm128_bswap32_intrlv80_4x32( vdata, pdata );
blake256_4way_init( &l2v2_4way_ctx.blake );
blake256_4way( &l2v2_4way_ctx.blake, vdata, 64 );

View File

@@ -41,7 +41,7 @@ void lyra2rev3_8way_hash( void *state, const void *input )
blake256_8way( &ctx.blake, input, 80 );
blake256_8way_close( &ctx.blake, vhash );
mm256_dintrlv_8x32( hash0, hash1, hash2, hash3,
dintrlv_8x32( hash0, hash1, hash2, hash3,
hash4, hash5, hash6, hash7, vhash, 256 );
LYRA2REV3( l2v3_wholeMatrix, hash0, 32, hash0, 32, hash0, 32, 1, 4, 4 );
@@ -78,7 +78,7 @@ void lyra2rev3_8way_hash( void *state, const void *input )
LYRA2REV3( l2v3_wholeMatrix, hash6, 32, hash6, 32, hash6, 32, 1, 4, 4 );
LYRA2REV3( l2v3_wholeMatrix, hash7, 32, hash7, 32, hash7, 32, 1, 4, 4 );
mm256_intrlv_8x32( vhash, hash0, hash1, hash2, hash3,
intrlv_8x32( vhash, hash0, hash1, hash2, hash3,
hash4, hash5, hash6, hash7, 256 );
bmw256_8way( &ctx.bmw, vhash, 32 );
@@ -91,7 +91,6 @@ int scanhash_lyra2rev3_8way( struct work *work, uint32_t max_nonce,
{
uint32_t hash[8*8] __attribute__ ((aligned (64)));
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
uint32_t edata[20] __attribute__ ((aligned (64)));
uint32_t *hash7 = &(hash[7<<3]);
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
uint32_t *pdata = work->data;
@@ -105,10 +104,7 @@ int scanhash_lyra2rev3_8way( struct work *work, uint32_t max_nonce,
if ( opt_benchmark )
( (uint32_t*)ptarget )[7] = 0x0000ff;
swab32_array( edata, pdata, 20 );
mm256_intrlv_8x32( vdata, edata, edata, edata, edata,
edata, edata, edata, edata, 640 );
// mm256_bswap_intrlv80_8x32( vdata, pdata );
mm256_bswap32_intrlv80_8x32( vdata, pdata );
do
{
*noncev = mm256_bswap_32( _mm256_set_epi32( n+7, n+6, n+5, n+4,
@@ -119,7 +115,7 @@ int scanhash_lyra2rev3_8way( struct work *work, uint32_t max_nonce,
for ( int lane = 0; lane < 8; lane++ ) if ( hash7[lane] <= Htarg )
{
mm256_extr_lane_8x32( lane_hash, hash, lane, 256 );
extr_lane_8x32( lane_hash, hash, lane, 256 );
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
{
pdata[19] = n + lane;
@@ -208,7 +204,7 @@ int scanhash_lyra2rev3_4way( struct work *work, uint32_t max_nonce,
if ( opt_benchmark )
( (uint32_t*)ptarget )[7] = 0x0000ff;
mm128_bswap_intrlv80_4x32( vdata, pdata );
mm128_bswap32_intrlv80_4x32( vdata, pdata );
do
{
*noncev = mm128_bswap_32( _mm_set_epi32( n+3, n+2, n+1, n ) );

View File

@@ -60,7 +60,7 @@ int scanhash_lyra2z_4way( struct work *work, uint32_t max_nonce,
if ( opt_benchmark )
ptarget[7] = 0x0000ff;
mm128_bswap_intrlv80_4x32( vdata, pdata );
mm128_bswap32_intrlv80_4x32( vdata, pdata );
lyra2z_4way_midstate( vdata );
do {
@@ -119,8 +119,8 @@ void lyra2z_8way_hash( void *state, const void *input )
blake256_8way( &ctx_blake, input + (64*8), 16 );
blake256_8way_close( &ctx_blake, vhash );
mm256_dintrlv_8x32( hash0, hash1, hash2, hash3,
hash4, hash5, hash6, hash7, vhash, 256 );
dintrlv_8x32( hash0, hash1, hash2, hash3,
hash4, hash5, hash6, hash7, vhash, 256 );
LYRA2Z( lyra2z_8way_matrix, hash0, 32, hash0, 32, hash0, 32, 8, 8, 8 );
LYRA2Z( lyra2z_8way_matrix, hash1, 32, hash1, 32, hash1, 32, 8, 8, 8 );
@@ -146,7 +146,6 @@ int scanhash_lyra2z_8way( struct work *work, uint32_t max_nonce,
{
uint32_t hash[8*8] __attribute__ ((aligned (64)));
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
uint32_t edata[20] __attribute__ ((aligned (64)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t Htarg = ptarget[7];
@@ -158,10 +157,7 @@ int scanhash_lyra2z_8way( struct work *work, uint32_t max_nonce,
if ( opt_benchmark )
ptarget[7] = 0x0000ff;
swab32_array( edata, pdata, 20 );
mm256_intrlv_8x32( vdata, edata, edata, edata, edata,
edata, edata, edata, edata, 640 );
// mm256_bswap_intrlv80_8x32( vdata, pdata );
mm256_bswap32_intrlv80_8x32( vdata, pdata );
lyra2z_8way_midstate( vdata );
do {

View File

@@ -69,13 +69,13 @@ void phi2_hash_4way( void *state, const void *input )
LYRA2RE( &hashA[3][0], 32, &hashB[3][0], 32, &hashB[3][0], 32, 1, 8, 8 );
LYRA2RE( &hashA[3][8], 32, &hashB[3][8], 32, &hashB[3][8], 32, 1, 8, 8 );
mm256_intrlv_4x64( vhash, hashA[0], hashA[1], hashA[2], hashA[3], 512 );
intrlv_4x64( vhash, hashA[0], hashA[1], hashA[2], hashA[3], 512 );
jh512_4way_init( &ctx.jh );
jh512_4way( &ctx.jh, vhash, 64 );
jh512_4way_close( &ctx.jh, vhash );
mm256_dintrlv_4x64( hash[0], hash[1], hash[2], hash[3], vhash, 512 );
dintrlv_4x64( hash[0], hash[1], hash[2], hash[3], vhash, 512 );
if ( hash[0][0] & 1 )
{
@@ -141,7 +141,7 @@ void phi2_hash_4way( void *state, const void *input )
(const BitSequence *)hash[3], 512 );
}
mm256_intrlv_4x64( vhash, hash[0], hash[1], hash[2], hash[3], 512 );
intrlv_4x64( vhash, hash[0], hash[1], hash[2], hash[3], 512 );
skein512_4way_init( &ctx.skein );
skein512_4way( &ctx.skein, vhash, 64 );
@@ -217,7 +217,7 @@ int scanhash_phi2_4way( struct work *work, uint32_t max_nonce,
for ( int lane = 0; lane < 4; lane++ ) if ( hash7[ lane<<1 ] < Htarg )
{
mm256_extr_lane_4x64( lane_hash, hash, lane, 256 );
extr_lane_4x64( lane_hash, hash, lane, 256 );
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
{
pdata[19] = n + lane;