This commit is contained in:
Jay D Dee
2019-07-15 17:00:26 -04:00
parent e625ed5420
commit e2d5762ef2
63 changed files with 1973 additions and 2980 deletions

View File

@@ -196,9 +196,9 @@ SPH_XCAT( HASH, _addbits_and_close )(void *cc, unsigned ub, unsigned n,
ptr = (unsigned)sc->count & (SPH_BLEN - 1U);
#ifdef PW01
sc->buf[ptr>>3] = _mm256_set1_epi64x( 0x100 >> 8 );
sc->buf[ptr>>3] = m256_const1_64( 0x100 >> 8 );
#else
sc->buf[ptr>>3] = _mm256_set1_epi64x( 0x80 );
sc->buf[ptr>>3] = m256_const1_64( 0x80 );
#endif
ptr += 8;

View File

@@ -660,7 +660,7 @@ void sha512_4way_close( sha512_4way_context *sc, void *dst )
const int pad = buf_size - 16;
ptr = (unsigned)sc->count & (buf_size - 1U);
sc->buf[ ptr>>3 ] = _mm256_set1_epi64x( 0x80 );
sc->buf[ ptr>>3 ] = m256_const1_64( 0x80 );
ptr += 8;
if ( ptr > pad )
{

View File

@@ -36,7 +36,6 @@ int scanhash_sha256q_8way( struct work *work, uint32_t max_nonce,
{
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
uint32_t hash[8*8] __attribute__ ((aligned (32)));
uint32_t edata[20] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
@@ -60,10 +59,7 @@ int scanhash_sha256q_8way( struct work *work, uint32_t max_nonce,
0 };
// Need big endian data
swab32_array( edata, pdata, 20 );
mm256_intrlv_8x32( vdata, edata, edata, edata, edata,
edata, edata, edata, edata, 640 );
// mm256_bswap_intrlv80_8x32( vdata, pdata );
mm256_bswap32_intrlv80_8x32( vdata, pdata );
sha256_8way_init( &sha256_ctx8 );
sha256_8way( &sha256_ctx8, vdata, 64 );
@@ -84,7 +80,7 @@ int scanhash_sha256q_8way( struct work *work, uint32_t max_nonce,
if ( !( hash7[ lane ] & mask ) )
{
// deinterleave hash for lane
mm256_extr_lane_8x32( lane_hash, hash, lane, 256 );
extr_lane_8x32( lane_hash, hash, lane, 256 );
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
{
@@ -133,7 +129,6 @@ int scanhash_sha256q_4way( struct work *work, uint32_t max_nonce,
{
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
uint32_t hash[8*4] __attribute__ ((aligned (32)));
uint32_t edata[20] __attribute__ ((aligned (64)));
uint32_t *hash7 = &(hash[7<<2]);
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
uint32_t *pdata = work->data;
@@ -157,9 +152,7 @@ int scanhash_sha256q_4way( struct work *work, uint32_t max_nonce,
0xFFFF0000,
0 };
swab32_array( edata, pdata, 20 );
mm128_intrlv_4x32( vdata, edata, edata, edata, edata, 640 );
// mm128_bswap_intrlv80_4x32( vdata, pdata );
mm128_bswap32_intrlv80_4x32( vdata, pdata );
sha256_4way_init( &sha256_ctx4 );
sha256_4way( &sha256_ctx4, vdata, 64 );

View File

@@ -72,7 +72,7 @@ int scanhash_sha256t_11way( struct work *work, uint32_t max_nonce,
casti_m256i( dataz, 1 ) = mm256_bswap_32( casti_m256i( pdata, 1 ) );
casti_m128i( dataz, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
mm256_intrlv_8x32( datax, dataz, dataz, dataz, dataz,
intrlv_8x32( datax, dataz, dataz, dataz, dataz,
dataz, dataz, dataz, dataz, 640 );
mm64_interleave_2x32( datay, dataz, dataz, 640 );
@@ -99,7 +99,7 @@ int scanhash_sha256t_11way( struct work *work, uint32_t max_nonce,
for ( i = 0; i < 8; i++ ) if ( !( hash7[ i ] & mask ) )
{
// deinterleave hash for lane
mm256_extr_lane_8x32( lane_hash, hashx, i, 256 );
extr_lane_8x32( lane_hash, hashx, i, 256 );
if ( fulltest( lane_hash, ptarget ) )
{
pdata[19] = n + i;
@@ -163,7 +163,6 @@ int scanhash_sha256t_8way( struct work *work, uint32_t max_nonce,
{
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
uint32_t hash[8*8] __attribute__ ((aligned (32)));
uint32_t edata[20] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
uint32_t *hash7 = &(hash[7<<3]);
uint32_t *pdata = work->data;
@@ -187,12 +186,9 @@ int scanhash_sha256t_8way( struct work *work, uint32_t max_nonce,
0xFFFF0000,
0 };
swab32_array( edata, pdata, 20 );
mm256_intrlv_8x32( vdata, edata, edata, edata, edata,
edata, edata, edata, edata, 640 );
// Need big endian data
// mm256_bswap_intrlv80_8x32( vdata, pdata );
mm256_bswap32_intrlv80_8x32( vdata, pdata );
sha256_8way_init( &sha256_ctx8 );
sha256_8way( &sha256_ctx8, vdata, 64 );
@@ -209,7 +205,7 @@ int scanhash_sha256t_8way( struct work *work, uint32_t max_nonce,
if ( !( hash7[ lane ] & mask ) )
{
// deinterleave hash for lane
mm256_extr_lane_8x32( lane_hash, hash, lane, 256 );
extr_lane_8x32( lane_hash, hash, lane, 256 );
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
{
pdata[19] = n + lane;
@@ -253,7 +249,6 @@ int scanhash_sha256t_4way( struct work *work, uint32_t max_nonce,
{
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
uint32_t hash[8*4] __attribute__ ((aligned (32)));
uint32_t edata[20] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
uint32_t *hash7 = &(hash[7<<2]);
uint32_t *pdata = work->data;
@@ -277,10 +272,7 @@ int scanhash_sha256t_4way( struct work *work, uint32_t max_nonce,
0xFFFF0000,
0 };
swab32_array( edata, pdata, 20 );
mm128_intrlv_4x32( vdata, edata, edata, edata, edata, 640 );
// mm128_bswap_intrlv80_4x32( vdata, pdata );
mm128_bswap32_intrlv80_4x32( vdata, pdata );
sha256_4way_init( &sha256_ctx4 );
sha256_4way( &sha256_ctx4, vdata, 64 );

View File

@@ -11,7 +11,7 @@ bool register_sha256t_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_sha256t_4way;
gate->hash = (void*)&sha256t_4way_hash;
#else
gate->optimizations = SHA_OPT;
gate->optimizations = SHA_OPT;
gate->scanhash = (void*)&scanhash_sha256t;
gate->hash = (void*)&sha256t_hash;
#endif
@@ -21,7 +21,11 @@ gate->optimizations = SHA_OPT;
bool register_sha256q_algo( algo_gate_t* gate )
{
#if defined(SHA256T_4WAY)
#if defined(SHA256T_8WAY)
gate->optimizations = SSE2_OPT | AVX2_OPT | SHA_OPT;
gate->scanhash = (void*)&scanhash_sha256q_8way;
gate->hash = (void*)&sha256q_8way_hash;
#elif defined(SHA256T_4WAY)
gate->optimizations = SSE2_OPT | AVX2_OPT | SHA_OPT;
gate->scanhash = (void*)&scanhash_sha256q_4way;
gate->hash = (void*)&sha256q_4way_hash;

View File

@@ -8,7 +8,7 @@
#if !defined(__SHA__)
#if defined(__AVX2__)
#define SHA256T_8WAY
#elif defined(__SSE2__)
#elif defined(__SSE2__)
#define SHA256T_4WAY
#endif
#endif