Mirror of https://github.com/JayDDee/cpuminer-opt.git
Synced 2025-09-17 23:44:27 +00:00

Commit: v3.9.5.4
@@ -196,9 +196,9 @@ SPH_XCAT( HASH, _addbits_and_close )(void *cc, unsigned ub, unsigned n,
     ptr = (unsigned)sc->count & (SPH_BLEN - 1U);
 #ifdef PW01
-    sc->buf[ptr>>3] = _mm256_set1_epi64x( 0x100 >> 8 );
+    sc->buf[ptr>>3] = m256_const1_64( 0x100 >> 8 );
 #else
-    sc->buf[ptr>>3] = _mm256_set1_epi64x( 0x80 );
+    sc->buf[ptr>>3] = m256_const1_64( 0x80 );
 #endif
     ptr += 8;
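The swap from _mm256_set1_epi64x to m256_const1_64 applies the project's own constant-broadcast helper. The real definition lives in cpuminer-opt's SIMD utility headers, so treat the following as an assumption about its shape; such a helper can be built on the AVX2 broadcast instruction:

#include <stdint.h>
#include <immintrin.h>

// Hypothetical stand-in for m256_const1_64, for illustration only:
// broadcast one 64-bit constant to all four qword lanes of a __m256i.
static inline __m256i m256_const1_64_sketch( const uint64_t n )
{
   // movq into an xmm register, then vpbroadcastq across the ymm
   return _mm256_broadcastq_epi64( _mm_cvtsi64_si128( (int64_t)n ) );
}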
@@ -660,7 +660,7 @@ void sha512_4way_close( sha512_4way_context *sc, void *dst )
    const int pad = buf_size - 16;

    ptr = (unsigned)sc->count & (buf_size - 1U);
-   sc->buf[ ptr>>3 ] = _mm256_set1_epi64x( 0x80 );
+   sc->buf[ ptr>>3 ] = m256_const1_64( 0x80 );
    ptr += 8;
    if ( ptr > pad )
    {
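For context, sha512_4way_close is the vectorized form of standard SHA-512 finalization: append 0x80, zero-fill to 16 bytes before the end of the 128-byte block (pad = buf_size - 16), then store the big-endian bit count. A scalar sketch of that padding step, assuming the message length fits in 64 bits and the marker byte fits in the current block (ptr <= pad):

#include <stdint.h>
#include <string.h>

// Scalar analogue of the padding above. 'used' is the byte offset in
// the final 128-byte block (the vector code's 'ptr'); 'total_bytes'
// is the full message length so far.
static void sha512_pad_sketch( uint8_t buf[128], size_t used,
                               uint64_t total_bytes )
{
   buf[used++] = 0x80;                  // 1-bit end-of-message marker
   memset( buf + used, 0, 120 - used ); // zero-fill through the high
                                        // half of the 128-bit length
   const uint64_t bits = total_bytes << 3;
   for ( int i = 0; i < 8; i++ )        // low 64 bits of the length,
      buf[127 - i] = (uint8_t)( bits >> (8 * i) );   // big-endian
}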
@@ -36,7 +36,6 @@ int scanhash_sha256q_8way( struct work *work, uint32_t max_nonce,
 {
    uint32_t vdata[20*8] __attribute__ ((aligned (64)));
    uint32_t hash[8*8] __attribute__ ((aligned (32)));
-   uint32_t edata[20] __attribute__ ((aligned (64)));
    uint32_t lane_hash[8] __attribute__ ((aligned (32)));
    uint32_t *pdata = work->data;
    uint32_t *ptarget = work->target;
@@ -60,10 +59,7 @@ int scanhash_sha256q_8way( struct work *work, uint32_t max_nonce,
       0 };

-   // Need big endian data
-   swab32_array( edata, pdata, 20 );
-   mm256_intrlv_8x32( vdata, edata, edata, edata, edata,
-                      edata, edata, edata, edata, 640 );
-   // mm256_bswap_intrlv80_8x32( vdata, pdata );
+   mm256_bswap32_intrlv80_8x32( vdata, pdata );
    sha256_8way_init( &sha256_ctx8 );
    sha256_8way( &sha256_ctx8, vdata, 64 );
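This collapses a two-step prepare (swab32_array into edata, then interleave) into one fused mm256_bswap32_intrlv80_8x32 pass, which is also why the edata scratch buffer in the previous hunk could be dropped. A plain-C reference of the behaviour the fused call is assumed to have:

#include <stdint.h>

// Assumed reference for mm256_bswap32_intrlv80_8x32: byte-swap each
// of the 20 words of an 80-byte block header and broadcast it across
// 8 interleaved lanes in one pass, with no edata temporary.
static void bswap32_intrlv80_8x32_ref( uint32_t *dst, const uint32_t *src )
{
   for ( int i = 0; i < 20; i++ )
   {
      const uint32_t w = __builtin_bswap32( src[i] );  // to big-endian
      for ( int lane = 0; lane < 8; lane++ )
         dst[ 8*i + lane ] = w;
   }
}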
@@ -84,7 +80,7 @@ int scanhash_sha256q_8way( struct work *work, uint32_t max_nonce,
       if ( !( hash7[ lane ] & mask ) )
       {
          // deinterleave hash for lane
-         mm256_extr_lane_8x32( lane_hash, hash, lane, 256 );
+         extr_lane_8x32( lane_hash, hash, lane, 256 );
          if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
          {
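extr_lane_8x32 (renamed from mm256_extr_lane_8x32) gathers one lane's hash back out of the interleaved layout so fulltest can check it as ordinary linear words. Its assumed reference behaviour:

// Assumed plain-C equivalent of extr_lane_8x32: pull 'bit_len' bits
// of lane 'lane' out of 8-way interleaved 32-bit words.
static void extr_lane_8x32_ref( uint32_t *dst, const uint32_t *src,
                                const int lane, const int bit_len )
{
   for ( int i = 0; i < bit_len / 32; i++ )
      dst[i] = src[ 8*i + lane ];       // stride 8, offset = lane
}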
@@ -133,7 +129,6 @@ int scanhash_sha256q_4way( struct work *work, uint32_t max_nonce,
 {
    uint32_t vdata[20*4] __attribute__ ((aligned (64)));
    uint32_t hash[8*4] __attribute__ ((aligned (32)));
-   uint32_t edata[20] __attribute__ ((aligned (64)));
    uint32_t *hash7 = &(hash[7<<2]);
    uint32_t lane_hash[8] __attribute__ ((aligned (32)));
    uint32_t *pdata = work->data;
@@ -157,9 +152,7 @@ int scanhash_sha256q_4way( struct work *work, uint32_t max_nonce,
       0xFFFF0000,
       0 };

-   swab32_array( edata, pdata, 20 );
-   mm128_intrlv_4x32( vdata, edata, edata, edata, edata, 640 );
-   // mm128_bswap_intrlv80_4x32( vdata, pdata );
+   mm128_bswap32_intrlv80_4x32( vdata, pdata );
    sha256_4way_init( &sha256_ctx4 );
    sha256_4way( &sha256_ctx4, vdata, 64 );
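The 4-way path gets the same fused treatment at SSE2 width. Mirroring the 8-way sketch above, the assumed reference shape differs only in its stride:

// Assumed reference for mm128_bswap32_intrlv80_4x32: the same fused
// byte-swap + interleave as the 8-way version, at 4-lane stride.
static void bswap32_intrlv80_4x32_ref( uint32_t *dst, const uint32_t *src )
{
   for ( int i = 0; i < 20; i++ )
   {
      const uint32_t w = __builtin_bswap32( src[i] );
      for ( int lane = 0; lane < 4; lane++ )
         dst[ 4*i + lane ] = w;
   }
}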
@@ -72,7 +72,7 @@ int scanhash_sha256t_11way( struct work *work, uint32_t max_nonce,
    casti_m256i( dataz, 1 ) = mm256_bswap_32( casti_m256i( pdata, 1 ) );
    casti_m128i( dataz, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );

-   mm256_intrlv_8x32( datax, dataz, dataz, dataz, dataz,
+   intrlv_8x32( datax, dataz, dataz, dataz, dataz,
                 dataz, dataz, dataz, dataz, 640 );
    mm64_interleave_2x32( datay, dataz, dataz, 640 );
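The 11-way scanhash splits work across three widths at once: 8 AVX2 lanes (datax), 2 MMX lanes (datay), and 1 scalar lane (dataz), for 11 nonces per pass. The 2x32 interleave is assumed to behave like:

// Assumed reference for mm64_interleave_2x32: pairwise interleave of
// two 32-bit lanes (here both fed from the same byte-swapped dataz).
static void interleave_2x32_ref( uint32_t *dst, const uint32_t *s0,
                                 const uint32_t *s1, const int bit_len )
{
   for ( int i = 0; i < bit_len / 32; i++ )
   {
      dst[ 2*i     ] = s0[i];
      dst[ 2*i + 1 ] = s1[i];
   }
}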
@@ -99,7 +99,7 @@ int scanhash_sha256t_11way( struct work *work, uint32_t max_nonce,
    for ( i = 0; i < 8; i++ ) if ( !( hash7[ i ] & mask ) )
    {
       // deinterleave hash for lane
-      mm256_extr_lane_8x32( lane_hash, hashx, i, 256 );
+      extr_lane_8x32( lane_hash, hashx, i, 256 );
       if ( fulltest( lane_hash, ptarget ) )
       {
          pdata[19] = n + i;
@@ -163,7 +163,6 @@ int scanhash_sha256t_8way( struct work *work, uint32_t max_nonce,
 {
    uint32_t vdata[20*8] __attribute__ ((aligned (64)));
    uint32_t hash[8*8] __attribute__ ((aligned (32)));
-   uint32_t edata[20] __attribute__ ((aligned (64)));
    uint32_t lane_hash[8] __attribute__ ((aligned (32)));
    uint32_t *hash7 = &(hash[7<<3]);
    uint32_t *pdata = work->data;
@@ -187,12 +186,9 @@ int scanhash_sha256t_8way( struct work *work, uint32_t max_nonce,
       0xFFFF0000,
       0 };

-   swab32_array( edata, pdata, 20 );
-   mm256_intrlv_8x32( vdata, edata, edata, edata, edata,
-                      edata, edata, edata, edata, 640 );
-
    // Need big endian data
-   // mm256_bswap_intrlv80_8x32( vdata, pdata );
+   mm256_bswap32_intrlv80_8x32( vdata, pdata );
    sha256_8way_init( &sha256_ctx8 );
    sha256_8way( &sha256_ctx8, vdata, 64 );
@@ -209,7 +205,7 @@ int scanhash_sha256t_8way( struct work *work, uint32_t max_nonce,
       if ( !( hash7[ lane ] & mask ) )
       {
          // deinterleave hash for lane
-         mm256_extr_lane_8x32( lane_hash, hash, lane, 256 );
+         extr_lane_8x32( lane_hash, hash, lane, 256 );
          if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
          {
             pdata[19] = n + lane;
@@ -253,7 +249,6 @@ int scanhash_sha256t_4way( struct work *work, uint32_t max_nonce,
 {
    uint32_t vdata[20*4] __attribute__ ((aligned (64)));
    uint32_t hash[8*4] __attribute__ ((aligned (32)));
-   uint32_t edata[20] __attribute__ ((aligned (64)));
    uint32_t lane_hash[8] __attribute__ ((aligned (64)));
    uint32_t *hash7 = &(hash[7<<2]);
    uint32_t *pdata = work->data;
@@ -277,10 +272,7 @@ int scanhash_sha256t_4way( struct work *work, uint32_t max_nonce,
       0xFFFF0000,
       0 };

-   swab32_array( edata, pdata, 20 );
-   mm128_intrlv_4x32( vdata, edata, edata, edata, edata, 640 );
-
-   // mm128_bswap_intrlv80_4x32( vdata, pdata );
+   mm128_bswap32_intrlv80_4x32( vdata, pdata );
    sha256_4way_init( &sha256_ctx4 );
    sha256_4way( &sha256_ctx4, vdata, 64 );
@@ -11,7 +11,7 @@ bool register_sha256t_algo( algo_gate_t* gate )
    gate->scanhash = (void*)&scanhash_sha256t_4way;
    gate->hash = (void*)&sha256t_4way_hash;
 #else
-gate->optimizations = SHA_OPT;
+   gate->optimizations = SHA_OPT;
    gate->scanhash = (void*)&scanhash_sha256t;
    gate->hash = (void*)&sha256t_hash;
 #endif
@@ -21,7 +21,11 @@ gate->optimizations = SHA_OPT;

 bool register_sha256q_algo( algo_gate_t* gate )
 {
-#if defined(SHA256T_4WAY)
+#if defined(SHA256T_8WAY)
+   gate->optimizations = SSE2_OPT | AVX2_OPT | SHA_OPT;
+   gate->scanhash = (void*)&scanhash_sha256q_8way;
+   gate->hash = (void*)&sha256q_8way_hash;
+#elif defined(SHA256T_4WAY)
    gate->optimizations = SSE2_OPT | AVX2_OPT | SHA_OPT;
    gate->scanhash = (void*)&scanhash_sha256q_4way;
    gate->hash = (void*)&sha256q_4way_hash;
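The new SHA256T_8WAY branch means register_sha256q_algo now prefers the AVX2 8-way kernels when they were compiled in, falling back to 4-way otherwise. A minimal mock of how such a gate is consumed; the real algo_gate_t and scanhash signature live in cpuminer-opt's own headers, so the names below are illustrative only:

#include <stdint.h>

struct work;       // opaque here; defined in the miner's own headers
struct thr_info;

// Illustrative gate: the core only ever calls through these pointers,
// so the #if ladder above fixes the SIMD width once, at registration.
typedef struct
{
   int  (*scanhash)( struct work *work, uint32_t max_nonce,
                     uint64_t *hashes_done, struct thr_info *mythr );
   void (*hash)( void *output, const void *input );
} gate_sketch_t;

static int scan_range( gate_sketch_t *gate, struct work *work,
                       uint32_t max_nonce, uint64_t *hashes_done,
                       struct thr_info *mythr )
{
   return gate->scanhash( work, max_nonce, hashes_done, mythr );
}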
@@ -8,7 +8,7 @@
 #if !defined(__SHA__)
   #if defined(__AVX2__)
     #define SHA256T_8WAY
-  #elif defined(__SSE2__)
+  #elif defined(__SSE2__)
     #define SHA256T_4WAY
   #endif
 #endif
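Taken together with the gate changes above: when the compiler reports SHA extensions (__SHA__), the multiway defines are suppressed and the scalar SHA-NI path registers; otherwise AVX2 builds select SHA256T_8WAY and plain SSE2 builds select SHA256T_4WAY.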