mirror of
https://github.com/JayDDee/cpuminer-opt.git
synced 2025-09-17 23:44:27 +00:00
v3.23.3
This commit is contained in:
@@ -30,10 +30,10 @@ int scanhash_sha256t_16way( struct work *work, const uint32_t max_nonce,
|
||||
const uint32_t targ32_d7 = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 16;
|
||||
const __m512i last_byte = _mm512_set1_epi32( 0x80000000 );
|
||||
const __m512i last_byte = v512_32( 0x80000000 );
|
||||
uint32_t n = first_nonce;
|
||||
const int thr_id = mythr->id;
|
||||
const __m512i sixteen = _mm512_set1_epi32( 16 );
|
||||
const __m512i sixteen = v512_32( 16 );
|
||||
const bool bench = opt_benchmark;
|
||||
const __m256i bswap_shuf = mm256_bcast_m128( _mm_set_epi64x(
|
||||
0x0c0d0e0f08090a0b, 0x0405060700010203 ) );
|
||||
@@ -42,42 +42,42 @@ int scanhash_sha256t_16way( struct work *work, const uint32_t max_nonce,
|
||||
sha256_transform_le( phash, pdata, sha256_iv );
|
||||
|
||||
// vectorize block 0 hash for second block
|
||||
mstate1[0] = _mm512_set1_epi32( phash[0] );
|
||||
mstate1[1] = _mm512_set1_epi32( phash[1] );
|
||||
mstate1[2] = _mm512_set1_epi32( phash[2] );
|
||||
mstate1[3] = _mm512_set1_epi32( phash[3] );
|
||||
mstate1[4] = _mm512_set1_epi32( phash[4] );
|
||||
mstate1[5] = _mm512_set1_epi32( phash[5] );
|
||||
mstate1[6] = _mm512_set1_epi32( phash[6] );
|
||||
mstate1[7] = _mm512_set1_epi32( phash[7] );
|
||||
mstate1[0] = v512_32( phash[0] );
|
||||
mstate1[1] = v512_32( phash[1] );
|
||||
mstate1[2] = v512_32( phash[2] );
|
||||
mstate1[3] = v512_32( phash[3] );
|
||||
mstate1[4] = v512_32( phash[4] );
|
||||
mstate1[5] = v512_32( phash[5] );
|
||||
mstate1[6] = v512_32( phash[6] );
|
||||
mstate1[7] = v512_32( phash[7] );
|
||||
|
||||
// second message block data, with nonce & padding
|
||||
buf[0] = _mm512_set1_epi32( pdata[16] );
|
||||
buf[1] = _mm512_set1_epi32( pdata[17] );
|
||||
buf[2] = _mm512_set1_epi32( pdata[18] );
|
||||
buf[0] = v512_32( pdata[16] );
|
||||
buf[1] = v512_32( pdata[17] );
|
||||
buf[2] = v512_32( pdata[18] );
|
||||
buf[3] = _mm512_set_epi32( n+15, n+14, n+13, n+12, n+11, n+10, n+ 9, n+ 8,
|
||||
n+ 7, n+ 6, n+ 5, n+ 4, n+ 3, n+ 2, n +1, n );
|
||||
buf[4] = last_byte;
|
||||
memset_zero_512( buf+5, 10 );
|
||||
buf[15] = _mm512_set1_epi32( 80*8 ); // bit count
|
||||
buf[15] = v512_32( 80*8 ); // bit count
|
||||
|
||||
// partially pre-expand & prehash second message block, avoiding the nonces
|
||||
sha256_16way_prehash_3rounds( mstate2, mexp_pre, buf, mstate1 );
|
||||
|
||||
// vectorize IV for 2nd & 3rd sha256
|
||||
istate[0] = _mm512_set1_epi32( sha256_iv[0] );
|
||||
istate[1] = _mm512_set1_epi32( sha256_iv[1] );
|
||||
istate[2] = _mm512_set1_epi32( sha256_iv[2] );
|
||||
istate[3] = _mm512_set1_epi32( sha256_iv[3] );
|
||||
istate[4] = _mm512_set1_epi32( sha256_iv[4] );
|
||||
istate[5] = _mm512_set1_epi32( sha256_iv[5] );
|
||||
istate[6] = _mm512_set1_epi32( sha256_iv[6] );
|
||||
istate[7] = _mm512_set1_epi32( sha256_iv[7] );
|
||||
istate[0] = v512_32( sha256_iv[0] );
|
||||
istate[1] = v512_32( sha256_iv[1] );
|
||||
istate[2] = v512_32( sha256_iv[2] );
|
||||
istate[3] = v512_32( sha256_iv[3] );
|
||||
istate[4] = v512_32( sha256_iv[4] );
|
||||
istate[5] = v512_32( sha256_iv[5] );
|
||||
istate[6] = v512_32( sha256_iv[6] );
|
||||
istate[7] = v512_32( sha256_iv[7] );
|
||||
|
||||
// initialize padding for 2nd & 3rd sha256
|
||||
block[ 8] = last_byte;
|
||||
memset_zero_512( block + 9, 6 );
|
||||
block[15] = _mm512_set1_epi32( 32*8 ); // bit count
|
||||
block[15] = v512_32( 32*8 ); // bit count
|
||||
|
||||
do
|
||||
{
|
||||
@@ -222,33 +222,33 @@ int scanhash_sha256t_8way( struct work *work, const uint32_t max_nonce,
|
||||
__m256i *noncev = vdata + 19;
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
const __m256i last_byte = _mm256_set1_epi32( 0x80000000 );
|
||||
const __m256i eight = _mm256_set1_epi32( 8 );
|
||||
const __m256i last_byte = v256_32( 0x80000000 );
|
||||
const __m256i eight = v256_32( 8 );
|
||||
const __m256i bswap_shuf = mm256_bcast_m128( _mm_set_epi64x(
|
||||
0x0c0d0e0f08090a0b, 0x0405060700010203 ) );
|
||||
|
||||
for ( int i = 0; i < 19; i++ )
|
||||
vdata[i] = _mm256_set1_epi32( pdata[i] );
|
||||
vdata[i] = v256_32( pdata[i] );
|
||||
|
||||
*noncev = _mm256_set_epi32( n+ 7, n+ 6, n+ 5, n+ 4, n+ 3, n+ 2, n+1, n );
|
||||
|
||||
vdata[16+4] = last_byte;
|
||||
memset_zero_256( vdata+16 + 5, 10 );
|
||||
vdata[16+15] = _mm256_set1_epi32( 80*8 ); // bit count
|
||||
vdata[16+15] = v256_32( 80*8 ); // bit count
|
||||
|
||||
block[ 8] = last_byte;
|
||||
memset_zero_256( block + 9, 6 );
|
||||
block[15] = _mm256_set1_epi32( 32*8 ); // bit count
|
||||
block[15] = v256_32( 32*8 ); // bit count
|
||||
|
||||
// initialize state
|
||||
istate[0] = _mm256_set1_epi64x( 0x6A09E6676A09E667 );
|
||||
istate[1] = _mm256_set1_epi64x( 0xBB67AE85BB67AE85 );
|
||||
istate[2] = _mm256_set1_epi64x( 0x3C6EF3723C6EF372 );
|
||||
istate[3] = _mm256_set1_epi64x( 0xA54FF53AA54FF53A );
|
||||
istate[4] = _mm256_set1_epi64x( 0x510E527F510E527F );
|
||||
istate[5] = _mm256_set1_epi64x( 0x9B05688C9B05688C );
|
||||
istate[6] = _mm256_set1_epi64x( 0x1F83D9AB1F83D9AB );
|
||||
istate[7] = _mm256_set1_epi64x( 0x5BE0CD195BE0CD19 );
|
||||
istate[0] = v256_32( sha256_iv[0] );
|
||||
istate[1] = v256_32( sha256_iv[1] );
|
||||
istate[2] = v256_32( sha256_iv[2] );
|
||||
istate[3] = v256_32( sha256_iv[3] );
|
||||
istate[4] = v256_32( sha256_iv[4] );
|
||||
istate[5] = v256_32( sha256_iv[5] );
|
||||
istate[6] = v256_32( sha256_iv[6] );
|
||||
istate[7] = v256_32( sha256_iv[7] );
|
||||
|
||||
sha256_8way_transform_le( mstate1, vdata, istate );
|
||||
|
||||
@@ -313,31 +313,31 @@ int scanhash_sha256t_4way( struct work *work, const uint32_t max_nonce,
|
||||
__m128i *noncev = vdata + 19;
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
const __m128i last_byte = _mm_set1_epi32( 0x80000000 );
|
||||
const __m128i four = _mm_set1_epi32( 4 );
|
||||
const __m128i last_byte = v128_32( 0x80000000 );
|
||||
const __m128i four = v128_32( 4 );
|
||||
|
||||
for ( int i = 0; i < 19; i++ )
|
||||
vdata[i] = _mm_set1_epi32( pdata[i] );
|
||||
vdata[i] = v128_32( pdata[i] );
|
||||
|
||||
*noncev = _mm_set_epi32( n+ 3, n+ 2, n+1, n );
|
||||
|
||||
vdata[16+4] = last_byte;
|
||||
memset_zero_128( vdata+16 + 5, 10 );
|
||||
vdata[16+15] = _mm_set1_epi32( 80*8 ); // bit count
|
||||
vdata[16+15] = v128_32( 80*8 ); // bit count
|
||||
|
||||
block[ 8] = last_byte;
|
||||
memset_zero_128( block + 9, 6 );
|
||||
block[15] = _mm_set1_epi32( 32*8 ); // bit count
|
||||
block[15] = v128_32( 32*8 ); // bit count
|
||||
|
||||
// initialize state
|
||||
istate[0] = _mm_set1_epi64x( 0x6A09E6676A09E667 );
|
||||
istate[1] = _mm_set1_epi64x( 0xBB67AE85BB67AE85 );
|
||||
istate[2] = _mm_set1_epi64x( 0x3C6EF3723C6EF372 );
|
||||
istate[3] = _mm_set1_epi64x( 0xA54FF53AA54FF53A );
|
||||
istate[4] = _mm_set1_epi64x( 0x510E527F510E527F );
|
||||
istate[5] = _mm_set1_epi64x( 0x9B05688C9B05688C );
|
||||
istate[6] = _mm_set1_epi64x( 0x1F83D9AB1F83D9AB );
|
||||
istate[7] = _mm_set1_epi64x( 0x5BE0CD195BE0CD19 );
|
||||
istate[0] = v128_32( sha256_iv[0] );
|
||||
istate[1] = v128_32( sha256_iv[1] );
|
||||
istate[2] = v128_32( sha256_iv[2] );
|
||||
istate[3] = v128_32( sha256_iv[3] );
|
||||
istate[4] = v128_32( sha256_iv[4] );
|
||||
istate[5] = v128_32( sha256_iv[5] );
|
||||
istate[6] = v128_32( sha256_iv[6] );
|
||||
istate[7] = v128_32( sha256_iv[7] );
|
||||
|
||||
// hash first 64 bytes of data
|
||||
sha256_4way_transform_le( mstate, vdata, istate );
|
||||
|
Reference in New Issue
Block a user