This commit is contained in:
Jay D Dee
2023-06-14 11:07:40 -04:00
parent de564ccbde
commit 57a6b7b58b
31 changed files with 3724 additions and 3345 deletions

View File

@@ -36,31 +36,31 @@ int scanhash_sha256dt_16way( struct work *work, const uint32_t max_nonce,
__m512i *noncev = vdata + 19;
const int thr_id = mythr->id;
const bool bench = opt_benchmark;
const __m512i last_byte = m512_const1_32( 0x80000000 );
const __m512i sixteen = m512_const1_32( 16 );
const __m512i last_byte = _mm512_set1_epi32( 0x80000000 );
const __m512i sixteen = _mm512_set1_epi32( 16 );
for ( int i = 0; i < 19; i++ )
vdata[i] = mm512_bcast_i32( pdata[i] );
vdata[i] = _mm512_set1_epi32( pdata[i] );
*noncev = _mm512_set_epi32( n+15, n+14, n+13, n+12, n+11, n+10, n+9, n+8,
n+ 7, n+ 6, n+ 5, n+ 4, n+ 3, n+ 2, n+1, n );
vdata[16+4] = last_byte;
memset_zero_512( vdata+16 + 5, 10 );
vdata[16+15] = mm512_bcast_i32( 0x480 );
vdata[16+15] = _mm512_set1_epi32( 0x480 );
block[ 8] = last_byte;
memset_zero_512( block + 9, 6 );
block[15] = mm512_bcast_i32( 0x300 );
block[15] = _mm512_set1_epi32( 0x300 );
initstate[0] = mm512_bcast_i64( 0xdfa9bf2cdfa9bf2c );
initstate[1] = mm512_bcast_i64( 0xb72074d4b72074d4 );
initstate[2] = mm512_bcast_i64( 0x6bb011226bb01122 );
initstate[3] = mm512_bcast_i64( 0xd338e869d338e869 );
initstate[4] = mm512_bcast_i64( 0xaa3ff126aa3ff126 );
initstate[5] = mm512_bcast_i64( 0x475bbf30475bbf30 );
initstate[6] = mm512_bcast_i64( 0x8fd52e5b8fd52e5b );
initstate[7] = mm512_bcast_i64( 0x9f75c9ad9f75c9ad );
initstate[0] = _mm512_set1_epi64( 0xdfa9bf2cdfa9bf2c );
initstate[1] = _mm512_set1_epi64( 0xb72074d4b72074d4 );
initstate[2] = _mm512_set1_epi64( 0x6bb011226bb01122 );
initstate[3] = _mm512_set1_epi64( 0xd338e869d338e869 );
initstate[4] = _mm512_set1_epi64( 0xaa3ff126aa3ff126 );
initstate[5] = _mm512_set1_epi64( 0x475bbf30475bbf30 );
initstate[6] = _mm512_set1_epi64( 0x8fd52e5b8fd52e5b );
initstate[7] = _mm512_set1_epi64( 0x9f75c9ad9f75c9ad );
sha256_16way_transform_le( midstate1, vdata, initstate );
@@ -118,31 +118,31 @@ int scanhash_sha256dt_8way( struct work *work, const uint32_t max_nonce,
__m256i *noncev = vdata + 19;
const int thr_id = mythr->id;
const bool bench = opt_benchmark;
const __m256i last_byte = m256_const1_32( 0x80000000 );
const __m256i eight = m256_const1_32( 8 );
const __m256i last_byte = _mm256_set1_epi32( 0x80000000 );
const __m256i eight = _mm256_set1_epi32( 8 );
for ( int i = 0; i < 19; i++ )
vdata[i] = mm256_bcast_i32( pdata[i] );
vdata[i] = _mm256_set1_epi32( pdata[i] );
*noncev = _mm256_set_epi32( n+ 7, n+ 6, n+ 5, n+ 4, n+ 3, n+ 2, n+1, n );
vdata[16+4] = last_byte;
memset_zero_256( vdata+16 + 5, 10 );
vdata[16+15] = mm256_bcast_i32( 0x480 );
vdata[16+15] = _mm256_set1_epi32( 0x480 );
block[ 8] = last_byte;
memset_zero_256( block + 9, 6 );
block[15] = mm256_bcast_i32( 0x300 );
block[15] = _mm256_set1_epi32( 0x300 );
// initialize state
initstate[0] = mm256_bcast_i64( 0xdfa9bf2cdfa9bf2c );
initstate[1] = mm256_bcast_i64( 0xb72074d4b72074d4 );
initstate[2] = mm256_bcast_i64( 0x6bb011226bb01122 );
initstate[3] = mm256_bcast_i64( 0xd338e869d338e869 );
initstate[4] = mm256_bcast_i64( 0xaa3ff126aa3ff126 );
initstate[5] = mm256_bcast_i64( 0x475bbf30475bbf30 );
initstate[6] = mm256_bcast_i64( 0x8fd52e5b8fd52e5b );
initstate[7] = mm256_bcast_i64( 0x9f75c9ad9f75c9ad );
initstate[0] = _mm256_set1_epi64x( 0xdfa9bf2cdfa9bf2c );
initstate[1] = _mm256_set1_epi64x( 0xb72074d4b72074d4 );
initstate[2] = _mm256_set1_epi64x( 0x6bb011226bb01122 );
initstate[3] = _mm256_set1_epi64x( 0xd338e869d338e869 );
initstate[4] = _mm256_set1_epi64x( 0xaa3ff126aa3ff126 );
initstate[5] = _mm256_set1_epi64x( 0x475bbf30475bbf30 );
initstate[6] = _mm256_set1_epi64x( 0x8fd52e5b8fd52e5b );
initstate[7] = _mm256_set1_epi64x( 0x9f75c9ad9f75c9ad );
sha256_8way_transform_le( midstate1, vdata, initstate );
@@ -198,31 +198,31 @@ int scanhash_sha256dt_4way( struct work *work, const uint32_t max_nonce,
__m128i *noncev = vdata + 19;
const int thr_id = mythr->id;
const bool bench = opt_benchmark;
const __m128i last_byte = m128_const1_32( 0x80000000 );
const __m128i four = m128_const1_32( 4 );
const __m128i last_byte = _mm_set1_epi32( 0x80000000 );
const __m128i four = _mm_set1_epi32( 4 );
for ( int i = 0; i < 19; i++ )
vdata[i] = mm128_bcast_i32( pdata[i] );
vdata[i] = _mm_set1_epi32( pdata[i] );
*noncev = _mm_set_epi32( n+ 3, n+ 2, n+1, n );
vdata[16+4] = last_byte;
memset_zero_128( vdata+16 + 5, 10 );
vdata[16+15] = mm128_bcast_i32( 0x480 );
vdata[16+15] = _mm_set1_epi32( 0x480 );
block[ 8] = last_byte;
memset_zero_128( block + 9, 6 );
block[15] = mm128_bcast_i32( 0x300 );
block[15] = _mm_set1_epi32( 0x300 );
// initialize state
initstate[0] = mm128_bcast_i64( 0xdfa9bf2cdfa9bf2c );
initstate[1] = mm128_bcast_i64( 0xb72074d4b72074d4 );
initstate[2] = mm128_bcast_i64( 0x6bb011226bb01122 );
initstate[3] = mm128_bcast_i64( 0xd338e869d338e869 );
initstate[4] = mm128_bcast_i64( 0xaa3ff126aa3ff126 );
initstate[5] = mm128_bcast_i64( 0x475bbf30475bbf30 );
initstate[6] = mm128_bcast_i64( 0x8fd52e5b8fd52e5b );
initstate[7] = mm128_bcast_i64( 0x9f75c9ad9f75c9ad );
initstate[0] = _mm_set1_epi64x( 0xdfa9bf2cdfa9bf2c );
initstate[1] = _mm_set1_epi64x( 0xb72074d4b72074d4 );
initstate[2] = _mm_set1_epi64x( 0x6bb011226bb01122 );
initstate[3] = _mm_set1_epi64x( 0xd338e869d338e869 );
initstate[4] = _mm_set1_epi64x( 0xaa3ff126aa3ff126 );
initstate[5] = _mm_set1_epi64x( 0x475bbf30475bbf30 );
initstate[6] = _mm_set1_epi64x( 0x8fd52e5b8fd52e5b );
initstate[7] = _mm_set1_epi64x( 0x9f75c9ad9f75c9ad );
// hash first 64 bytes of data
sha256_4way_transform_le( midstate, vdata, initstate );

View File

@@ -155,14 +155,14 @@ sha512_8way_round( sha512_8way_context *ctx, __m512i *in, __m512i r[8] )
}
else
{
A = m512_const1_64( 0x6A09E667F3BCC908 );
B = m512_const1_64( 0xBB67AE8584CAA73B );
C = m512_const1_64( 0x3C6EF372FE94F82B );
D = m512_const1_64( 0xA54FF53A5F1D36F1 );
E = m512_const1_64( 0x510E527FADE682D1 );
F = m512_const1_64( 0x9B05688C2B3E6C1F );
G = m512_const1_64( 0x1F83D9ABFB41BD6B );
H = m512_const1_64( 0x5BE0CD19137E2179 );
A = _mm512_set1_epi64( 0x6A09E667F3BCC908 );
B = _mm512_set1_epi64( 0xBB67AE8584CAA73B );
C = _mm512_set1_epi64( 0x3C6EF372FE94F82B );
D = _mm512_set1_epi64( 0xA54FF53A5F1D36F1 );
E = _mm512_set1_epi64( 0x510E527FADE682D1 );
F = _mm512_set1_epi64( 0x9B05688C2B3E6C1F );
G = _mm512_set1_epi64( 0x1F83D9ABFB41BD6B );
H = _mm512_set1_epi64( 0x5BE0CD19137E2179 );
}
for ( i = 0; i < 80; i += 8 )
@@ -191,14 +191,14 @@ sha512_8way_round( sha512_8way_context *ctx, __m512i *in, __m512i r[8] )
else
{
ctx->initialized = true;
r[0] = _mm512_add_epi64( A, m512_const1_64( 0x6A09E667F3BCC908 ) );
r[1] = _mm512_add_epi64( B, m512_const1_64( 0xBB67AE8584CAA73B ) );
r[2] = _mm512_add_epi64( C, m512_const1_64( 0x3C6EF372FE94F82B ) );
r[3] = _mm512_add_epi64( D, m512_const1_64( 0xA54FF53A5F1D36F1 ) );
r[4] = _mm512_add_epi64( E, m512_const1_64( 0x510E527FADE682D1 ) );
r[5] = _mm512_add_epi64( F, m512_const1_64( 0x9B05688C2B3E6C1F ) );
r[6] = _mm512_add_epi64( G, m512_const1_64( 0x1F83D9ABFB41BD6B ) );
r[7] = _mm512_add_epi64( H, m512_const1_64( 0x5BE0CD19137E2179 ) );
r[0] = _mm512_add_epi64( A, _mm512_set1_epi64( 0x6A09E667F3BCC908 ) );
r[1] = _mm512_add_epi64( B, _mm512_set1_epi64( 0xBB67AE8584CAA73B ) );
r[2] = _mm512_add_epi64( C, _mm512_set1_epi64( 0x3C6EF372FE94F82B ) );
r[3] = _mm512_add_epi64( D, _mm512_set1_epi64( 0xA54FF53A5F1D36F1 ) );
r[4] = _mm512_add_epi64( E, _mm512_set1_epi64( 0x510E527FADE682D1 ) );
r[5] = _mm512_add_epi64( F, _mm512_set1_epi64( 0x9B05688C2B3E6C1F ) );
r[6] = _mm512_add_epi64( G, _mm512_set1_epi64( 0x1F83D9ABFB41BD6B ) );
r[7] = _mm512_add_epi64( H, _mm512_set1_epi64( 0x5BE0CD19137E2179 ) );
}
}
@@ -239,11 +239,8 @@ void sha512_8way_close( sha512_8way_context *sc, void *dst )
unsigned ptr;
const int buf_size = 128;
const int pad = buf_size - 16;
const __m512i shuff_bswap64 = m512_const_64(
0x38393a3b3c3d3e3f, 0x3031323334353637,
0x28292a2b2c2d2e2f, 0x2021222324252627,
0x18191a1b1c1d1e1f, 0x1011121314151617,
0x08090a0b0c0d0e0f, 0x0001020304050607 );
const __m512i shuff_bswap64 = mm512_bcast_m128( _mm_set_epi64x(
0x08090a0b0c0d0e0f, 0x0001020304050607 ) );
ptr = (unsigned)sc->count & (buf_size - 1U);
sc->buf[ ptr>>3 ] = m512_const1_64( 0x80 );
@@ -440,10 +437,8 @@ void sha512_4way_close( sha512_4way_context *sc, void *dst )
unsigned ptr;
const int buf_size = 128;
const int pad = buf_size - 16;
const __m256i shuff_bswap64 = m256_const_64( 0x18191a1b1c1d1e1f,
0x1011121314151617,
0x08090a0b0c0d0e0f,
0x0001020304050607 );
const __m256i shuff_bswap64 = mm256_bcast_m128( _mm_set_epi64x(
0x08090a0b0c0d0e0f, 0x0001020304050607 ) );
ptr = (unsigned)sc->count & (buf_size - 1U);
sc->buf[ ptr>>3 ] = m256_const1_64( 0x80 );

View File

@@ -15,14 +15,14 @@ static void sha512256d_8way_init( sha512_8way_context *ctx )
{
ctx->count = 0;
ctx->initialized = true;
ctx->val[0] = mm512_bcast_i64( 0x22312194FC2BF72C );
ctx->val[1] = mm512_bcast_i64( 0x9F555FA3C84C64C2 );
ctx->val[2] = mm512_bcast_i64( 0x2393B86B6F53B151 );
ctx->val[3] = mm512_bcast_i64( 0x963877195940EABD );
ctx->val[4] = mm512_bcast_i64( 0x96283EE2A88EFFE3 );
ctx->val[5] = mm512_bcast_i64( 0xBE5E1E2553863992 );
ctx->val[6] = mm512_bcast_i64( 0x2B0199FC2C85B8AA );
ctx->val[7] = mm512_bcast_i64( 0x0EB72DDC81C52CA2 );
ctx->val[0] = _mm512_set1_epi64( 0x22312194FC2BF72C );
ctx->val[1] = _mm512_set1_epi64( 0x9F555FA3C84C64C2 );
ctx->val[2] = _mm512_set1_epi64( 0x2393B86B6F53B151 );
ctx->val[3] = _mm512_set1_epi64( 0x963877195940EABD );
ctx->val[4] = _mm512_set1_epi64( 0x96283EE2A88EFFE3 );
ctx->val[5] = _mm512_set1_epi64( 0xBE5E1E2553863992 );
ctx->val[6] = _mm512_set1_epi64( 0x2B0199FC2C85B8AA );
ctx->val[7] = _mm512_set1_epi64( 0x0EB72DDC81C52CA2 );
}
int scanhash_sha512256d_8way( struct work *work, uint32_t max_nonce,
@@ -42,7 +42,7 @@ int scanhash_sha512256d_8way( struct work *work, uint32_t max_nonce,
__m512i *noncev = (__m512i*)vdata + 9;
const int thr_id = mythr->id;
const bool bench = opt_benchmark;
const __m512i eight = mm512_bcast_i64( 0x0000000800000000 );
const __m512i eight = _mm512_set1_epi64( 0x0000000800000000 );
mm512_bswap32_intrlv80_8x64( vdata, pdata );
*noncev = mm512_intrlv_blend_32(
@@ -83,14 +83,14 @@ static void sha512256d_4way_init( sha512_4way_context *ctx )
{
ctx->count = 0;
ctx->initialized = true;
ctx->val[0] = mm256_bcast_i64( 0x22312194FC2BF72C );
ctx->val[1] = mm256_bcast_i64( 0x9F555FA3C84C64C2 );
ctx->val[2] = mm256_bcast_i64( 0x2393B86B6F53B151 );
ctx->val[3] = mm256_bcast_i64( 0x963877195940EABD );
ctx->val[4] = mm256_bcast_i64( 0x96283EE2A88EFFE3 );
ctx->val[5] = mm256_bcast_i64( 0xBE5E1E2553863992 );
ctx->val[6] = mm256_bcast_i64( 0x2B0199FC2C85B8AA );
ctx->val[7] = mm256_bcast_i64( 0x0EB72DDC81C52CA2 );
ctx->val[0] = _mm256_set1_epi64x( 0x22312194FC2BF72C );
ctx->val[1] = _mm256_set1_epi64x( 0x9F555FA3C84C64C2 );
ctx->val[2] = _mm256_set1_epi64x( 0x2393B86B6F53B151 );
ctx->val[3] = _mm256_set1_epi64x( 0x963877195940EABD );
ctx->val[4] = _mm256_set1_epi64x( 0x96283EE2A88EFFE3 );
ctx->val[5] = _mm256_set1_epi64x( 0xBE5E1E2553863992 );
ctx->val[6] = _mm256_set1_epi64x( 0x2B0199FC2C85B8AA );
ctx->val[7] = _mm256_set1_epi64x( 0x0EB72DDC81C52CA2 );
}
int scanhash_sha512256d_4way( struct work *work, uint32_t max_nonce,
@@ -110,7 +110,7 @@ int scanhash_sha512256d_4way( struct work *work, uint32_t max_nonce,
__m256i *noncev = (__m256i*)vdata + 9;
const int thr_id = mythr->id;
const bool bench = opt_benchmark;
const __m256i four = mm256_bcast_i64( 0x0000000400000000 );
const __m256i four = _mm256_set1_epi64x( 0x0000000400000000 );
mm256_bswap32_intrlv80_4x64( vdata, pdata );
*noncev = mm256_intrlv_blend_32(