This commit is contained in:
Jay D Dee
2019-05-19 13:39:45 -04:00
parent bfd1c002f9
commit e1aead3c76
139 changed files with 10907 additions and 4218 deletions

View File

@@ -98,19 +98,19 @@ static const sph_u32 K256[64] = {
// SHA-256 "big sigma 0" on a 128-bit vector: XOR of x rotated by 2, 13 and 22.
// Assumes mm128_ror_32 (defined elsewhere) rotates each 32-bit element right
// by the given count -- confirm against its definition.
#define BSG2_0(x) \
   _mm_xor_si128( _mm_xor_si128( \
        mm128_ror_32(x, 2), mm128_ror_32(x, 13) ), mm128_ror_32( x, 22) )
// SHA-256 "big sigma 1" on a 128-bit vector: XOR of x rotated by 6, 11 and 25.
// Assumes mm128_ror_32 (defined elsewhere) rotates each 32-bit element right
// by the given count -- confirm against its definition.
#define BSG2_1(x) \
   _mm_xor_si128( _mm_xor_si128( \
        mm128_ror_32(x, 6), mm128_ror_32(x, 11) ), mm128_ror_32( x, 25) )
// SHA-256 "small sigma 0" on a 128-bit vector: XOR of x rotated by 7 and 18
// with x logically shifted right by 3 (per 32-bit element).
// Assumes mm128_ror_32 (defined elsewhere) is a per-element rotate right --
// confirm against its definition.
#define SSG2_0(x) \
   _mm_xor_si128( _mm_xor_si128( \
        mm128_ror_32(x, 7), mm128_ror_32(x, 18) ), _mm_srli_epi32(x, 3) )
// SHA-256 "small sigma 1" on a 128-bit vector: XOR of x rotated by 17 and 19
// with x logically shifted right by 10 (per 32-bit element).
// Assumes mm128_ror_32 (defined elsewhere) is a per-element rotate right --
// confirm against its definition.
#define SSG2_1(x) \
   _mm_xor_si128( _mm_xor_si128( \
        mm128_ror_32(x, 17), mm128_ror_32(x, 19) ), _mm_srli_epi32(x, 10) )
#define SHA2s_4WAY_STEP(A, B, C, D, E, F, G, H, i, j) \
do { \
@@ -129,22 +129,22 @@ sha256_4way_round( __m128i *in, __m128i r[8] )
register __m128i A, B, C, D, E, F, G, H;
__m128i W[16];
W[ 0] = mm_bswap_32( in[ 0] );
W[ 1] = mm_bswap_32( in[ 1] );
W[ 2] = mm_bswap_32( in[ 2] );
W[ 3] = mm_bswap_32( in[ 3] );
W[ 4] = mm_bswap_32( in[ 4] );
W[ 5] = mm_bswap_32( in[ 5] );
W[ 6] = mm_bswap_32( in[ 6] );
W[ 7] = mm_bswap_32( in[ 7] );
W[ 8] = mm_bswap_32( in[ 8] );
W[ 9] = mm_bswap_32( in[ 9] );
W[10] = mm_bswap_32( in[10] );
W[11] = mm_bswap_32( in[11] );
W[12] = mm_bswap_32( in[12] );
W[13] = mm_bswap_32( in[13] );
W[14] = mm_bswap_32( in[14] );
W[15] = mm_bswap_32( in[15] );
W[ 0] = mm128_bswap_32( in[ 0] );
W[ 1] = mm128_bswap_32( in[ 1] );
W[ 2] = mm128_bswap_32( in[ 2] );
W[ 3] = mm128_bswap_32( in[ 3] );
W[ 4] = mm128_bswap_32( in[ 4] );
W[ 5] = mm128_bswap_32( in[ 5] );
W[ 6] = mm128_bswap_32( in[ 6] );
W[ 7] = mm128_bswap_32( in[ 7] );
W[ 8] = mm128_bswap_32( in[ 8] );
W[ 9] = mm128_bswap_32( in[ 9] );
W[10] = mm128_bswap_32( in[10] );
W[11] = mm128_bswap_32( in[11] );
W[12] = mm128_bswap_32( in[12] );
W[13] = mm128_bswap_32( in[13] );
W[14] = mm128_bswap_32( in[14] );
W[15] = mm128_bswap_32( in[15] );
A = r[0];
B = r[1];
@@ -289,13 +289,13 @@ void sha256_4way_close( sha256_4way_context *sc, void *dst )
low = low << 3;
sc->buf[ pad >> 2 ] =
mm_bswap_32( _mm_set1_epi32( high ) );
mm128_bswap_32( _mm_set1_epi32( high ) );
sc->buf[ ( pad+4 ) >> 2 ] =
mm_bswap_32( _mm_set1_epi32( low ) );
mm128_bswap_32( _mm_set1_epi32( low ) );
sha256_4way_round( sc->buf, sc->val );
for ( u = 0; u < 8; u ++ )
((__m128i*)dst)[u] = mm_bswap_32( sc->val[u] );
((__m128i*)dst)[u] = mm128_bswap_32( sc->val[u] );
}
#if defined(__AVX2__)

View File

@@ -4,7 +4,6 @@
#include <string.h>
#include <stdio.h>
#include "sha2-hash-4way.h"
//#include <openssl/sha.h>
#if defined(SHA256T_8WAY)
@@ -25,11 +24,8 @@ void sha256t_8way_hash( void* output, const void* input )
sha256_8way_init( &ctx );
sha256_8way( &ctx, vhash, 32 );
sha256_8way_close( &ctx, vhash );
sha256_8way_close( &ctx, output );
mm256_deinterleave_8x32( output, output+ 32, output+ 64, output+ 96,
output+128, output+160, output+192, output+224,
vhash, 256 );
}
int scanhash_sha256t_8way( int thr_id, struct work *work,
@@ -84,14 +80,22 @@ int scanhash_sha256t_8way( int thr_id, struct work *work,
sha256t_8way_hash( hash, vdata );
for ( int i = 0; i < 8; i++ )
if ( ( !( ( hash+(i<<3) )[7] & mask ) )
&& fulltest( hash+(i<<3), ptarget ) )
{
pdata[19] = n+i;
nonces[ num_found++ ] = n+i;
work_set_target_ratio( work, hash+(i<<3) );
}
uint32_t *hash7 = &(hash[7<<3]);
for ( int lane = 0; lane < 8; lane++ )
if ( !( hash7[ lane ] & mask ) )
{
// deinterleave hash for lane
uint32_t lane_hash[8];
mm256_extract_lane_8x32( lane_hash, hash, lane, 256 );
if ( fulltest( lane_hash, ptarget ) )
{
pdata[19] = n + lane;
nonces[ num_found++ ] = n + lane;
work_set_target_ratio( work, lane_hash );
}
}
n += 8;
} while ( (num_found == 0) && (n < max_nonce)
@@ -122,10 +126,8 @@ void sha256t_4way_hash( void* output, const void* input )
sha256_4way_init( &ctx );
sha256_4way( &ctx, vhash, 32 );
sha256_4way_close( &ctx, vhash );
sha256_4way_close( &ctx, output );
mm_deinterleave_4x32( output, output+ 32, output+ 64, output+ 96,
vhash, 256 );
}
int scanhash_sha256t_4way( int thr_id, struct work *work,
@@ -133,6 +135,8 @@ int scanhash_sha256t_4way( int thr_id, struct work *work,
{
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
uint32_t hash[8*4] __attribute__ ((aligned (32)));
uint32_t *hash7 = &(hash[7<<2]);
uint32_t lane_hash[8];
uint32_t edata[20] __attribute__ ((aligned (32)));;
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
@@ -159,7 +163,7 @@ int scanhash_sha256t_4way( int thr_id, struct work *work,
for ( int k = 0; k < 19; k++ )
be32enc( &edata[k], pdata[k] );
mm_interleave_4x32( vdata, edata, edata, edata, edata, 640 );
mm128_interleave_4x32( vdata, edata, edata, edata, edata, 640 );
sha256_4way_init( &sha256_ctx4 );
sha256_4way( &sha256_ctx4, vdata, 64 );
@@ -175,15 +179,20 @@ int scanhash_sha256t_4way( int thr_id, struct work *work,
sha256t_4way_hash( hash, vdata );
for ( int i = 0; i < 4; i++ )
if ( ( !( ( hash+(i<<3) )[7] & mask ) )
&& fulltest( hash+(i<<3), ptarget ) )
for ( int lane = 0; lane < 4; lane++ )
if ( !( hash7[ lane ] & mask ) )
{
pdata[19] = n+i;
nonces[ num_found++ ] = n+i;
work_set_target_ratio( work, hash+(i<<3) );
mm128_extract_lane_4x32( lane_hash, hash, lane, 256 );
if ( fulltest( lane_hash, ptarget ) )
{
pdata[19] = n + lane;
nonces[ num_found++ ] = n + lane;
work_set_target_ratio( work, lane_hash );
}
}
n += 4;
n += 4;
} while ( (num_found == 0) && (n < max_nonce)
&& !work_restart[thr_id].restart );

View File

@@ -3,16 +3,18 @@
// Fills in the algo gate for sha256t: selects the scanhash/hash entry points
// and the advertised optimization flags according to which build variant
// (SHA256T_8WAY, SHA256T_4WAY, or neither) is compiled in, and installs the
// get_max64_0x3ffff callback.
//
// gate: gate structure to populate; must not be NULL.
// Returns true unconditionally.
bool register_sha256t_algo( algo_gate_t* gate )
{
#if defined(SHA256T_8WAY)
   gate->optimizations = SSE42_OPT | AVX2_OPT;
   gate->scanhash = (void*)&scanhash_sha256t_8way;
   gate->hash = (void*)&sha256t_8way_hash;
#elif defined(SHA256T_4WAY)
   gate->optimizations = SSE42_OPT | AVX2_OPT;
   gate->scanhash = (void*)&scanhash_sha256t_4way;
   gate->hash = (void*)&sha256t_4way_hash;
#else
   // Only the fallback variant advertises SHA_OPT.
   gate->optimizations = SSE42_OPT | AVX2_OPT | SHA_OPT;
   gate->scanhash = (void*)&scanhash_sha256t;
   gate->hash = (void*)&sha256t_hash;
#endif
   // The optimization flags are set per variant above; they must not be
   // reassigned here, or every variant would report the same flag set.
   gate->get_max64 = (void*)&get_max64_0x3ffff;
   return true;
}