mirror of https://github.com/JayDDee/cpuminer-opt.git (synced 2025-09-17 23:44:27 +00:00)

v3.9.0
@@ -98,19 +98,19 @@ static const sph_u32 K256[64] = {
 
 #define BSG2_0(x) \
    _mm_xor_si128( _mm_xor_si128( \
-     mm_ror_32(x, 2), mm_ror_32(x, 13) ), mm_ror_32( x, 22) )
+     mm128_ror_32(x, 2), mm128_ror_32(x, 13) ), mm128_ror_32( x, 22) )
 
 #define BSG2_1(x) \
    _mm_xor_si128( _mm_xor_si128( \
-     mm_ror_32(x, 6), mm_ror_32(x, 11) ), mm_ror_32( x, 25) )
+     mm128_ror_32(x, 6), mm128_ror_32(x, 11) ), mm128_ror_32( x, 25) )
 
 #define SSG2_0(x) \
    _mm_xor_si128( _mm_xor_si128( \
-     mm_ror_32(x, 7), mm_ror_32(x, 18) ), _mm_srli_epi32(x, 3) )
+     mm128_ror_32(x, 7), mm128_ror_32(x, 18) ), _mm_srli_epi32(x, 3) )
 
 #define SSG2_1(x) \
    _mm_xor_si128( _mm_xor_si128( \
-     mm_ror_32(x, 17), mm_ror_32(x, 19) ), _mm_srli_epi32(x, 10) )
+     mm128_ror_32(x, 17), mm128_ror_32(x, 19) ), _mm_srli_epi32(x, 10) )
 
 #define SHA2s_4WAY_STEP(A, B, C, D, E, F, G, H, i, j) \
 do { \
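For orientation: BSG2_0, BSG2_1, SSG2_0 and SSG2_1 are the FIPS 180-4 Sigma0, Sigma1, sigma0 and sigma1 functions of SHA-256, evaluated on four 32-bit lanes at once; this hunk only renames the rotate helper from mm_ror_32 to mm128_ror_32. A minimal sketch of the scalar equivalent and of one plausible SSE2 rotate follows; the _ref/_sketch names are illustrative, not identifiers from the repository.

#include <stdint.h>
#include <immintrin.h>

/* Scalar reference: Sigma0(x) = ROTR(x,2) ^ ROTR(x,13) ^ ROTR(x,22),
   the same rotation counts used by BSG2_0 above. */
static inline uint32_t ror32_ref( uint32_t x, int c )
{ return ( x >> c ) | ( x << ( 32 - c ) ); }

static inline uint32_t bsg2_0_ref( uint32_t x )
{ return ror32_ref( x, 2 ) ^ ror32_ref( x, 13 ) ^ ror32_ref( x, 22 ); }

/* One plausible SSE2 per-lane rotate right: SSE2 has no rotate
   instruction, so shift-and-OR is the usual fallback. */
#define mm128_ror_32_sketch( x, c ) \
   _mm_or_si128( _mm_srli_epi32( x, c ), _mm_slli_epi32( x, 32-(c) ) )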
@@ -129,22 +129,22 @@ sha256_4way_round( __m128i *in, __m128i r[8] )
    register __m128i A, B, C, D, E, F, G, H;
    __m128i W[16];
 
-   W[ 0] = mm_bswap_32( in[ 0] );
-   W[ 1] = mm_bswap_32( in[ 1] );
-   W[ 2] = mm_bswap_32( in[ 2] );
-   W[ 3] = mm_bswap_32( in[ 3] );
-   W[ 4] = mm_bswap_32( in[ 4] );
-   W[ 5] = mm_bswap_32( in[ 5] );
-   W[ 6] = mm_bswap_32( in[ 6] );
-   W[ 7] = mm_bswap_32( in[ 7] );
-   W[ 8] = mm_bswap_32( in[ 8] );
-   W[ 9] = mm_bswap_32( in[ 9] );
-   W[10] = mm_bswap_32( in[10] );
-   W[11] = mm_bswap_32( in[11] );
-   W[12] = mm_bswap_32( in[12] );
-   W[13] = mm_bswap_32( in[13] );
-   W[14] = mm_bswap_32( in[14] );
-   W[15] = mm_bswap_32( in[15] );
+   W[ 0] = mm128_bswap_32( in[ 0] );
+   W[ 1] = mm128_bswap_32( in[ 1] );
+   W[ 2] = mm128_bswap_32( in[ 2] );
+   W[ 3] = mm128_bswap_32( in[ 3] );
+   W[ 4] = mm128_bswap_32( in[ 4] );
+   W[ 5] = mm128_bswap_32( in[ 5] );
+   W[ 6] = mm128_bswap_32( in[ 6] );
+   W[ 7] = mm128_bswap_32( in[ 7] );
+   W[ 8] = mm128_bswap_32( in[ 8] );
+   W[ 9] = mm128_bswap_32( in[ 9] );
+   W[10] = mm128_bswap_32( in[10] );
+   W[11] = mm128_bswap_32( in[11] );
+   W[12] = mm128_bswap_32( in[12] );
+   W[13] = mm128_bswap_32( in[13] );
+   W[14] = mm128_bswap_32( in[14] );
+   W[15] = mm128_bswap_32( in[15] );
 
    A = r[0];
    B = r[1];
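SHA-256 consumes its message words big-endian, hence the byte swap of every in[] word; again only the helper's name changes. A sketch of what a per-lane 32-bit byte swap can look like with SSSE3 (illustrative only; the project's mm128_bswap_32 may be implemented differently):

#include <immintrin.h>

/* Byte-swap each 32-bit lane of an __m128i (requires SSSE3). */
static inline __m128i bswap_32x4_sketch( __m128i x )
{
   const __m128i ctl = _mm_set_epi8( 12,13,14,15,  8, 9,10,11,
                                      4, 5, 6, 7,  0, 1, 2, 3 );
   return _mm_shuffle_epi8( x, ctl );   /* per lane: bytes 0123 -> 3210 */
}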
@@ -289,13 +289,13 @@ void sha256_4way_close( sha256_4way_context *sc, void *dst )
    low = low << 3;
 
    sc->buf[ pad >> 2 ] =
-         mm_bswap_32( _mm_set1_epi32( high ) );
+         mm128_bswap_32( _mm_set1_epi32( high ) );
    sc->buf[ ( pad+4 ) >> 2 ] =
-         mm_bswap_32( _mm_set1_epi32( low ) );
+         mm128_bswap_32( _mm_set1_epi32( low ) );
    sha256_4way_round( sc->buf, sc->val );
 
    for ( u = 0; u < 8; u ++ )
-      ((__m128i*)dst)[u] = mm_bswap_32( sc->val[u] );
+      ((__m128i*)dst)[u] = mm128_bswap_32( sc->val[u] );
 }
 
 #if defined(__AVX2__)
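In the close path, low = low << 3 turns a byte count into the bit count that SHA-256 padding appends big-endian, high word first, which is what the two byte-swapped stores into sc->buf write out. A scalar sketch of that length encoding, assuming a single 64-bit byte counter (the 4-way code keeps the count split across high and low words):

#include <stdint.h>

/* Standard SHA-256 length encoding: the message length in bits is
   appended as a big-endian 64-bit value, high 32 bits first.
   Illustrative scalar arithmetic only. */
static void sha256_length_words_sketch( uint64_t total_bytes,
                                        uint32_t *high, uint32_t *low )
{
   uint64_t bits = total_bytes << 3;          /* bytes -> bits            */
   *high = (uint32_t)( bits >> 32 );          /* written first, at pad    */
   *low  = (uint32_t)( bits & 0xffffffffu );  /* written next, at pad+4   */
}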
@@ -4,7 +4,6 @@
 #include <string.h>
 #include <stdio.h>
 #include "sha2-hash-4way.h"
-//#include <openssl/sha.h>
 
 #if defined(SHA256T_8WAY)
 
@@ -25,11 +24,8 @@ void sha256t_8way_hash( void* output, const void* input )
 
    sha256_8way_init( &ctx );
    sha256_8way( &ctx, vhash, 32 );
-   sha256_8way_close( &ctx, vhash );
+   sha256_8way_close( &ctx, output );
 
-   mm256_deinterleave_8x32( output, output+ 32, output+ 64, output+ 96,
-                            output+128, output+160, output+192, output+224,
-                            vhash, 256 );
 }
 
 int scanhash_sha256t_8way( int thr_id, struct work *work,
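sha256t is triple SHA-256: three chained passes of the hash, which is why the third pass can now close straight into output and the separate mm256_deinterleave_8x32 step goes away. A scalar reference using OpenSSL's one-shot SHA256() (assuming the usual 80-byte block header as input; this is not the miner's own scalar path):

#include <openssl/sha.h>
#include <stdint.h>
#include <stddef.h>

/* Triple SHA-256: hash the data, then hash the 32-byte digest twice more. */
static void sha256t_ref( uint8_t hash[32], const uint8_t *data, size_t len )
{
   uint8_t t[32];
   SHA256( data, len, t );    /* pass 1                                   */
   SHA256( t, 32, t );        /* pass 2 over the 32-byte digest           */
   SHA256( t, 32, hash );     /* pass 3, written straight to the caller's
                                 buffer, like close( &ctx, output ) above */
}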
@@ -84,14 +80,22 @@ int scanhash_sha256t_8way( int thr_id, struct work *work,
 
       sha256t_8way_hash( hash, vdata );
 
-      for ( int i = 0; i < 8; i++ )
-      if ( ( !( ( hash+(i<<3) )[7] & mask ) )
-         && fulltest( hash+(i<<3), ptarget ) )
-      {
-         pdata[19] = n+i;
-         nonces[ num_found++ ] = n+i;
-         work_set_target_ratio( work, hash+(i<<3) );
-      }
+      uint32_t *hash7 = &(hash[7<<3]);
+
+      for ( int lane = 0; lane < 8; lane++ )
+      if ( !( hash7[ lane ] & mask ) )
+      {
+         // deinterleave hash for lane
+         uint32_t lane_hash[8];
+         mm256_extract_lane_8x32( lane_hash, hash, lane, 256 );
+
+         if ( fulltest( lane_hash, ptarget ) )
+         {
+            pdata[19] = n + lane;
+            nonces[ num_found++ ] = n + lane;
+            work_set_target_ratio( work, lane_hash );
+         }
+      }
       n += 8;
 
   } while ( (num_found == 0) && (n < max_nonce)
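The rewritten scan loop relies on the 8-way hash buffer being interleaved word by word across lanes (element index = word*8 + lane), which is what hash7 = &(hash[7<<3]) exploits: the top words of all eight lanes sit contiguously, so the cheap mask test needs no deinterleave at all, and mm256_extract_lane_8x32 pulls out a single candidate lane only when that test passes. A plain-C sketch of what the extraction amounts to, under that layout assumption:

#include <stdint.h>

/* Gather one lane's 8-word digest from an 8-way interleaved buffer.
   Assumes index = word*8 + lane; the real mm256_extract_lane_8x32
   helper presumably does the same with vector loads. */
static void extract_lane_8x32_sketch( uint32_t lane_hash[8],
                                      const uint32_t *vhash, int lane )
{
   for ( int word = 0; word < 8; word++ )
      lane_hash[ word ] = vhash[ word*8 + lane ];
}

The 4-way hunks below apply the same pattern with hash[7<<2] and mm128_extract_lane_4x32, so fulltest() runs only on lanes that already pass the cheap mask check.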
@@ -122,10 +126,8 @@ void sha256t_4way_hash( void* output, const void* input )
 
    sha256_4way_init( &ctx );
    sha256_4way( &ctx, vhash, 32 );
-   sha256_4way_close( &ctx, vhash );
+   sha256_4way_close( &ctx, output );
 
-   mm_deinterleave_4x32( output, output+ 32, output+ 64, output+ 96,
-                         vhash, 256 );
 }
 
 int scanhash_sha256t_4way( int thr_id, struct work *work,
@@ -133,6 +135,8 @@ int scanhash_sha256t_4way( int thr_id, struct work *work,
 {
    uint32_t vdata[20*4] __attribute__ ((aligned (64)));
    uint32_t hash[8*4] __attribute__ ((aligned (32)));
+   uint32_t *hash7 = &(hash[7<<2]);
+   uint32_t lane_hash[8];
    uint32_t edata[20] __attribute__ ((aligned (32)));;
    uint32_t *pdata = work->data;
    uint32_t *ptarget = work->target;
@@ -159,7 +163,7 @@ int scanhash_sha256t_4way( int thr_id, struct work *work,
    for ( int k = 0; k < 19; k++ )
       be32enc( &edata[k], pdata[k] );
 
-   mm_interleave_4x32( vdata, edata, edata, edata, edata, 640 );
+   mm128_interleave_4x32( vdata, edata, edata, edata, edata, 640 );
    sha256_4way_init( &sha256_ctx4 );
    sha256_4way( &sha256_ctx4, vdata, 64 );
 
@@ -175,15 +179,20 @@ int scanhash_sha256t_4way( int thr_id, struct work *work,
 
      sha256t_4way_hash( hash, vdata );
 
-     for ( int i = 0; i < 4; i++ )
-     if ( ( !( ( hash+(i<<3) )[7] & mask ) )
-        && fulltest( hash+(i<<3), ptarget ) )
+     for ( int lane = 0; lane < 4; lane++ )
+     if ( !( hash7[ lane ] & mask ) )
      {
-        pdata[19] = n+i;
-        nonces[ num_found++ ] = n+i;
-        work_set_target_ratio( work, hash+(i<<3) );
+        mm128_extract_lane_4x32( lane_hash, hash, lane, 256 );
+
+        if ( fulltest( lane_hash, ptarget ) )
+        {
+           pdata[19] = n + lane;
+           nonces[ num_found++ ] = n + lane;
+           work_set_target_ratio( work, lane_hash );
+        }
      }
      n += 4;
 
  } while ( (num_found == 0) && (n < max_nonce)
           && !work_restart[thr_id].restart );
@@ -3,16 +3,18 @@
 bool register_sha256t_algo( algo_gate_t* gate )
 {
 #if defined(SHA256T_8WAY)
+  gate->optimizations = SSE42_OPT | AVX2_OPT;
   gate->scanhash = (void*)&scanhash_sha256t_8way;
   gate->hash = (void*)&sha256t_8way_hash;
 #elif defined(SHA256T_4WAY)
+  gate->optimizations = SSE42_OPT | AVX2_OPT;
   gate->scanhash = (void*)&scanhash_sha256t_4way;
   gate->hash = (void*)&sha256t_4way_hash;
 #else
+  gate->optimizations = SSE42_OPT | AVX2_OPT | SHA_OPT;
   gate->scanhash = (void*)&scanhash_sha256t;
   gate->hash = (void*)&sha256t_hash;
 #endif
-  gate->optimizations = SSE42_OPT | AVX2_OPT | SHA_OPT;
   gate->get_max64 = (void*)&get_max64_0x3ffff;
   return true;
 }
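SHA256T_8WAY and SHA256T_4WAY are compile-time switches; the gate simply wires up whichever scanhash/hash pair was built, with the optimizations flags set per branch so the vector paths advertise SSE4.2/AVX2 while the scalar fallback keeps SHA_OPT. One plausible selection scheme, purely as an assumption for illustration (the real conditions live in the project's gate header and may differ):

/* Hypothetical compile-time lane-count selection. */
#if defined(__AVX2__)
   #define SHA256T_8WAY 1     /* eight 32-bit lanes per __m256i */
#elif defined(__SSE4_2__)
   #define SHA256T_4WAY 1     /* four 32-bit lanes per __m128i  */
#endif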