This commit is contained in:
Jay D Dee
2019-06-13 11:20:27 -04:00
parent 7fec680835
commit b2331375a3
70 changed files with 4413 additions and 4360 deletions

View File

@@ -42,7 +42,7 @@
#include <stddef.h>
#include "sph_types.h"
#include "avxdefs.h"
#include "simd-utils.h"
#if defined(__SSE2__)
//#if defined(__SSE4_2__)

View File

@@ -1,3 +1,4 @@
#if 0
#include <stddef.h>
#include <string.h>
@@ -65,7 +66,7 @@ static const uint32_t K256[64] =
_mm_xor_si64( _mm_xor_si64( \
mm64_ror_32(x,2), mm64_ror_32(x,13) ), _mm_srli_pi32(x,22) )
#define BSG2_0z(x) ( ror_32(x,2) ^ ror_32(x,13) ^ ((x)>>22) )
#define BSG2_0z(x) ( u32_ror_32(x,2) ^ u32_ror_32(x,13) ^ ((x)>>22) )
#define BSG2_1x(x) \
_mm256_xor_si256( _mm256_xor_si256( \
@@ -75,7 +76,7 @@ static const uint32_t K256[64] =
_mm_xor_si64( _mm_xor_si64( \
mm64_ror_32(x,6), mm64_ror_32(x,11) ), _mm_srli_pi32(x,25) )
#define BSG2_1z(x) ( ror_32(x,6) ^ ror_32(x,11) ^ ((x)>>25) )
#define BSG2_1z(x) ( u32_ror_32(x,6) ^ u32_ror_32(x,11) ^ ((x)>>25) )
#define SSG2_0x(x) \
_mm256_xor_si256( _mm256_xor_si256( \
@@ -85,7 +86,7 @@ static const uint32_t K256[64] =
_mm_xor_si64( _mm_xor_si64( \
mm64_ror_32(x,7), mm64_ror_32(x,18) ), _mm_srli_pi32(x,3) )
#define SSG2_0z(x) (( ror_32(x,7) ^ ror_32(x,18) ) ^ ((x)>>3) )
#define SSG2_0z(x) (( u32_ror_32(x,7) ^ u32_ror_32(x,18) ) ^ ((x)>>3) )
#define SSG2_1x(x) \
_mm256_xor_si256( _mm256_xor_si256( \
@@ -95,7 +96,7 @@ static const uint32_t K256[64] =
_mm_xor_si64( _mm_xor_si64( \
mm64_ror_32(x,17), mm64_ror_32(x,19) ), _mm_srli_pi32(x,10) )
#define SSG2_1z(x) ( ror_32(x,17) ^ ror_32(x,19) ^ ((x)>>10) )
#define SSG2_1z(x) ( u32_ror_32(x,17) ^ u32_ror_32(x,19) ^ ((x)>>10) )
#define SHA2x_MEXP( a, b, c, d ) \
_mm256_add_epi32( _mm256_add_epi32( _mm256_add_epi32( \
@@ -449,7 +450,7 @@ void sha256_11way_update( sha256_11way_context *ctx, const void *datax,
if ( clen > len )
clen = len;
memcpy_256( ctx->bufx + (ptr>>2), vdatax + (ptr>>2), clen>>2 );
memcpy_64 ( ctx->bufy + (ptr>>2), vdatay + (ptr>>2), clen>>2 );
memcpy_m64( ctx->bufy + (ptr>>2), vdatay + (ptr>>2), clen>>2 );
memcpy ( ctx->bufz + ptr, idataz + ptr, clen );
ptr += clen;
len -= clen;
@@ -486,19 +487,19 @@ void sha256_11way_close( sha256_11way_context *ctx, void *dstx, void *dsty,
if ( ptr > pad )
{
memset_zero_256( ctx->bufx + (ptr>>2), (buf_size - ptr) >> 2 );
memset_zero_64( ctx->bufy + (ptr>>2), (buf_size - ptr) >> 2 );
memset_zero_m64( ctx->bufy + (ptr>>2), (buf_size - ptr) >> 2 );
memset( ctx->bufz + (ptr>>2), 0, (buf_size - ptr) >> 2 );
sha256_11way_round( ctx->bufx, ctx->valx,
ctx->bufy, ctx->valy,
ctx->bufz, ctx->valz );
memset_zero_256( ctx->bufx, pad >> 2 );
memset_zero_64( ctx->bufy, pad >> 2 );
memset_zero_m64( ctx->bufy, pad >> 2 );
memset( ctx->bufz, 0, pad >> 2 );
}
else
{
memset_zero_256( ctx->bufx + (ptr>>2), (pad - ptr) >> 2 );
memset_zero_64( ctx->bufy + (ptr>>2), (pad - ptr) >> 2 );
memset_zero_m64( ctx->bufy + (ptr>>2), (pad - ptr) >> 2 );
memset( ctx->bufz + (ptr>>2), 0, (pad - ptr) >> 2 );
}
@@ -534,3 +535,4 @@ void sha256_11way_close( sha256_11way_context *ctx, void *dstx, void *dsty,
}
#endif
#endif // 0

View File

@@ -36,7 +36,6 @@ int scanhash_sha256q_8way( int thr_id, struct work *work, uint32_t max_nonce,
{
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
uint32_t hash[8*8] __attribute__ ((aligned (32)));
uint32_t edata[20] __attribute__ ((aligned (32)));;
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t Htarg = ptarget[7];
@@ -59,12 +58,7 @@ int scanhash_sha256q_8way( int thr_id, struct work *work, uint32_t max_nonce,
0 };
// Need big endian data
casti_m256i( edata, 0 ) = mm256_bswap_32( casti_m256i( pdata, 0 ) );
casti_m256i( edata, 1 ) = mm256_bswap_32( casti_m256i( pdata, 1 ) );
casti_m128i( edata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
mm256_interleave_8x32( vdata, edata, edata, edata, edata,
edata, edata, edata, edata, 640 );
mm256_bswap_intrlv80_8x32( vdata, pdata );
sha256_8way_init( &sha256_ctx8 );
sha256_8way( &sha256_ctx8, vdata, 64 );
@@ -73,11 +67,10 @@ int scanhash_sha256q_8way( int thr_id, struct work *work, uint32_t max_nonce,
uint32_t mask = masks[m];
do
{
*noncev = mm256_bswap_32(
_mm256_set_epi32( n+7, n+6, n+5, n+4, n+3, n+2, n+1, n ) );
pdata[19] = n;
*noncev = mm256_bswap_32(
_mm256_set_epi32( n+7, n+6, n+5, n+4, n+3, n+2, n+1, n ) );
pdata[19] = n;
sha256q_8way_hash( hash, vdata );
uint32_t *hash7 = &(hash[7<<3]);
@@ -86,27 +79,19 @@ int scanhash_sha256q_8way( int thr_id, struct work *work, uint32_t max_nonce,
if ( !( hash7[ lane ] & mask ) )
{
// deinterleave hash for lane
uint32_t lane_hash[8];
mm256_extract_lane_8x32( lane_hash, hash, lane, 256 );
uint32_t lane_hash[8];
mm256_extract_lane_8x32( lane_hash, hash, lane, 256 );
if ( fulltest( lane_hash, ptarget ) )
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
{
pdata[19] = n + lane;
work_set_target_ratio( work, lane_hash );
if ( submit_work( mythr, work ) )
applog( LOG_NOTICE, "Share %d submitted by thread %d, lane %d.",
accepted_share_count + rejected_share_count + 1,
thr_id, lane );
else
applog( LOG_WARNING, "Failed to submit share." );
}
}
pdata[19] = n + lane;
submit_solution( work, lane_hash, mythr, lane );
}
}
n += 8;
} while ( (n < max_nonce-10) && !work_restart[thr_id].restart );
break;
}
*hashes_done = n - first_nonce + 1;
return 0;
}
@@ -146,7 +131,6 @@ int scanhash_sha256q_4way( int thr_id, struct work *work, uint32_t max_nonce,
uint32_t hash[8*4] __attribute__ ((aligned (32)));
uint32_t *hash7 = &(hash[7<<2]);
uint32_t lane_hash[8];
uint32_t edata[20] __attribute__ ((aligned (32)));;
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t Htarg = ptarget[7];
@@ -168,13 +152,7 @@ int scanhash_sha256q_4way( int thr_id, struct work *work, uint32_t max_nonce,
0xFFFF0000,
0 };
casti_m128i( edata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
casti_m128i( edata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
casti_m128i( edata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
casti_m128i( edata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
casti_m128i( edata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
mm128_interleave_4x32( vdata, edata, edata, edata, edata, 640 );
mm128_bswap_intrlv80_4x32( vdata, pdata );
sha256_4way_init( &sha256_ctx4 );
sha256_4way( &sha256_ctx4, vdata, 64 );
@@ -183,7 +161,7 @@ int scanhash_sha256q_4way( int thr_id, struct work *work, uint32_t max_nonce,
uint32_t mask = masks[m];
do {
*noncev = mm128_bswap_32( _mm_set_epi32( n+3,n+2,n+1,n ) );
pdata[19] = n;
pdata[19] = n;
sha256q_4way_hash( hash, vdata );
@@ -192,25 +170,16 @@ int scanhash_sha256q_4way( int thr_id, struct work *work, uint32_t max_nonce,
{
mm128_extract_lane_4x32( lane_hash, hash, lane, 256 );
if ( fulltest( lane_hash, ptarget ) )
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
{
pdata[19] = n + lane;
work_set_target_ratio( work, lane_hash );
if ( submit_work( mythr, work ) )
applog( LOG_NOTICE, "Share %d submitted by thread %d, lane %d.",
accepted_share_count + rejected_share_count + 1,
thr_id, lane );
else
applog( LOG_WARNING, "Failed to submit share." );
submit_solution( work, lane_hash, mythr, lane );
}
}
n += 4;
n += 4;
} while ( (n < max_nonce - 4) && !work_restart[thr_id].restart );
break;
}
*hashes_done = n - first_nonce + 1;
return 0;
}

View File

@@ -72,7 +72,7 @@ int scanhash_sha256t_11way( int thr_id, struct work *work, uint32_t max_nonce,
casti_m256i( dataz, 1 ) = mm256_bswap_32( casti_m256i( pdata, 1 ) );
casti_m128i( dataz, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
mm256_interleave_8x32( datax, dataz, dataz, dataz, dataz,
mm256_intrlv_8x32( datax, dataz, dataz, dataz, dataz,
dataz, dataz, dataz, dataz, 640 );
mm64_interleave_2x32( datay, dataz, dataz, 640 );
@@ -156,15 +156,15 @@ void sha256t_8way_hash( void* output, const void* input )
sha256_8way_init( &ctx );
sha256_8way( &ctx, vhash, 32 );
sha256_8way_close( &ctx, output );
}
int scanhash_sha256t_8way( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
uint32_t hash[8*8] __attribute__ ((aligned (32)));
uint32_t edata[20] __attribute__ ((aligned (32)));;
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
uint32_t hash[8*8] __attribute__ ((aligned (32)));
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
uint32_t *hash7 = &(hash[7<<3]);
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t Htarg = ptarget[7];
@@ -187,12 +187,7 @@ int scanhash_sha256t_8way( int thr_id, struct work *work, uint32_t max_nonce,
0 };
// Need big endian data
casti_m256i( edata, 0 ) = mm256_bswap_32( casti_m256i( pdata, 0 ) );
casti_m256i( edata, 1 ) = mm256_bswap_32( casti_m256i( pdata, 1 ) );
casti_m128i( edata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
mm256_interleave_8x32( vdata, edata, edata, edata, edata,
edata, edata, edata, edata, 640 );
mm256_bswap_intrlv80_8x32( vdata, pdata );
sha256_8way_init( &sha256_ctx8 );
sha256_8way( &sha256_ctx8, vdata, 64 );
@@ -201,29 +196,22 @@ int scanhash_sha256t_8way( int thr_id, struct work *work, uint32_t max_nonce,
uint32_t mask = masks[m];
do
{
*noncev = mm256_bswap_32(
_mm256_set_epi32( n+7, n+6, n+5, n+4, n+3, n+2, n+1, n ) );
*noncev = mm256_bswap_32( _mm256_set_epi32(
n+7,n+6,n+5,n+4,n+3,n+2,n+1,n ) );
pdata[19] = n;
sha256t_8way_hash( hash, vdata );
uint32_t *hash7 = &(hash[7<<3]);
for ( int lane = 0; lane < 8; lane++ )
if ( !( hash7[ lane ] & mask ) )
{
// deinterleave hash for lane
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
mm256_extract_lane_8x32( lane_hash, hash, lane, 256 );
if ( fulltest( lane_hash, ptarget ) )
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
{
pdata[19] = n + lane;
submit_solution( work, lane_hash, mythr, lane );
}
}
}
n += 8;
} while ( (n < max_nonce-10) && !work_restart[thr_id].restart );
break;
}
@@ -253,7 +241,6 @@ void sha256t_4way_hash( void* output, const void* input )
sha256_4way_init( &ctx );
sha256_4way( &ctx, vhash, 32 );
sha256_4way_close( &ctx, output );
}
int scanhash_sha256t_4way( int thr_id, struct work *work, uint32_t max_nonce,
@@ -262,7 +249,6 @@ int scanhash_sha256t_4way( int thr_id, struct work *work, uint32_t max_nonce,
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
uint32_t hash[8*4] __attribute__ ((aligned (32)));
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
uint32_t edata[20] __attribute__ ((aligned (32)));;
uint32_t *hash7 = &(hash[7<<2]);
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
@@ -278,20 +264,14 @@ int scanhash_sha256t_4way( int thr_id, struct work *work, uint32_t max_nonce,
0xFFF,
0xFFFF,
0x10000000 };
const uint32_t masks[] = { 0xFFFFFFFF,
0xFFFFFFF0,
0xFFFFFF00,
0xFFFFF000,
0xFFFF0000,
0 };
const uint32_t masks[] = { 0xFFFFFFFF,
0xFFFFFFF0,
0xFFFFFF00,
0xFFFFF000,
0xFFFF0000,
0 };
casti_m128i( edata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
casti_m128i( edata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
casti_m128i( edata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
casti_m128i( edata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
casti_m128i( edata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
mm128_interleave_4x32( vdata, edata, edata, edata, edata, 640 );
mm128_bswap_intrlv80_4x32( vdata, pdata );
sha256_4way_init( &sha256_ctx4 );
sha256_4way( &sha256_ctx4, vdata, 64 );
@@ -300,7 +280,7 @@ int scanhash_sha256t_4way( int thr_id, struct work *work, uint32_t max_nonce,
uint32_t mask = masks[m];
do {
*noncev = mm128_bswap_32( _mm_set_epi32( n+3,n+2,n+1,n ) );
pdata[19] = n;
pdata[19] = n;
sha256t_4way_hash( hash, vdata );
@@ -308,15 +288,13 @@ int scanhash_sha256t_4way( int thr_id, struct work *work, uint32_t max_nonce,
if ( !( hash7[ lane ] & mask ) )
{
mm128_extract_lane_4x32( lane_hash, hash, lane, 256 );
if ( fulltest( lane_hash, ptarget ) )
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
{
pdata[19] = n + lane;
submit_solution( work, lane_hash, mythr, lane );
}
}
n += 4;
}
}
n += 4;
} while ( (n < max_nonce - 4) && !work_restart[thr_id].restart );
break;
}

View File

@@ -2,11 +2,7 @@
bool register_sha256t_algo( algo_gate_t* gate )
{
#if defined(SHA256T_11WAY)
gate->optimizations = SSE2_OPT | AVX2_OPT | SHA_OPT;
gate->scanhash = (void*)&scanhash_sha256t_11way;
gate->hash = (void*)&sha256t_11way_hash;
#elif defined(SHA256T_8WAY)
#if defined(SHA256T_8WAY)
gate->optimizations = SSE2_OPT | AVX2_OPT | SHA_OPT;
gate->scanhash = (void*)&scanhash_sha256t_8way;
gate->hash = (void*)&sha256t_8way_hash;
@@ -25,11 +21,7 @@ gate->optimizations = SHA_OPT;
bool register_sha256q_algo( algo_gate_t* gate )
{
#if defined(SHA256T_8WAY)
gate->optimizations = SSE2_OPT | AVX2_OPT | SHA_OPT;
gate->scanhash = (void*)&scanhash_sha256q_8way;
gate->hash = (void*)&sha256q_8way_hash;
#elif defined(SHA256T_4WAY)
#if defined(SHA256T_4WAY)
gate->optimizations = SSE2_OPT | AVX2_OPT | SHA_OPT;
gate->scanhash = (void*)&scanhash_sha256q_4way;
gate->hash = (void*)&sha256q_4way_hash;

View File

@@ -11,24 +11,12 @@
#endif
#if defined(__AVX2__)
#define SHA256T_8WAY
// #define SHA256T_11WAY
#endif
#endif
bool register_sha256t_algo( algo_gate_t* gate );
bool register_sha256q_algo( algo_gate_t* gate );
#if defined(SHA256T_11WAY)
void sha256t_11way_hash( void *outx, void *outy, void *outz, const void *inpx,
const void *inpy, const void *inpz );
int scanhash_sha256t_11way( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
//void sha256q_8way_hash( void *output, const void *input );
//int scanhash_sha256q_11way( int thr_id, struct work *work, uint32_t max_nonce,
// uint64_t *hashes_done, struct thr_info *mythr );
#endif
#if defined(SHA256T_8WAY)
void sha256t_8way_hash( void *output, const void *input );