This commit is contained in:
Jay D Dee
2020-06-18 17:30:26 -04:00
parent 51a1d91abd
commit cdd587537e
27 changed files with 455 additions and 567 deletions

View File

@@ -65,6 +65,13 @@ If not what makes it happen or not happen?
Change Log Change Log
---------- ----------
v3.14.3
#265: more mutex changes to reduce blocking with high thread count.
#267: fixed hodl algo potential memory alignment issue,
add warning when thread count is not valid for mining hodl algo.
v3.14.2 v3.14.2
The second line of the Share Accepted log is no longer displayed, The second line of the Share Accepted log is no longer displayed,

View File

@@ -128,6 +128,119 @@ int scanhash_generic( struct work *work, uint32_t max_nonce,
return 0; return 0;
} }
#if defined(__AVX2__)
//int scanhash_4way_64_64( struct work *work, uint32_t max_nonce,
// uint64_t *hashes_done, struct thr_info *mythr )
//int scanhash_4way_64_640( struct work *work, uint32_t max_nonce,
// uint64_t *hashes_done, struct thr_info *mythr )
// Generic 4 lane scanhash for algos that take 64 bit interleaved input and
// whose registered algo_gate.hash leaves 32 bit interleaved output.
//
//  work        - work item; data[19] holds the starting nonce on entry and
//                the next untested nonce on return.
//  max_nonce   - upper limit of the nonce range for this call.
//  hashes_done - receives the number of nonces covered by this call.
//  mythr       - calling thread's info, id selects the work_restart slot.
//
// Always returns 0, candidate shares are submitted from inside the loop.
int scanhash_4way_64in_32out( struct work *work, uint32_t max_nonce,
                      uint64_t *hashes_done, struct thr_info *mythr )
{
   uint32_t vhash[4*8] __attribute__ ((aligned (64)));
   uint32_t vdata[4*20] __attribute__ ((aligned (64)));
   uint32_t lane_hash[8] __attribute__ ((aligned (64)));
   // The pre-filter below reads index 7*4 + lane of the interleaved hash.
   uint32_t *vhash_d7 = vhash + 7*4;
   uint32_t *pdata = work->data;
   const uint32_t *ptarget = work->target;
   const uint32_t targ_d7 = ptarget[7];
   const uint32_t first_nonce = pdata[19];
   const uint32_t last_nonce = max_nonce - 4;
   const int thr_id = mythr->id;
   const bool bench = opt_benchmark;
   // The nonces live in the 10th 256 bit vector of the interleaved data.
   __m256i *nonce_vec = &( (__m256i*)vdata )[9];
   uint32_t nonce = first_nonce;

   mm256_bswap32_intrlv80_4x64( vdata, pdata );
   // Lane i starts at nonce + i, the nonce occupies the upper 32 bits of
   // each 64 bit element.
   *nonce_vec = mm256_intrlv_blend_32(
         _mm256_set_epi32( nonce+3, 0, nonce+2, 0, nonce+1, 0, nonce, 0 ),
         *nonce_vec );

   for ( ;; )
   {
      // A zero return from the hash function skips the lane checks.
      if ( likely( algo_gate.hash( vhash, vdata, thr_id ) ) )
      {
         for ( int lane = 0; lane < 4; lane++ )
         {
            // Cheap single word test first, nothing is submitted when
            // benchmarking.
            if ( likely( vhash_d7[ lane ] > targ_d7 || bench ) )
               continue;

            extr_lane_4x32( lane_hash, vhash, lane, 256 );
            if ( !valid_hash( lane_hash, ptarget ) )
               continue;

            pdata[19] = bswap_32( nonce + lane );
            submit_solution( work, lane_hash, mythr );
         }
      }

      // Step every lane's nonce by 4.
      *nonce_vec = _mm256_add_epi32( *nonce_vec,
                            m256_const1_64( 0x0000000400000000 ) );
      nonce += 4;

      // Inclusive bound: keep going while nonce <= max_nonce - 4 and no
      // restart has been flagged for this thread. Note the 8 way version in
      // this file tests with < instead.
      if ( unlikely( ( nonce > last_nonce )
                     || work_restart[thr_id].restart ) )
         break;
   }

   pdata[19] = nonce;
   *hashes_done = nonce - first_nonce;
   return 0;
}
//int scanhash_8way_32_32( struct work *work, uint32_t max_nonce,
// uint64_t *hashes_done, struct thr_info *mythr )
#endif
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
//int scanhash_8way_64_64( struct work *work, uint32_t max_nonce,
// uint64_t *hashes_done, struct thr_info *mythr )
//int scanhash_8way_64_640( struct work *work, uint32_t max_nonce,
// uint64_t *hashes_done, struct thr_info *mythr )
// Generic 8 lane scanhash for algos that take 64 bit interleaved input and
// whose registered algo_gate.hash leaves 32 bit interleaved output.
//
//  work        - work item; data[19] holds the starting nonce on entry and
//                the next untested nonce on return.
//  max_nonce   - upper limit of the nonce range for this call.
//  hashes_done - receives the number of nonces covered by this call.
//  mythr       - calling thread's info, id selects the work_restart slot.
//
// Always returns 0, candidate shares are submitted from inside the loop.
int scanhash_8way_64in_32out( struct work *work, uint32_t max_nonce,
                      uint64_t *hashes_done, struct thr_info *mythr )
{
   uint32_t vhash[8*8] __attribute__ ((aligned (128)));
   uint32_t vdata[8*20] __attribute__ ((aligned (64)));
   uint32_t lane_hash[8] __attribute__ ((aligned (64)));
   // The pre-filter below reads index 7*8 + lane of the interleaved hash.
   uint32_t *vhash_d7 = vhash + 7*8;
   uint32_t *pdata = work->data;
   const uint32_t *ptarget = work->target;
   const uint32_t targ_d7 = ptarget[7];
   const uint32_t first_nonce = pdata[19];
   const uint32_t last_nonce = max_nonce - 8;
   const int thr_id = mythr->id;
   const bool bench = opt_benchmark;
   // The nonces live in the 10th 512 bit vector of the interleaved data.
   __m512i *nonce_vec = &( (__m512i*)vdata )[9];
   uint32_t nonce = first_nonce;

   mm512_bswap32_intrlv80_8x64( vdata, pdata );
   // Lane i starts at nonce + i, the nonce occupies the upper 32 bits of
   // each 64 bit element.
   *nonce_vec = mm512_intrlv_blend_32(
         _mm512_set_epi32( nonce+7, 0, nonce+6, 0, nonce+5, 0, nonce+4, 0,
                           nonce+3, 0, nonce+2, 0, nonce+1, 0, nonce, 0 ),
         *nonce_vec );

   for ( ;; )
   {
      // A zero return from the hash function skips the lane checks.
      if ( likely( algo_gate.hash( vhash, vdata, thr_id ) ) )
      {
         for ( int lane = 0; lane < 8; lane++ )
         {
            // Cheap single word test first, nothing is submitted when
            // benchmarking.
            if ( likely( ( vhash_d7[ lane ] > targ_d7 ) || bench ) )
               continue;

            extr_lane_8x32( lane_hash, vhash, lane, 256 );
            if ( unlikely( !valid_hash( lane_hash, ptarget ) ) )
               continue;

            pdata[19] = bswap_32( nonce + lane );
            submit_solution( work, lane_hash, mythr );
         }
      }

      // Step every lane's nonce by 8.
      *nonce_vec = _mm512_add_epi32( *nonce_vec,
                            m512_const1_64( 0x0000000800000000 ) );
      nonce += 8;

      // Exclusive bound: keep going only while nonce < max_nonce - 8 and no
      // restart has been flagged for this thread. Note the 4 way version in
      // this file tests with <= instead.
      if ( unlikely( ( nonce >= last_nonce )
                     || work_restart[thr_id].restart ) )
         break;
   }

   pdata[19] = nonce;
   *hashes_done = nonce - first_nonce;
   return 0;
}
//int scanhash_16way_32_32( struct work *work, uint32_t max_nonce,
// uint64_t *hashes_done, struct thr_info *mythr )
#endif
int null_hash() int null_hash()
{ {
applog(LOG_WARNING,"SWERR: null_hash unsafe null function"); applog(LOG_WARNING,"SWERR: null_hash unsafe null function");

View File

@@ -110,12 +110,12 @@ inline bool set_excl ( set_t a, set_t b ) { return (a & b) == 0; }
typedef struct typedef struct
{ {
// Mandatory functions, one of these is mandatory. If the default scanhash // Mandatory functions, one of these is mandatory. If a generic scanhash
// is used a custom hash function must be registered, with a custom scanhash // is used a custom hash function must be registered, with a custom scanhash
// the hash function is not necessary. // the custom hash function can be called directly and doesn't need to be
// registered in the gate.
int ( *scanhash ) ( struct work*, uint32_t, uint64_t*, struct thr_info* ); int ( *scanhash ) ( struct work*, uint32_t, uint64_t*, struct thr_info* );
//int ( *hash ) ( void*, const void*, uint32_t ) ;
int ( *hash ) ( void*, const void*, int ); int ( *hash ) ( void*, const void*, int );
//optional, safe to use default in most cases //optional, safe to use default in most cases
@@ -203,19 +203,61 @@ void four_way_not_tested();
#define STD_WORK_DATA_SIZE 128 #define STD_WORK_DATA_SIZE 128
#define STD_WORK_CMP_SIZE 76 #define STD_WORK_CMP_SIZE 76
#define JR2_NONCE_INDEX 39 // 8 bit offset //#define JR2_NONCE_INDEX 39 // 8 bit offset
// These indexes are only used with JSON RPC2 and are not gated. // These indexes are only used with JSON RPC2 and are not gated.
#define JR2_WORK_CMP_INDEX_2 43 //#define JR2_WORK_CMP_INDEX_2 43
#define JR2_WORK_CMP_SIZE_2 33 //#define JR2_WORK_CMP_SIZE_2 33
// deprecated, use generic instead // deprecated, use generic instead
int null_scanhash(); int null_scanhash();
// Default generic, may be used in many cases. // Default generic, may be used in many cases.
// N-way is more complicated, requires many different implementations
// depending on architecture, input format, and output format.
// Naming convention is scanhash_[N]way_[input format]in_[output format]out
// N = number of lanes
// input/output format:
// 32: 32 bit interleaved parallel lanes
// 64: 64 bit interleaved parallel lanes
// 640: input only, not interleaved, contiguous serial 640 bit lanes.
// 256: output only, not interleaved, contiguous serial 256 bit lanes.
int scanhash_generic( struct work *work, uint32_t max_nonce, int scanhash_generic( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr ); uint64_t *hashes_done, struct thr_info *mythr );
#if defined(__AVX2__)
//int scanhash_4way_64in_64out( struct work *work, uint32_t max_nonce,
// uint64_t *hashes_done, struct thr_info *mythr );
//int scanhash_4way_64in_256out( struct work *work, uint32_t max_nonce,
// uint64_t *hashes_done, struct thr_info *mythr );
int scanhash_4way_64in_32out( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
//int scanhash_8way_32in_32out( struct work *work, uint32_t max_nonce,
// uint64_t *hashes_done, struct thr_info *mythr );
#endif
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
//int scanhash_8way_64in_64out( struct work *work, uint32_t max_nonce,
// uint64_t *hashes_done, struct thr_info *mythr );
//int scanhash_8way_64in_256out( struct work *work, uint32_t max_nonce,
// uint64_t *hashes_done, struct thr_info *mythr );
int scanhash_8way_64in_32out( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
//int scanhash_16way_32in_32out( struct work *work, uint32_t max_nonce,
// uint64_t *hashes_done, struct thr_info *mythr );
#endif
// displays warning // displays warning
int null_hash (); int null_hash ();
@@ -263,7 +305,7 @@ int std_get_work_data_size();
// by calling the algo's register function. // by calling the algo's register function.
bool register_algo_gate( int algo, algo_gate_t *gate ); bool register_algo_gate( int algo, algo_gate_t *gate );
// Called by algos toverride any default gate functions that are applicable // Called by algos to override any default gate functions that are applicable
// and do any other algo-specific initialization. // and do any other algo-specific initialization.
// The register functions for all the algos can be declared here to reduce // The register functions for all the algos can be declared here to reduce
// compiler warnings but that's just more work for devs adding new algos. // compiler warnings but that's just more work for devs adding new algos.

View File

@@ -78,7 +78,6 @@ void decred_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
uint32_t extraheader[32] = { 0 }; uint32_t extraheader[32] = { 0 };
int headersize = 0; int headersize = 0;
uint32_t* extradata = (uint32_t*) sctx->xnonce1; uint32_t* extradata = (uint32_t*) sctx->xnonce1;
size_t t;
int i; int i;
// getwork over stratum, getwork merkle + header passed in coinb1 // getwork over stratum, getwork merkle + header passed in coinb1
@@ -87,9 +86,6 @@ void decred_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
sizeof(extraheader) ); sizeof(extraheader) );
memcpy( extraheader, &sctx->job.coinbase[32], headersize ); memcpy( extraheader, &sctx->job.coinbase[32], headersize );
// Increment extranonce2
for ( t = 0; t < sctx->xnonce2_size && !( ++sctx->job.xnonce2[t] ); t++ );
// Assemble block header // Assemble block header
memset( g_work->data, 0, sizeof(g_work->data) ); memset( g_work->data, 0, sizeof(g_work->data) );
g_work->data[0] = le32dec( sctx->job.version ); g_work->data[0] = le32dec( sctx->job.version );

View File

@@ -99,13 +99,13 @@ void hodl_build_block_header( struct work* g_work, uint32_t version,
// called only by thread 0, saves a backup of g_work // called only by thread 0, saves a backup of g_work
void hodl_get_new_work( struct work* work, struct work* g_work) void hodl_get_new_work( struct work* work, struct work* g_work)
{ {
pthread_mutex_lock( &g_work_lock ); pthread_rwlock_rdlock( &g_work_lock );
work_free( &hodl_work ); work_free( &hodl_work );
work_copy( &hodl_work, g_work ); work_copy( &hodl_work, g_work );
hodl_work.data[ algo_gate.nonce_index ] = ( clock() + rand() ) % 9999; hodl_work.data[ algo_gate.nonce_index ] = ( clock() + rand() ) % 9999;
pthread_mutex_unlock( &g_work_lock ); pthread_rwlock_unlock( &g_work_lock );
} }
json_t *hodl_longpoll_rpc_call( CURL *curl, int *err, char* lp_url ) json_t *hodl_longpoll_rpc_call( CURL *curl, int *err, char* lp_url )
@@ -159,11 +159,10 @@ bool register_hodl_algo( algo_gate_t* gate )
applog( LOG_ERR, "Only CPUs with AES are supported, use legacy version."); applog( LOG_ERR, "Only CPUs with AES are supported, use legacy version.");
return false; return false;
#endif #endif
// if ( TOTAL_CHUNKS % opt_n_threads )
// { if ( GARBAGE_SIZE % opt_n_threads )
// applog(LOG_ERR,"Thread count must be power of 2."); applog( LOG_WARNING,"WARNING: Thread count must be power of 2. Miner may crash or produce invalid hash!" );
// return false;
// }
pthread_barrier_init( &hodl_barrier, NULL, opt_n_threads ); pthread_barrier_init( &hodl_barrier, NULL, opt_n_threads );
gate->optimizations = SSE42_OPT | AES_OPT | AVX2_OPT; gate->optimizations = SSE42_OPT | AES_OPT | AVX2_OPT;
gate->scanhash = (void*)&hodl_scanhash; gate->scanhash = (void*)&hodl_scanhash;
@@ -175,7 +174,7 @@ bool register_hodl_algo( algo_gate_t* gate )
gate->resync_threads = (void*)&hodl_resync_threads; gate->resync_threads = (void*)&hodl_resync_threads;
gate->do_this_thread = (void*)&hodl_do_this_thread; gate->do_this_thread = (void*)&hodl_do_this_thread;
gate->work_cmp_size = 76; gate->work_cmp_size = 76;
hodl_scratchbuf = (unsigned char*)malloc( 1 << 30 ); hodl_scratchbuf = (unsigned char*)_mm_malloc( 1 << 30, 64 );
allow_getwork = false; allow_getwork = false;
opt_target_factor = 8388608.0; opt_target_factor = 8388608.0;
return ( hodl_scratchbuf != NULL ); return ( hodl_scratchbuf != NULL );

View File

@@ -70,7 +70,7 @@ int scanhash_hodl_wolf( struct work* work, uint32_t max_nonce,
uint32_t *ptarget = work->target; uint32_t *ptarget = work->target;
int threadNumber = mythr->id; int threadNumber = mythr->id;
CacheEntry *Garbage = (CacheEntry*)hodl_scratchbuf; CacheEntry *Garbage = (CacheEntry*)hodl_scratchbuf;
CacheEntry Cache[AES_PARALLEL_N]; CacheEntry Cache[AES_PARALLEL_N] __attribute__ ((aligned (64)));
__m128i* data[AES_PARALLEL_N]; __m128i* data[AES_PARALLEL_N];
const __m128i* next[AES_PARALLEL_N]; const __m128i* next[AES_PARALLEL_N];
uint32_t CollisionCount = 0; uint32_t CollisionCount = 0;

View File

@@ -215,9 +215,6 @@ void phi2_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
size_t t; size_t t;
algo_gate.gen_merkle_root( merkle_tree, sctx ); algo_gate.gen_merkle_root( merkle_tree, sctx );
// Increment extranonce2
for ( t = 0; t < sctx->xnonce2_size && !( ++sctx->job.xnonce2[t] ); t++ );
// Assemble block header
algo_gate.build_block_header( g_work, le32dec( sctx->job.version ), algo_gate.build_block_header( g_work, le32dec( sctx->job.version ),
(uint32_t*) sctx->job.prevhash, (uint32_t*) merkle_tree, (uint32_t*) sctx->job.prevhash, (uint32_t*) merkle_tree,
le32dec( sctx->job.ntime ), le32dec(sctx->job.nbits), NULL ); le32dec( sctx->job.ntime ), le32dec(sctx->job.nbits), NULL );
@@ -225,7 +222,6 @@ void phi2_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
g_work->data[ 20+t ] = ((uint32_t*)sctx->job.extra)[t]; g_work->data[ 20+t ] = ((uint32_t*)sctx->job.extra)[t];
} }
bool register_phi2_algo( algo_gate_t* gate ) bool register_phi2_algo( algo_gate_t* gate )
{ {
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT; gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT;

View File

@@ -156,7 +156,7 @@ int scanhash_zr5( struct work *work, uint32_t max_nonce,
void zr5_get_new_work( struct work* work, struct work* g_work, int thr_id, void zr5_get_new_work( struct work* work, struct work* g_work, int thr_id,
uint32_t* end_nonce_ptr ) uint32_t* end_nonce_ptr )
{ {
pthread_mutex_lock( &g_work_lock ); pthread_rwlock_rdlock( &g_work_lock );
// ignore POK in first word // ignore POK in first word
const int wkcmp_sz = 72; // (19-1) * sizeof(uint32_t) const int wkcmp_sz = 72; // (19-1) * sizeof(uint32_t)
@@ -174,7 +174,7 @@ void zr5_get_new_work( struct work* work, struct work* g_work, int thr_id,
else else
++(*nonceptr); ++(*nonceptr);
pthread_mutex_unlock( &g_work_lock ); pthread_rwlock_unlock( &g_work_lock );
} }
void zr5_display_pok( struct work* work ) void zr5_display_pok( struct work* work )

View File

@@ -69,13 +69,9 @@ void lbry_build_block_header( struct work* g_work, uint32_t version,
void lbry_build_extraheader( struct work* g_work, struct stratum_ctx* sctx ) void lbry_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
{ {
unsigned char merkle_root[64] = { 0 }; unsigned char merkle_root[64] = { 0 };
size_t t;
int i; int i;
algo_gate.gen_merkle_root( merkle_root, sctx ); algo_gate.gen_merkle_root( merkle_root, sctx );
// Increment extranonce2
for ( t = 0; t < sctx->xnonce2_size && !( ++sctx->job.xnonce2[t] ); t++ );
// Assemble block header
memset( g_work->data, 0, sizeof(g_work->data) ); memset( g_work->data, 0, sizeof(g_work->data) );
g_work->data[0] = le32dec( sctx->job.version ); g_work->data[0] = le32dec( sctx->job.version );

View File

@@ -227,7 +227,7 @@ bool initialize_torture_garden()
} }
// Produce a 32-byte hash from 80-byte input data // Produce a 32-byte hash from 80-byte input data
int minotaur_hash( void *output, const void *input ) int minotaur_hash( void *output, const void *input, int thr_id )
{ {
unsigned char hash[64] __attribute__ ((aligned (64))); unsigned char hash[64] __attribute__ ((aligned (64)));

View File

@@ -135,18 +135,16 @@ void x16rt_getAlgoString( const uint32_t *timeHash, char *output)
void veil_build_extraheader( struct work* g_work, struct stratum_ctx* sctx ) void veil_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
{ {
uint32_t merkleroothash[8];
uint32_t witmerkleroothash[8];
uint32_t denom10[8];
uint32_t denom100[8];
uint32_t denom1000[8];
uint32_t denom10000[8];
int i;
uchar merkle_tree[64] = { 0 }; uchar merkle_tree[64] = { 0 };
size_t t;
algo_gate.gen_merkle_root( merkle_tree, sctx ); algo_gate.gen_merkle_root( merkle_tree, sctx );
// Increment extranonce2
for ( t = 0; t < sctx->xnonce2_size && !( ++sctx->job.xnonce2[t] ); t++ );
// Assemble block header
// algo_gate.build_block_header( g_work, le32dec( sctx->job.version ),
// (uint32_t*) sctx->job.prevhash, (uint32_t*) merkle_tree,
// le32dec( sctx->job.ntime ), le32dec(sctx->job.nbits) );
int i;
memset( g_work->data, 0, sizeof(g_work->data) ); memset( g_work->data, 0, sizeof(g_work->data) );
g_work->data[0] = le32dec( sctx->job.version ); g_work->data[0] = le32dec( sctx->job.version );
@@ -164,35 +162,35 @@ void veil_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
g_work->data[31] = 0x00000280; g_work->data[31] = 0x00000280;
for ( i = 0; i < 8; i++ ) for ( i = 0; i < 8; i++ )
g_work->merkleroothash[7 - i] = be32dec((uint32_t *)merkle_tree + i); merkleroothash[7 - i] = be32dec((uint32_t *)merkle_tree + i);
for ( i = 0; i < 8; i++ ) for ( i = 0; i < 8; i++ )
g_work->witmerkleroothash[7 - i] = be32dec((uint32_t *)merkle_tree + i); witmerkleroothash[7 - i] = be32dec((uint32_t *)merkle_tree + i);
for ( i = 0; i < 8; i++ ) for ( i = 0; i < 8; i++ )
g_work->denom10[i] = le32dec((uint32_t *)sctx->job.denom10 + i); denom10[i] = le32dec((uint32_t *)sctx->job.denom10 + i);
for ( i = 0; i < 8; i++ ) for ( i = 0; i < 8; i++ )
g_work->denom100[i] = le32dec((uint32_t *)sctx->job.denom100 + i); denom100[i] = le32dec((uint32_t *)sctx->job.denom100 + i);
for ( i = 0; i < 8; i++ ) for ( i = 0; i < 8; i++ )
g_work->denom1000[i] = le32dec((uint32_t *)sctx->job.denom1000 + i); denom1000[i] = le32dec((uint32_t *)sctx->job.denom1000 + i);
for ( i = 0; i < 8; i++ ) for ( i = 0; i < 8; i++ )
g_work->denom10000[i] = le32dec((uint32_t *)sctx->job.denom10000 + i); denom10000[i] = le32dec((uint32_t *)sctx->job.denom10000 + i);
uint32_t pofnhash[8]; uint32_t pofnhash[8];
memset(pofnhash, 0x00, 32); memset(pofnhash, 0x00, 32);
char denom10_str [ 2 * sizeof( g_work->denom10 ) + 1 ]; char denom10_str [ 2 * sizeof( denom10 ) + 1 ];
char denom100_str [ 2 * sizeof( g_work->denom100 ) + 1 ]; char denom100_str [ 2 * sizeof( denom100 ) + 1 ];
char denom1000_str [ 2 * sizeof( g_work->denom1000 ) + 1 ]; char denom1000_str [ 2 * sizeof( denom1000 ) + 1 ];
char denom10000_str [ 2 * sizeof( g_work->denom10000 ) + 1 ]; char denom10000_str [ 2 * sizeof( denom10000 ) + 1 ];
char merkleroot_str [ 2 * sizeof( g_work->merkleroothash ) + 1 ]; char merkleroot_str [ 2 * sizeof( merkleroothash ) + 1 ];
char witmerkleroot_str[ 2 * sizeof( g_work->witmerkleroothash ) + 1 ]; char witmerkleroot_str[ 2 * sizeof( witmerkleroothash ) + 1 ];
char pofn_str [ 2 * sizeof( pofnhash ) + 1 ]; char pofn_str [ 2 * sizeof( pofnhash ) + 1 ];
cbin2hex( denom10_str, (char*) g_work->denom10, 32 ); cbin2hex( denom10_str, (char*) denom10, 32 );
cbin2hex( denom100_str, (char*) g_work->denom100, 32 ); cbin2hex( denom100_str, (char*) denom100, 32 );
cbin2hex( denom1000_str, (char*) g_work->denom1000, 32 ); cbin2hex( denom1000_str, (char*) denom1000, 32 );
cbin2hex( denom10000_str, (char*) g_work->denom10000, 32 ); cbin2hex( denom10000_str, (char*) denom10000, 32 );
cbin2hex( merkleroot_str, (char*) g_work->merkleroothash, 32 ); cbin2hex( merkleroot_str, (char*) merkleroothash, 32 );
cbin2hex( witmerkleroot_str, (char*) g_work->witmerkleroothash, 32 ); cbin2hex( witmerkleroot_str, (char*) witmerkleroothash, 32 );
cbin2hex( pofn_str, (char*) pofnhash, 32 ); cbin2hex( pofn_str, (char*) pofnhash, 32 );
if ( true ) if ( true )

View File

@@ -58,7 +58,7 @@ union _sonoa_8way_context_overlay
typedef union _sonoa_8way_context_overlay sonoa_8way_context_overlay; typedef union _sonoa_8way_context_overlay sonoa_8way_context_overlay;
int sonoa_8way_hash( void *state, const void *input, int thrid ) int sonoa_8way_hash( void *state, const void *input, int thr_id )
{ {
uint64_t vhash[8*8] __attribute__ ((aligned (128))); uint64_t vhash[8*8] __attribute__ ((aligned (128)));
uint64_t vhashA[8*8] __attribute__ ((aligned (64))); uint64_t vhashA[8*8] __attribute__ ((aligned (64)));
@@ -186,7 +186,7 @@ int sonoa_8way_hash( void *state, const void *input, int thrid )
#endif #endif
if ( work_restart[thrid].restart ) return 0; if ( work_restart[thr_id].restart ) return 0;
// 2 // 2
bmw512_8way_full( &ctx.bmw, vhash, vhash, 64 ); bmw512_8way_full( &ctx.bmw, vhash, vhash, 64 );
@@ -302,7 +302,7 @@ int sonoa_8way_hash( void *state, const void *input, int thrid )
hamsi512_8way_update( &ctx.hamsi, vhash, 64 ); hamsi512_8way_update( &ctx.hamsi, vhash, 64 );
hamsi512_8way_close( &ctx.hamsi, vhash ); hamsi512_8way_close( &ctx.hamsi, vhash );
if ( work_restart[thrid].restart ) return 0; if ( work_restart[thr_id].restart ) return 0;
// 3 // 3
bmw512_8way_full( &ctx.bmw, vhash, vhash, 64 ); bmw512_8way_full( &ctx.bmw, vhash, vhash, 64 );
@@ -432,7 +432,7 @@ int sonoa_8way_hash( void *state, const void *input, int thrid )
sph_fugue512_full( &ctx.fugue, hash6, hash6, 64 ); sph_fugue512_full( &ctx.fugue, hash6, hash6, 64 );
sph_fugue512_full( &ctx.fugue, hash7, hash7, 64 ); sph_fugue512_full( &ctx.fugue, hash7, hash7, 64 );
if ( work_restart[thrid].restart ) return 0; if ( work_restart[thr_id].restart ) return 0;
// 4 // 4
intrlv_8x64_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6, intrlv_8x64_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
@@ -630,7 +630,7 @@ int sonoa_8way_hash( void *state, const void *input, int thrid )
#endif #endif
if ( work_restart[thrid].restart ) return 0; if ( work_restart[thr_id].restart ) return 0;
// 5 // 5
bmw512_8way_full( &ctx.bmw, vhash, vhash, 64 ); bmw512_8way_full( &ctx.bmw, vhash, vhash, 64 );
@@ -783,7 +783,7 @@ int sonoa_8way_hash( void *state, const void *input, int thrid )
sph_whirlpool512_full( &ctx.whirlpool, hash6, hash6, 64 ); sph_whirlpool512_full( &ctx.whirlpool, hash6, hash6, 64 );
sph_whirlpool512_full( &ctx.whirlpool, hash7, hash7, 64 ); sph_whirlpool512_full( &ctx.whirlpool, hash7, hash7, 64 );
if ( work_restart[thrid].restart ) return 0; if ( work_restart[thr_id].restart ) return 0;
// 6 // 6
intrlv_8x64_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6, intrlv_8x64_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
@@ -952,7 +952,7 @@ int sonoa_8way_hash( void *state, const void *input, int thrid )
sph_whirlpool512_full( &ctx.whirlpool, hash6, hash6, 64 ); sph_whirlpool512_full( &ctx.whirlpool, hash6, hash6, 64 );
sph_whirlpool512_full( &ctx.whirlpool, hash7, hash7, 64 ); sph_whirlpool512_full( &ctx.whirlpool, hash7, hash7, 64 );
if ( work_restart[thrid].restart ) return 0; if ( work_restart[thr_id].restart ) return 0;
// 7 // 7
intrlv_8x64_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6, intrlv_8x64_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
@@ -1118,49 +1118,6 @@ int sonoa_8way_hash( void *state, const void *input, int thrid )
return 1; return 1;
} }
// Sonoa specific 8 lane scan loop. Calls sonoa_8way_hash directly instead of
// going through algo_gate.hash. Starts at work->data[19], tests 8 nonces per
// pass, stores the next untested nonce back in data[19] and the number of
// nonces covered in *hashes_done. Always returns 0.
int scanhash_sonoa_8way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
// Room for 16 words per lane although only 256 bits per lane are extracted
// below.
uint32_t hash[8*16] __attribute__ ((aligned (128)));
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
// Index 7*8 of the interleaved hash, used for the quick target test.
uint32_t *hashd7 = &(hash[7<<3]);
uint32_t *pdata = work->data;
const uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce - 8;
__m512i *noncev = (__m512i*)vdata + 9; // aligned
uint32_t n = first_nonce;
const int thr_id = mythr->id;
const uint32_t targ32 = ptarget[7];
mm512_bswap32_intrlv80_8x64( vdata, pdata );
// Lane i starts at n + i, the nonce sits in the upper 32 bits of each
// 64 bit element.
*noncev = mm512_intrlv_blend_32(
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
do
{
// A zero return from the hash function skips the lane checks.
if ( sonoa_8way_hash( hash, vdata, thr_id ) )
for ( int lane = 0; lane < 8; lane++ )
// NOTE(review): no parentheses around the if condition, this only parses
// if unlikely() expands to a parenthesized expression - confirm.
if unlikely( ( hashd7[ lane ] <= targ32 ) )
{
extr_lane_8x32( lane_hash, hash, lane, 256 );
// Nothing is submitted when benchmarking.
if ( likely( valid_hash( lane_hash, ptarget ) && !opt_benchmark ) )
{
pdata[19] = bswap_32( n + lane );
submit_solution( work, lane_hash, mythr );
}
}
// Step every lane's nonce by 8.
*noncev = _mm512_add_epi32( *noncev,
m512_const1_64( 0x0000000800000000 ) );
n += 8;
} while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) );
pdata[19] = n;
*hashes_done = n - first_nonce;
return 0;
}
#elif defined(SONOA_4WAY) #elif defined(SONOA_4WAY)
union _sonoa_4way_context_overlay union _sonoa_4way_context_overlay
@@ -1186,7 +1143,7 @@ union _sonoa_4way_context_overlay
typedef union _sonoa_4way_context_overlay sonoa_4way_context_overlay; typedef union _sonoa_4way_context_overlay sonoa_4way_context_overlay;
int sonoa_4way_hash( void *state, const void *input, int thrid ) int sonoa_4way_hash( void *state, const void *input, int thr_id )
{ {
uint64_t hash0[8] __attribute__ ((aligned (64))); uint64_t hash0[8] __attribute__ ((aligned (64)));
uint64_t hash1[8] __attribute__ ((aligned (64))); uint64_t hash1[8] __attribute__ ((aligned (64)));
@@ -1250,7 +1207,7 @@ int sonoa_4way_hash( void *state, const void *input, int thrid )
echo_full( &ctx.echo, (BitSequence *)hash3, 512, echo_full( &ctx.echo, (BitSequence *)hash3, 512,
(const BitSequence *)hash3, 64 ); (const BitSequence *)hash3, 64 );
if ( work_restart[thrid].restart ) return 0; if ( work_restart[thr_id].restart ) return 0;
// 2 // 2
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 ); intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
@@ -1310,7 +1267,7 @@ int sonoa_4way_hash( void *state, const void *input, int thrid )
hamsi512_4way_update( &ctx.hamsi, vhash, 64 ); hamsi512_4way_update( &ctx.hamsi, vhash, 64 );
hamsi512_4way_close( &ctx.hamsi, vhash ); hamsi512_4way_close( &ctx.hamsi, vhash );
if ( work_restart[thrid].restart ) return 0; if ( work_restart[thr_id].restart ) return 0;
// 3 // 3
bmw512_4way_init( &ctx.bmw ); bmw512_4way_init( &ctx.bmw );
@@ -1375,7 +1332,7 @@ int sonoa_4way_hash( void *state, const void *input, int thrid )
sph_fugue512_full( &ctx.fugue, hash2, hash2, 64 ); sph_fugue512_full( &ctx.fugue, hash2, hash2, 64 );
sph_fugue512_full( &ctx.fugue, hash3, hash3, 64 ); sph_fugue512_full( &ctx.fugue, hash3, hash3, 64 );
if ( work_restart[thrid].restart ) return 0; if ( work_restart[thr_id].restart ) return 0;
// 4 // 4
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 ); intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
@@ -1472,7 +1429,7 @@ int sonoa_4way_hash( void *state, const void *input, int thrid )
shavite512_2way_init( &ctx.shavite ); shavite512_2way_init( &ctx.shavite );
shavite512_2way_update_close( &ctx.shavite, vhashB, vhashB, 64 ); shavite512_2way_update_close( &ctx.shavite, vhashB, vhashB, 64 );
if ( work_restart[thrid].restart ) return 0; if ( work_restart[thr_id].restart ) return 0;
// 5 // 5
rintrlv_2x128_4x64( vhash, vhashA, vhashB, 512 ); rintrlv_2x128_4x64( vhash, vhashA, vhashB, 512 );
@@ -1557,7 +1514,7 @@ int sonoa_4way_hash( void *state, const void *input, int thrid )
sph_whirlpool512_full( &ctx.whirlpool, hash2, hash2, 64 ); sph_whirlpool512_full( &ctx.whirlpool, hash2, hash2, 64 );
sph_whirlpool512_full( &ctx.whirlpool, hash3, hash3, 64 ); sph_whirlpool512_full( &ctx.whirlpool, hash3, hash3, 64 );
if ( work_restart[thrid].restart ) return 0; if ( work_restart[thr_id].restart ) return 0;
// 6 // 6
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 ); intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
@@ -1650,7 +1607,7 @@ int sonoa_4way_hash( void *state, const void *input, int thrid )
sph_whirlpool512_full( &ctx.whirlpool, hash2, hash2, 64 ); sph_whirlpool512_full( &ctx.whirlpool, hash2, hash2, 64 );
sph_whirlpool512_full( &ctx.whirlpool, hash3, hash3, 64 ); sph_whirlpool512_full( &ctx.whirlpool, hash3, hash3, 64 );
if ( work_restart[thrid].restart ) return 0; if ( work_restart[thr_id].restart ) return 0;
// 7 // 7
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 ); intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
@@ -1745,46 +1702,4 @@ int sonoa_4way_hash( void *state, const void *input, int thrid )
return 1; return 1;
} }
// Sonoa specific 4 lane scan loop. Calls sonoa_4way_hash directly instead of
// going through algo_gate.hash. Starts at work->data[19], tests 4 nonces per
// pass, stores the next untested nonce back in data[19] and the number of
// nonces covered in *hashes_done. Always returns 0.
int scanhash_sonoa_4way( struct work *work, const uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
// Room for 16 words per lane although only 256 bits per lane are extracted
// below.
uint32_t hash[4*16] __attribute__ ((aligned (64)));
// Sized for 24 words per lane, the 8 way version uses 20 words per lane.
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
// Index 7*4 of the interleaved hash, used for the quick target test.
uint32_t *hashd7 = &( hash[7<<2] );
uint32_t *pdata = work->data;
const uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce - 4;
const uint32_t targ32 = ptarget[7];
uint32_t n = first_nonce;
__m256i *noncev = (__m256i*)vdata + 9;
const int thr_id = mythr->id;
mm256_bswap32_intrlv80_4x64( vdata, pdata );
// Lane i starts at n + i, the nonce sits in the upper 32 bits of each
// 64 bit element.
*noncev = mm256_intrlv_blend_32(
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
do
{
// A zero return from the hash function skips the lane checks.
if ( sonoa_4way_hash( hash, vdata, thr_id ) )
for ( int lane = 0; lane < 4; lane++ )
if ( unlikely( hashd7[ lane ] <= targ32 ) )
{
extr_lane_4x32( lane_hash, hash, lane, 256 );
// Nothing is submitted when benchmarking.
if ( likely( valid_hash( lane_hash, ptarget ) && !opt_benchmark ) )
{
pdata[19] = bswap_32( n + lane );
submit_solution( work, lane_hash, mythr );
}
}
// Step every lane's nonce by 4.
*noncev = _mm256_add_epi32( *noncev,
m256_const1_64( 0x0000000400000000 ) );
n += 4;
} while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) );
pdata[19] = n;
*hashes_done = n - first_nonce;
return 0;
}
#endif #endif

View File

@@ -3,14 +3,13 @@
bool register_sonoa_algo( algo_gate_t* gate ) bool register_sonoa_algo( algo_gate_t* gate )
{ {
#if defined (SONOA_8WAY) #if defined (SONOA_8WAY)
gate->scanhash = (void*)&scanhash_sonoa_8way; gate->scanhash = (void*)&scanhash_8way_64in_32out;
gate->hash = (void*)&sonoa_8way_hash; gate->hash = (void*)&sonoa_8way_hash;
#elif defined (SONOA_4WAY) #elif defined (SONOA_4WAY)
gate->scanhash = (void*)&scanhash_sonoa_4way; gate->scanhash = (void*)&scanhash_4way_64in_32out;
gate->hash = (void*)&sonoa_4way_hash; gate->hash = (void*)&sonoa_4way_hash;
#else #else
init_sonoa_ctx(); init_sonoa_ctx();
// gate->scanhash = (void*)&scanhash_sonoa;
gate->hash = (void*)&sonoa_hash; gate->hash = (void*)&sonoa_hash;
#endif #endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT; gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT;

View File

@@ -14,21 +14,15 @@ bool register_sonoa_algo( algo_gate_t* gate );
#if defined(SONOA_8WAY) #if defined(SONOA_8WAY)
int sonoa_8way_hash( void *state, const void *input, int thrid ); int sonoa_8way_hash( void *state, const void *input, int thr_id );
int scanhash_sonoa_8way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
#elif defined(SONOA_4WAY) #elif defined(SONOA_4WAY)
int sonoa_4way_hash( void *state, const void *input, int thrid ); int sonoa_4way_hash( void *state, const void *input, int thr_id );
int scanhash_sonoa_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
#else #else
int sonoa_hash( void *state, const void *input, int thrid ); int sonoa_hash( void *state, const void *input, int thr_id );
int scanhash_sonoa( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
void init_sonoa_ctx(); void init_sonoa_ctx();
#endif #endif

View File

@@ -83,7 +83,7 @@ void init_sonoa_ctx()
sph_haval256_5_init(&sonoa_ctx.haval); sph_haval256_5_init(&sonoa_ctx.haval);
}; };
int sonoa_hash( void *state, const void *input, int thrid ) int sonoa_hash( void *state, const void *input, int thr_id )
{ {
uint8_t hash[128] __attribute__ ((aligned (64))); uint8_t hash[128] __attribute__ ((aligned (64)));
sonoa_ctx_holder ctx __attribute__ ((aligned (64))); sonoa_ctx_holder ctx __attribute__ ((aligned (64)));
@@ -132,7 +132,7 @@ int sonoa_hash( void *state, const void *input, int thrid )
sph_echo512_close(&ctx.echo, hash); sph_echo512_close(&ctx.echo, hash);
#endif #endif
if ( work_restart[thrid].restart ) return 0; if ( work_restart[thr_id].restart ) return 0;
// //
sph_bmw512_init( &ctx.bmw); sph_bmw512_init( &ctx.bmw);
@@ -190,7 +190,7 @@ int sonoa_hash( void *state, const void *input, int thrid )
sph_hamsi512(&ctx.hamsi, hash, 64); sph_hamsi512(&ctx.hamsi, hash, 64);
sph_hamsi512_close(&ctx.hamsi, hash); sph_hamsi512_close(&ctx.hamsi, hash);
if ( work_restart[thrid].restart ) return 0; if ( work_restart[thr_id].restart ) return 0;
// //
sph_bmw512_init( &ctx.bmw); sph_bmw512_init( &ctx.bmw);
@@ -252,7 +252,7 @@ int sonoa_hash( void *state, const void *input, int thrid )
sph_fugue512(&ctx.fugue, hash, 64); sph_fugue512(&ctx.fugue, hash, 64);
sph_fugue512_close(&ctx.fugue, hash); sph_fugue512_close(&ctx.fugue, hash);
if ( work_restart[thrid].restart ) return 0; if ( work_restart[thr_id].restart ) return 0;
// //
sph_bmw512_init( &ctx.bmw); sph_bmw512_init( &ctx.bmw);
@@ -336,7 +336,7 @@ int sonoa_hash( void *state, const void *input, int thrid )
sph_shavite512(&ctx.shavite, hash, 64); sph_shavite512(&ctx.shavite, hash, 64);
sph_shavite512_close(&ctx.shavite, hash); sph_shavite512_close(&ctx.shavite, hash);
if ( work_restart[thrid].restart ) return 0; if ( work_restart[thr_id].restart ) return 0;
// //
sph_bmw512_init( &ctx.bmw); sph_bmw512_init( &ctx.bmw);
@@ -410,7 +410,7 @@ int sonoa_hash( void *state, const void *input, int thrid )
sph_whirlpool(&ctx.whirlpool, hash, 64); sph_whirlpool(&ctx.whirlpool, hash, 64);
sph_whirlpool_close(&ctx.whirlpool, hash); sph_whirlpool_close(&ctx.whirlpool, hash);
if ( work_restart[thrid].restart ) return 0; if ( work_restart[thr_id].restart ) return 0;
// //
sph_bmw512_init( &ctx.bmw); sph_bmw512_init( &ctx.bmw);
sph_bmw512(&ctx.bmw, hash, 64); sph_bmw512(&ctx.bmw, hash, 64);
@@ -487,7 +487,7 @@ int sonoa_hash( void *state, const void *input, int thrid )
sph_whirlpool(&ctx.whirlpool, hash, 64); sph_whirlpool(&ctx.whirlpool, hash, 64);
sph_whirlpool_close(&ctx.whirlpool, hash); sph_whirlpool_close(&ctx.whirlpool, hash);
if ( work_restart[thrid].restart ) return 0; if ( work_restart[thr_id].restart ) return 0;
// //
sph_bmw512_init( &ctx.bmw); sph_bmw512_init( &ctx.bmw);

View File

@@ -57,7 +57,7 @@ union _x17_8way_context_overlay
} __attribute__ ((aligned (64))); } __attribute__ ((aligned (64)));
typedef union _x17_8way_context_overlay x17_8way_context_overlay; typedef union _x17_8way_context_overlay x17_8way_context_overlay;
int x17_8way_hash( void *state, const void *input ) int x17_8way_hash( void *state, const void *input, int thr_id )
{ {
uint64_t vhash[8*8] __attribute__ ((aligned (128))); uint64_t vhash[8*8] __attribute__ ((aligned (128)));
uint64_t vhashA[8*8] __attribute__ ((aligned (64))); uint64_t vhashA[8*8] __attribute__ ((aligned (64)));
@@ -234,50 +234,6 @@ int x17_8way_hash( void *state, const void *input )
return 1; return 1;
} }
// Scan nonces for x17, eight at a time.
//
// Starts at the nonce held in work->data[19] and advances in steps of 8
// until the scan bound is reached or work_restart[thr_id].restart is set.
// Every candidate that passes both the quick 32-bit test and valid_hash()
// is handed to submit_solution(). On exit the next nonce to try is written
// back to work->data[19] and the number of nonces covered is stored in
// *hashes_done. Always returns 0.
int scanhash_x17_8way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t hash32[8*8] __attribute__ ((aligned (128)));
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
// Indexed by lane below for the quick target test; presumably word 7 of
// each lane's hash in the interleaved output -- TODO confirm layout.
uint32_t *hash32_d7 = &(hash32[7*8]);
uint32_t *pdata = work->data;
const uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce - 8;
// Tenth 512-bit vector of vdata; it is the one rewritten with the nonces.
__m512i *noncev = (__m512i*)vdata + 9;
uint32_t n = first_nonce;
const int thr_id = mythr->id;
const uint32_t targ32_d7 = ptarget[7];
const bool bench = opt_benchmark;
// Fill vdata from pdata (helper defined elsewhere; by its name it byte
// swaps and interleaves the 80 byte header 8 ways -- TODO confirm).
mm512_bswap32_intrlv80_8x64( vdata, pdata );
// Nonces n..n+7 sit in the odd 32-bit elements of the vector passed in;
// the even elements are zero.
*noncev = mm512_intrlv_blend_32(
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
do
{
x17_8way_hash( hash32, vdata );
for ( int lane = 0; lane < 8; lane++ )
// Cheap 32-bit pre-filter; benchmark mode never submits.
if ( unlikely( ( hash32_d7[ lane ] <= targ32_d7 ) && !bench ) )
{
extr_lane_8x32( lane_hash, hash32, lane, 256 );
if ( likely( valid_hash( lane_hash, ptarget ) ) )
{
pdata[19] = bswap_32( n + lane );
submit_solution( work, lane_hash, mythr );
}
}
// Adds 8 to the upper 32 bits of every 64-bit element, i.e. to each of
// the eight nonces placed in the odd 32-bit elements above.
*noncev = _mm512_add_epi32( *noncev,
m512_const1_64( 0x0000000800000000 ) );
n += 8;
} while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) );
pdata[19] = n;
*hashes_done = n - first_nonce;
return 0;
}
#elif defined(X17_4WAY) #elif defined(X17_4WAY)
union _x17_4way_context_overlay union _x17_4way_context_overlay
@@ -302,7 +258,7 @@ union _x17_4way_context_overlay
}; };
typedef union _x17_4way_context_overlay x17_4way_context_overlay; typedef union _x17_4way_context_overlay x17_4way_context_overlay;
int x17_4way_hash( void *state, const void *input ) int x17_4way_hash( void *state, const void *input, int thr_id )
{ {
uint64_t vhash[8*4] __attribute__ ((aligned (64))); uint64_t vhash[8*4] __attribute__ ((aligned (64)));
uint64_t vhashA[8*4] __attribute__ ((aligned (64))); uint64_t vhashA[8*4] __attribute__ ((aligned (64)));
@@ -405,47 +361,4 @@ int x17_4way_hash( void *state, const void *input )
return 1; return 1;
} }
// Scan nonces for x17, four at a time.
//
// Starts at the nonce held in work->data[19] and advances in steps of 4
// until the scan bound is passed or work_restart[thr_id].restart is set.
// Every candidate that passes both the quick 32-bit test and valid_hash()
// is handed to submit_solution(). On exit the next nonce to try is written
// back to work->data[19] and the number of nonces covered is stored in
// *hashes_done. Always returns 0.
int scanhash_x17_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t hash32[8*4] __attribute__ ((aligned (64)));
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
// Indexed by lane below for the quick target test; presumably word 7 of
// each lane's hash in the interleaved output -- TODO confirm layout.
uint32_t *hash32_d7 = &(hash32[ 7*4 ]);
uint32_t *pdata = work->data;
const uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce - 4;
// Tenth 256-bit vector of vdata; it is the one rewritten with the nonces.
__m256i *noncev = (__m256i*)vdata + 9;
uint32_t n = first_nonce;
const int thr_id = mythr->id;
const uint32_t targ32_d7 = ptarget[7];
const bool bench = opt_benchmark;
// Fill vdata from pdata (helper defined elsewhere; by its name it byte
// swaps and interleaves the 80 byte header 4 ways -- TODO confirm).
mm256_bswap32_intrlv80_4x64( vdata, pdata );
// Nonces n..n+3 sit in the odd 32-bit elements of the vector passed in;
// the even elements are zero.
*noncev = mm256_intrlv_blend_32(
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
do
{
x17_4way_hash( hash32, vdata );
for ( int lane = 0; lane < 4; lane++ )
// Cheap 32-bit pre-filter; benchmark mode never submits.
if ( unlikely( hash32_d7[ lane ] <= targ32_d7 && !bench ) )
{
extr_lane_4x32( lane_hash, hash32, lane, 256 );
if ( valid_hash( lane_hash, ptarget ) )
{
pdata[19] = bswap_32( n + lane );
submit_solution( work, lane_hash, mythr );
}
}
// Adds 4 to the upper 32 bits of every 64-bit element, i.e. to each of
// the four nonces placed in the odd 32-bit elements above.
*noncev = _mm256_add_epi32( *noncev,
m256_const1_64( 0x0000000400000000 ) );
n += 4;
// NOTE(review): this bound is inclusive (<=) while scanhash_x17_8way uses
// a strict (<) comparison against its last_nonce -- confirm which one is
// intended.
} while ( likely( ( n <= last_nonce ) && !work_restart[thr_id].restart ) );
pdata[19] = n;
*hashes_done = n - first_nonce;
return 0;
}
#endif #endif

View File

@@ -3,10 +3,10 @@
bool register_x17_algo( algo_gate_t* gate ) bool register_x17_algo( algo_gate_t* gate )
{ {
#if defined (X17_8WAY) #if defined (X17_8WAY)
gate->scanhash = (void*)&scanhash_x17_8way; gate->scanhash = (void*)&scanhash_8way_64in_32out;
gate->hash = (void*)&x17_8way_hash; gate->hash = (void*)&x17_8way_hash;
#elif defined (X17_4WAY) #elif defined (X17_4WAY)
gate->scanhash = (void*)&scanhash_x17_4way; gate->scanhash = (void*)&scanhash_4way_64in_32out;
gate->hash = (void*)&x17_4way_hash; gate->hash = (void*)&x17_4way_hash;
#else #else
gate->hash = (void*)&x17_hash; gate->hash = (void*)&x17_hash;

View File

@@ -14,14 +14,11 @@ bool register_x17_algo( algo_gate_t* gate );
#if defined(X17_8WAY) #if defined(X17_8WAY)
int x17_8way_hash( void *state, const void *input ); int x17_8way_hash( void *state, const void *input, int thr_id );
int scanhash_x17_8way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
#elif defined(X17_4WAY) #elif defined(X17_4WAY)
int x17_4way_hash( void *state, const void *input ); int x17_4way_hash( void *state, const void *input, int thr_id );
int scanhash_x17_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
#endif #endif

View File

@@ -57,7 +57,7 @@ union _xevan_8way_context_overlay
} __attribute__ ((aligned (64))); } __attribute__ ((aligned (64)));
typedef union _xevan_8way_context_overlay xevan_8way_context_overlay; typedef union _xevan_8way_context_overlay xevan_8way_context_overlay;
int xevan_8way_hash( void *output, const void *input ) int xevan_8way_hash( void *output, const void *input, int thr_id )
{ {
uint64_t vhash[16<<3] __attribute__ ((aligned (128))); uint64_t vhash[16<<3] __attribute__ ((aligned (128)));
uint64_t vhashA[16<<3] __attribute__ ((aligned (64))); uint64_t vhashA[16<<3] __attribute__ ((aligned (64)));
@@ -399,50 +399,6 @@ int xevan_8way_hash( void *output, const void *input )
return 1; return 1;
} }
// Scan nonces for xevan, eight at a time.
//
// Starts at the nonce held in work->data[19] and advances in steps of 8
// until the scan bound is reached or work_restart[thr_id].restart is set.
// Every candidate that passes both the quick 32-bit test and valid_hash()
// is handed to submit_solution(). On exit the next nonce to try is written
// back to work->data[19] and the number of nonces covered is stored in
// *hashes_done. Always returns 0.
int scanhash_xevan_8way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t hash[8*8] __attribute__ ((aligned (128)));
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
// Indexed by lane below for the quick target test; presumably word 7 of
// each lane's hash in the interleaved output -- TODO confirm layout.
uint32_t *hashd7 = &(hash[7*8]);
uint32_t *pdata = work->data;
const uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce - 8;
// Tenth 512-bit vector of vdata; it is the one rewritten with the nonces.
__m512i *noncev = (__m512i*)vdata + 9;
uint32_t n = first_nonce;
const int thr_id = mythr->id;
const uint32_t targ32 = ptarget[7];
const bool bench = opt_benchmark;
// Fill vdata from pdata (helper defined elsewhere; by its name it byte
// swaps and interleaves the 80 byte header 8 ways -- TODO confirm).
mm512_bswap32_intrlv80_8x64( vdata, pdata );
// Nonces n..n+7 sit in the odd 32-bit elements of the vector passed in;
// the even elements are zero.
*noncev = mm512_intrlv_blend_32(
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
do
{
xevan_8way_hash( hash, vdata );
for ( int lane = 0; lane < 8; lane++ )
// Cheap 32-bit pre-filter; benchmark mode never submits.
if ( unlikely( ( hashd7[ lane ] <= targ32 ) && !bench ) )
{
extr_lane_8x32( lane_hash, hash, lane, 256 );
if ( likely( valid_hash( lane_hash, ptarget ) ) )
{
pdata[19] = bswap_32( n + lane );
submit_solution( work, lane_hash, mythr );
}
}
// Adds 8 to the upper 32 bits of every 64-bit element, i.e. to each of
// the eight nonces placed in the odd 32-bit elements above.
*noncev = _mm512_add_epi32( *noncev,
m512_const1_64( 0x0000000800000000 ) );
n += 8;
} while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) );
pdata[19] = n;
*hashes_done = n - first_nonce;
return 0;
}
#elif defined(XEVAN_4WAY) #elif defined(XEVAN_4WAY)
union _xevan_4way_context_overlay union _xevan_4way_context_overlay
@@ -467,7 +423,7 @@ union _xevan_4way_context_overlay
}; };
typedef union _xevan_4way_context_overlay xevan_4way_context_overlay; typedef union _xevan_4way_context_overlay xevan_4way_context_overlay;
int xevan_4way_hash( void *output, const void *input ) int xevan_4way_hash( void *output, const void *input, int thr_id )
{ {
uint64_t hash0[16] __attribute__ ((aligned (64))); uint64_t hash0[16] __attribute__ ((aligned (64)));
uint64_t hash1[16] __attribute__ ((aligned (64))); uint64_t hash1[16] __attribute__ ((aligned (64)));
@@ -672,47 +628,4 @@ int xevan_4way_hash( void *output, const void *input )
return 1; return 1;
} }
// Scan nonces for xevan, four at a time.
//
// Starts at the nonce held in work->data[19] and advances in steps of 4
// until the scan bound is reached or work_restart[thr_id].restart is set.
// Every candidate that passes both the quick 32-bit test and valid_hash()
// is handed to submit_solution(). On exit the next nonce to try is written
// back to work->data[19] and the number of nonces covered is stored in
// *hashes_done. Always returns 0.
int scanhash_xevan_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t hash[16*4] __attribute__ ((aligned (128)));
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
// Indexed by lane below for the quick target test; presumably word 7 of
// each lane's hash in the interleaved output -- TODO confirm layout.
uint32_t *hashd7 = &(hash[7<<2]);
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
int thr_id = mythr->id;
// Tenth 256-bit vector of vdata; it is the one rewritten with the nonces.
__m256i *noncev = (__m256i*)vdata + 9;
const uint32_t targ32 = ptarget[7];
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce - 4;
uint32_t n = first_nonce;
const bool bench = opt_benchmark;
// NOTE(review): this store happens after targ32 was read above, and the
// only later use of ptarget (valid_hash) is skipped when bench is set, so
// the value is never read back inside this function. It does modify
// work->target for the caller -- confirm whether that is still wanted.
if ( bench ) ptarget[7] = 0x0cff;
// Fill vdata from pdata (helper defined elsewhere; by its name it byte
// swaps and interleaves the 80 byte header 4 ways -- TODO confirm).
mm256_bswap32_intrlv80_4x64( vdata, pdata );
// Nonces n..n+3 sit in the odd 32-bit elements of the vector passed in;
// the even elements are zero.
*noncev = mm256_intrlv_blend_32(
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
do {
xevan_4way_hash( hash, vdata );
for ( int lane = 0; lane < 4; lane++ )
// Cheap 32-bit pre-filter; benchmark mode never submits.
// NOTE(review): unlikely() wraps only the comparison here, whereas
// scanhash_xevan_8way wraps the whole condition including !bench.
if ( unlikely( hashd7[ lane ] <= targ32 ) && ! bench )
{
extr_lane_4x32( lane_hash, hash, lane, 256 );
if ( valid_hash( lane_hash, ptarget ) )
{
pdata[19] = bswap_32( n + lane );
submit_solution( work, lane_hash, mythr );
}
}
// Adds 4 to the upper 32 bits of every 64-bit element, i.e. to each of
// the four nonces placed in the odd 32-bit elements above.
*noncev = _mm256_add_epi32( *noncev,
m256_const1_64( 0x0000000400000000 ) );
n += 4;
} while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) );
pdata[19] = n;
*hashes_done = n - first_nonce;
return 0;
}
#endif #endif

View File

@@ -3,10 +3,10 @@
bool register_xevan_algo( algo_gate_t* gate ) bool register_xevan_algo( algo_gate_t* gate )
{ {
#if defined (XEVAN_8WAY) #if defined (XEVAN_8WAY)
gate->scanhash = (void*)&scanhash_xevan_8way; gate->scanhash = (void*)&scanhash_8way_64in_32out;
gate->hash = (void*)&xevan_8way_hash; gate->hash = (void*)&xevan_8way_hash;
#elif defined (XEVAN_4WAY) #elif defined (XEVAN_4WAY)
gate->scanhash = (void*)&scanhash_xevan_4way; gate->scanhash = (void*)&scanhash_4way_64in_32out;
gate->hash = (void*)&xevan_4way_hash; gate->hash = (void*)&xevan_4way_hash;
#else #else
init_xevan_ctx(); init_xevan_ctx();

View File

@@ -14,16 +14,11 @@ bool register_xevan_algo( algo_gate_t* gate );
#if defined(XEVAN_8WAY) #if defined(XEVAN_8WAY)
int xevan_8way_hash( void *state, const void *input ); int xevan_8way_hash( void *state, const void *input, int thr_id );
int scanhash_xevan_8way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
#elif defined(XEVAN_4WAY) #elif defined(XEVAN_4WAY)
int xevan_4way_hash( void *state, const void *input ); int xevan_4way_hash( void *state, const void *input, int thr_id );
int scanhash_xevan_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
//void init_xevan_4way_ctx();
#else #else

20
configure vendored
View File

@@ -1,6 +1,6 @@
#! /bin/sh #! /bin/sh
# Guess values for system-dependent variables and create Makefiles. # Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.14.2. # Generated by GNU Autoconf 2.69 for cpuminer-opt 3.14.3.
# #
# #
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc. # Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
@@ -577,8 +577,8 @@ MAKEFLAGS=
# Identity of this package. # Identity of this package.
PACKAGE_NAME='cpuminer-opt' PACKAGE_NAME='cpuminer-opt'
PACKAGE_TARNAME='cpuminer-opt' PACKAGE_TARNAME='cpuminer-opt'
PACKAGE_VERSION='3.14.2' PACKAGE_VERSION='3.14.3'
PACKAGE_STRING='cpuminer-opt 3.14.2' PACKAGE_STRING='cpuminer-opt 3.14.3'
PACKAGE_BUGREPORT='' PACKAGE_BUGREPORT=''
PACKAGE_URL='' PACKAGE_URL=''
@@ -1332,7 +1332,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing. # Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh. # This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF cat <<_ACEOF
\`configure' configures cpuminer-opt 3.14.2 to adapt to many kinds of systems. \`configure' configures cpuminer-opt 3.14.3 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]... Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1404,7 +1404,7 @@ fi
if test -n "$ac_init_help"; then if test -n "$ac_init_help"; then
case $ac_init_help in case $ac_init_help in
short | recursive ) echo "Configuration of cpuminer-opt 3.14.2:";; short | recursive ) echo "Configuration of cpuminer-opt 3.14.3:";;
esac esac
cat <<\_ACEOF cat <<\_ACEOF
@@ -1509,7 +1509,7 @@ fi
test -n "$ac_init_help" && exit $ac_status test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then if $ac_init_version; then
cat <<\_ACEOF cat <<\_ACEOF
cpuminer-opt configure 3.14.2 cpuminer-opt configure 3.14.3
generated by GNU Autoconf 2.69 generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc. Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2012,7 +2012,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake. running configure, to aid debugging if configure makes a mistake.
It was created by cpuminer-opt $as_me 3.14.2, which was It was created by cpuminer-opt $as_me 3.14.3, which was
generated by GNU Autoconf 2.69. Invocation command line was generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@ $ $0 $@
@@ -2993,7 +2993,7 @@ fi
# Define the identity of the package. # Define the identity of the package.
PACKAGE='cpuminer-opt' PACKAGE='cpuminer-opt'
VERSION='3.14.2' VERSION='3.14.3'
cat >>confdefs.h <<_ACEOF cat >>confdefs.h <<_ACEOF
@@ -6690,7 +6690,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their # report actual input values of CONFIG_FILES etc. instead of their
# values after options handling. # values after options handling.
ac_log=" ac_log="
This file was extended by cpuminer-opt $as_me 3.14.2, which was This file was extended by cpuminer-opt $as_me 3.14.3, which was
generated by GNU Autoconf 2.69. Invocation command line was generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES CONFIG_FILES = $CONFIG_FILES
@@ -6756,7 +6756,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\ ac_cs_version="\\
cpuminer-opt config.status 3.14.2 cpuminer-opt config.status 3.14.3
configured by $0, generated by GNU Autoconf 2.69, configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\" with options \\"\$ac_cs_config\\"

View File

@@ -1,4 +1,4 @@
AC_INIT([cpuminer-opt], [3.14.2]) AC_INIT([cpuminer-opt], [3.14.3])
AC_PREREQ([2.59c]) AC_PREREQ([2.59c])
AC_CANONICAL_SYSTEM AC_CANONICAL_SYSTEM

View File

@@ -92,7 +92,7 @@ bool want_longpoll = false;
bool have_longpoll = false; bool have_longpoll = false;
bool have_gbt = true; bool have_gbt = true;
bool allow_getwork = true; bool allow_getwork = true;
bool want_stratum = true; bool want_stratum = true; // pretty useless
bool have_stratum = false; bool have_stratum = false;
bool allow_mininginfo = true; bool allow_mininginfo = true;
bool use_syslog = false; bool use_syslog = false;
@@ -215,7 +215,7 @@ static char const short_options[] =
static struct work g_work __attribute__ ((aligned (64))) = {{ 0 }}; static struct work g_work __attribute__ ((aligned (64))) = {{ 0 }};
time_t g_work_time = 0; time_t g_work_time = 0;
pthread_mutex_t g_work_lock; pthread_rwlock_t g_work_lock;
static bool submit_old = false; static bool submit_old = false;
char* lp_id; char* lp_id;
@@ -1232,7 +1232,7 @@ static int share_result( int result, struct work *work,
if ( use_colors ) if ( use_colors )
{ {
bcol = acol = scol = rcol = CL_N; bcol = acol = scol = rcol = CL_WHT;
if ( likely( result ) ) if ( likely( result ) )
{ {
acol = CL_WHT CL_GRN; acol = CL_WHT CL_GRN;
@@ -1242,27 +1242,22 @@ static int share_result( int result, struct work *work,
else rcol = CL_WHT CL_RED; else rcol = CL_WHT CL_RED;
} }
applog( LOG_NOTICE, "%d %s%s %s%s %s%s %s%s" CL_N ", %.3f sec (%dms)", applog( LOG_NOTICE, "%d %s%s %s%s %s%s %s%s" CL_WHT ", %.3f sec (%dms)",
my_stats.share_count, acol, ares, scol, sres, rcol, rres, bcol, my_stats.share_count, acol, ares, scol, sres, rcol, rres, bcol,
bres, share_time, latency ); bres, share_time, latency );
/* if ( unlikely( opt_debug || !result || solved ) )
if ( !opt_quiet )
{ {
if ( have_stratum ) if ( have_stratum )
applog2( LOG_INFO, "Diff %.5g (%.3g), %sBlock %d" CL_N ", %sJob %s", applog2( LOG_INFO, "Diff %.5g, Block %d, Job %s",
my_stats.share_diff, share_ratio, bcol, stratum.block_height, my_stats.share_diff, stratum.block_height,
scol, my_stats.job_id ); my_stats.job_id );
else else
{ applog2( LOG_INFO, "Diff %.5g, Block %d",
uint64_t height = work ? work->height : last_block_height; my_stats.share_diff, work ? work->height : last_block_height );
applog2( LOG_INFO, "Diff %.5g (%.3g), %sBlock %d",
my_stats.share_diff, share_ratio, bcol, height );
}
} }
*/
if ( unlikely( opt_debug || !( opt_quiet || result || stale ) ) ) if ( unlikely( !( opt_quiet || result || stale ) ) )
{ {
uint32_t str[8]; uint32_t str[8];
@@ -1835,9 +1830,9 @@ bool submit_solution( struct work *work, const void *hash,
if unlikely( !have_stratum && !have_longpoll ) if unlikely( !have_stratum && !have_longpoll )
{ // block solved, force getwork { // block solved, force getwork
pthread_mutex_lock( &g_work_lock ); pthread_rwlock_wrlock( &g_work_lock );
g_work_time = 0; g_work_time = 0;
pthread_mutex_unlock( &g_work_lock ); pthread_rwlock_unlock( &g_work_lock );
} }
if ( !opt_quiet ) if ( !opt_quiet )
@@ -1960,7 +1955,7 @@ void std_get_new_work( struct work* work, struct work* g_work, int thr_id,
uint32_t *nonceptr = work->data + algo_gate.nonce_index; uint32_t *nonceptr = work->data + algo_gate.nonce_index;
bool force_new_work = false; bool force_new_work = false;
pthread_mutex_lock( &g_work_lock ); pthread_rwlock_rdlock( &g_work_lock );
if ( have_stratum ) if ( have_stratum )
force_new_work = work->job_id ? strtoul( work->job_id, NULL, 16 ) force_new_work = work->job_id ? strtoul( work->job_id, NULL, 16 )
@@ -1978,7 +1973,7 @@ void std_get_new_work( struct work* work, struct work* g_work, int thr_id,
else else
++(*nonceptr); ++(*nonceptr);
pthread_mutex_unlock( &g_work_lock ); pthread_rwlock_unlock( &g_work_lock );
} }
bool std_ready_to_mine( struct work* work, struct stratum_ctx* stratum, bool std_ready_to_mine( struct work* work, struct stratum_ctx* stratum,
@@ -1998,7 +1993,7 @@ static void stratum_gen_work( struct stratum_ctx *sctx, struct work *g_work )
bool new_job = *get_stratum_job_ntime() bool new_job = *get_stratum_job_ntime()
!= g_work->data[ algo_gate.ntime_index ]; != g_work->data[ algo_gate.ntime_index ];
pthread_mutex_lock( &g_work_lock ); pthread_rwlock_wrlock( &g_work_lock );
pthread_mutex_lock( &sctx->work_lock ); pthread_mutex_lock( &sctx->work_lock );
free( g_work->job_id ); free( g_work->job_id );
@@ -2013,11 +2008,13 @@ static void stratum_gen_work( struct stratum_ctx *sctx, struct work *g_work )
g_work->targetdiff = sctx->job.diff g_work->targetdiff = sctx->job.diff
/ ( opt_target_factor * opt_diff_factor ); / ( opt_target_factor * opt_diff_factor );
diff_to_hash( g_work->target, g_work->targetdiff ); diff_to_hash( g_work->target, g_work->targetdiff );
// Increment extranonce2
for ( int t = 0; t < sctx->xnonce2_size && !( ++sctx->job.xnonce2[t] ); t++ );
g_work_time = time(NULL); g_work_time = time(NULL);
restart_threads(); restart_threads();
pthread_mutex_unlock( &sctx->work_lock ); pthread_mutex_unlock( &sctx->work_lock );
pthread_mutex_unlock( &g_work_lock ); pthread_rwlock_unlock( &g_work_lock );
pthread_mutex_lock( &stats_lock ); pthread_mutex_lock( &stats_lock );
@@ -2037,11 +2034,11 @@ static void stratum_gen_work( struct stratum_ctx *sctx, struct work *g_work )
else if ( new_job && g_work->job_id ) else if ( new_job && g_work->job_id )
applog( LOG_BLUE, "New Work: Block %d, Net diff %.5g, Job %s", applog( LOG_BLUE, "New Work: Block %d, Net diff %.5g, Job %s",
sctx->block_height, net_diff, g_work->job_id ); sctx->block_height, net_diff, g_work->job_id );
else if ( opt_debug ) else if ( !opt_quiet )
{ {
unsigned char *xnonce2str = abin2hex( g_work->xnonce2, unsigned char *xnonce2str = abin2hex( g_work->xnonce2,
g_work->xnonce2_len ); g_work->xnonce2_len );
applog( LOG_INFO, "Extranonce2 %s, Block %d, Net Diff %.5g", applog( LOG_INFO, "Extranonce %s, Block %d, Net Diff %.5g",
xnonce2str, sctx->block_height, net_diff ); xnonce2str, sctx->block_height, net_diff );
free( xnonce2str ); free( xnonce2str );
} }
@@ -2222,24 +2219,24 @@ static void *miner_thread( void *userdata )
} }
else else
{ {
int scantime = have_longpoll ? LP_SCANTIME : opt_scantime; pthread_rwlock_wrlock( &g_work_lock );
pthread_mutex_lock( &g_work_lock );
if ( ( ( time(NULL) - g_work_time ) >= scantime ) if ( ( ( time(NULL) - g_work_time )
>= ( have_longpoll ? LP_SCANTIME : opt_scantime ) )
|| ( *nonceptr >= end_nonce ) ) || ( *nonceptr >= end_nonce ) )
{ {
if ( unlikely( !get_work( mythr, &g_work ) ) ) if ( unlikely( !get_work( mythr, &g_work ) ) )
{ {
pthread_mutex_unlock( &g_work_lock ); pthread_rwlock_unlock( &g_work_lock );
applog( LOG_ERR, "work retrieval failed, exiting " applog( LOG_ERR, "work retrieval failed, exiting "
"mining thread %d", thr_id ); "mining thread %d", thr_id );
goto out; goto out;
} }
g_work_time = time(NULL); g_work_time = time(NULL);
restart_threads(); restart_threads();
} }
pthread_mutex_unlock( &g_work_lock ); pthread_rwlock_unlock( &g_work_lock );
} }
algo_gate.get_new_work( &work, &g_work, thr_id, &end_nonce ); algo_gate.get_new_work( &work, &g_work, thr_id, &end_nonce );
@@ -2349,10 +2346,10 @@ static void *miner_thread( void *userdata )
// we can't submit twice a block! // we can't submit twice a block!
if unlikely( !have_stratum && !have_longpoll ) if unlikely( !have_stratum && !have_longpoll )
{ {
pthread_mutex_lock( &g_work_lock ); pthread_rwlock_wrlock( &g_work_lock );
// will force getwork // will force getwork
g_work_time = 0; g_work_time = 0;
pthread_mutex_unlock( &g_work_lock ); pthread_rwlock_unlock( &g_work_lock );
} }
} }
@@ -2384,11 +2381,9 @@ static void *miner_thread( void *userdata )
if ( use_colors && ( curr_temp >= 70 ) ) if ( use_colors && ( curr_temp >= 70 ) )
{ {
if ( curr_temp >= 80 ) if ( curr_temp >= 80 )
sprintf( tempstr, "%s%d C%s", sprintf( tempstr, "%s%d C%s", CL_RED, curr_temp, CL_WHT );
CL_WHT CL_RED, curr_temp, CL_N );
else else
sprintf( tempstr, "%s%d C%s", sprintf( tempstr, "%s%d C%s", CL_YLW, curr_temp, CL_WHT );
CL_WHT CL_YLW, curr_temp, CL_N );
} }
else else
sprintf( tempstr, "%d C", curr_temp ); sprintf( tempstr, "%d C", curr_temp );
@@ -2539,7 +2534,8 @@ start:
res = json_object_get(val, "result"); res = json_object_get(val, "result");
soval = json_object_get(res, "submitold"); soval = json_object_get(res, "submitold");
submit_old = soval ? json_is_true(soval) : false; submit_old = soval ? json_is_true(soval) : false;
pthread_mutex_lock(&g_work_lock);
pthread_rwlock_wrlock( &g_work_lock );
// This code has been here for a long time even though job_id isn't used. // This code has been here for a long time even though job_id isn't used.
// This needs to be changed eventually to test the block height properly // This needs to be changed eventually to test the block height properly
@@ -2573,14 +2569,16 @@ start:
} }
} }
free(start_job_id); free(start_job_id);
pthread_mutex_unlock(&g_work_lock);
pthread_rwlock_unlock( &g_work_lock );
json_decref(val); json_decref(val);
} }
else // !val else // !val
{ {
pthread_mutex_lock(&g_work_lock); pthread_rwlock_wrlock( &g_work_lock );
g_work_time -= LP_SCANTIME; g_work_time -= LP_SCANTIME;
pthread_mutex_unlock(&g_work_lock); pthread_rwlock_unlock( &g_work_lock );
if (err == CURLE_OPERATION_TIMEDOUT) if (err == CURLE_OPERATION_TIMEDOUT)
{ {
restart_threads(); restart_threads();
@@ -2689,12 +2687,8 @@ void std_build_block_header( struct work* g_work, uint32_t version,
void std_build_extraheader( struct work* g_work, struct stratum_ctx* sctx ) void std_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
{ {
uchar merkle_tree[64] = { 0 }; uchar merkle_tree[64] = { 0 };
size_t t;
algo_gate.gen_merkle_root( merkle_tree, sctx ); algo_gate.gen_merkle_root( merkle_tree, sctx );
// Increment extranonce2
for ( t = 0; t < sctx->xnonce2_size && !( ++sctx->job.xnonce2[t] ); t++ );
// Assemble block header
algo_gate.build_block_header( g_work, le32dec( sctx->job.version ), algo_gate.build_block_header( g_work, le32dec( sctx->job.version ),
(uint32_t*) sctx->job.prevhash, (uint32_t*) merkle_tree, (uint32_t*) sctx->job.prevhash, (uint32_t*) merkle_tree,
le32dec( sctx->job.ntime ), le32dec(sctx->job.nbits), le32dec( sctx->job.ntime ), le32dec(sctx->job.nbits),
@@ -2733,10 +2727,10 @@ static void *stratum_thread(void *userdata )
while ( !stratum.curl ) while ( !stratum.curl )
{ {
pthread_mutex_lock( &g_work_lock ); pthread_rwlock_wrlock( &g_work_lock );
g_work_time = 0; g_work_time = 0;
pthread_mutex_unlock( &g_work_lock ); pthread_rwlock_unlock( &g_work_lock );
restart_threads(); // restart_threads();
if ( !stratum_connect( &stratum, stratum.url ) if ( !stratum_connect( &stratum, stratum.url )
|| !stratum_subscribe( &stratum ) || !stratum_subscribe( &stratum )
|| !stratum_authorize( &stratum, rpc_user, rpc_pass ) ) || !stratum_authorize( &stratum, rpc_user, rpc_pass ) )
@@ -2872,167 +2866,180 @@ void parse_arg(int key, char *arg )
uint64_t ul; uint64_t ul;
double d; double d;
switch(key) switch( key )
{ {
case 'a': case 'a': // algo
get_algo_alias( &arg ); get_algo_alias( &arg );
for (i = 1; i < ALGO_COUNT; i++) for (i = 1; i < ALGO_COUNT; i++)
{ {
v = (int) strlen(algo_names[i]); v = (int) strlen( algo_names[i] );
if (v && !strncasecmp(arg, algo_names[i], v)) if ( v && !strncasecmp( arg, algo_names[i], v ) )
{ {
if (arg[v] == '\0') if ( arg[v] == '\0' )
{ {
opt_algo = (enum algos) i; opt_algo = (enum algos) i;
break; break;
} }
if (arg[v] == ':') if ( arg[v] == ':' )
{ {
char *ep; char *ep;
v = strtol(arg+v+1, &ep, 10); v = strtol( arg+v+1, &ep, 10 );
if (*ep || v < 2) if ( *ep || v < 2 )
continue; continue;
opt_algo = (enum algos) i; opt_algo = (enum algos) i;
opt_param_n = v; opt_param_n = v;
break; break;
} }
} }
} }
if (i == ALGO_COUNT) if ( i == ALGO_COUNT )
{ {
applog(LOG_ERR,"Unknown algo: %s",arg); applog( LOG_ERR,"Unknown algo: %s",arg );
show_usage_and_exit(1); show_usage_and_exit( 1 );
} }
break; break;
case 'b': case 'b': // api-bind
opt_api_enabled = true; opt_api_enabled = true;
p = strstr(arg, ":"); p = strstr(arg, ":");
if (p) { if ( p )
{
/* ip:port */ /* ip:port */
if (p - arg > 0) { if ( p - arg > 0 )
{
opt_api_allow = strdup(arg); opt_api_allow = strdup(arg);
opt_api_allow[p - arg] = '\0'; opt_api_allow[p - arg] = '\0';
} }
opt_api_listen = atoi(p + 1); opt_api_listen = atoi(p + 1);
} }
else if (arg && strstr(arg, ".")) { else if ( arg && strstr( arg, "." ) )
{
/* ip only */ /* ip only */
free(opt_api_allow); free(opt_api_allow);
opt_api_allow = strdup(arg); opt_api_allow = strdup(arg);
opt_api_listen = default_api_listen; opt_api_listen = default_api_listen;
} }
else if (arg) { else if ( arg )
{
/* port or 0 to disable */ /* port or 0 to disable */
opt_api_allow = default_api_allow; opt_api_allow = default_api_allow;
opt_api_listen = atoi(arg); opt_api_listen = atoi(arg);
} }
break; break;
case 1030: /* --api-remote */ case 1030: // api-remote
opt_api_remote = 1; opt_api_remote = 1;
break; break;
case 'B': case 'B': // background
opt_background = true; opt_background = true;
use_colors = false; use_colors = false;
break; break;
case 'c': { case 'c': { // config
json_error_t err; json_error_t err;
json_t *config; json_t *config;
if (arg && strstr(arg, "://")) if (arg && strstr(arg, "://"))
config = json_load_url(arg, &err); config = json_load_url(arg, &err);
else else
config = JSON_LOADF(arg, &err); config = JSON_LOADF(arg, &err);
if (!json_is_object(config)) if (!json_is_object(config))
{ {
if (err.line < 0) if (err.line < 0)
fprintf(stderr, "%s\n", err.text); fprintf(stderr, "%s\n", err.text);
else else
fprintf(stderr, "%s:%d: %s\n", fprintf(stderr, "%s:%d: %s\n", arg, err.line, err.text);
arg, err.line, err.text);
} }
else else
{ {
parse_config(config, arg); parse_config(config, arg);
json_decref(config); json_decref(config);
} }
break; break;
} }
case 'q':
opt_quiet = true; // debug overrides quiet
case 'q': // quiet
if ( !( opt_debug || opt_protocol ) ) opt_quiet = true;
break; break;
case 'D': case 'D': // debug
opt_debug = true; opt_debug = true;
break; opt_quiet = false;
case 'p': break;
case 'p': // pass
free(rpc_pass); free(rpc_pass);
rpc_pass = strdup(arg); rpc_pass = strdup(arg);
strhide(arg); strhide(arg);
break; break;
case 'P': case 'P': // protocol
opt_protocol = true; opt_protocol = true;
opt_quiet = false;
break; break;
case 'r': case 'r': // retries
v = atoi(arg); v = atoi(arg);
if (v < -1 || v > 9999) /* sanity check */ if (v < -1 || v > 9999) /* sanity check */
show_usage_and_exit(1); show_usage_and_exit(1);
opt_retries = v; opt_retries = v;
break; break;
case 1025: case 1025: // retry-pause
v = atoi(arg); v = atoi(arg);
if (v < 1 || v > 9999) /* sanity check */ if (v < 1 || v > 9999) /* sanity check */
show_usage_and_exit(1); show_usage_and_exit(1);
opt_fail_pause = v; opt_fail_pause = v;
break; break;
case 's': case 's': // scantime
v = atoi(arg); v = atoi(arg);
if (v < 1 || v > 9999) /* sanity check */ if (v < 1 || v > 9999) /* sanity check */
show_usage_and_exit(1); show_usage_and_exit(1);
opt_scantime = v; opt_scantime = v;
break; break;
case 'T': case 'T': // timeout
v = atoi(arg); v = atoi(arg);
if (v < 1 || v > 99999) /* sanity check */ if (v < 1 || v > 99999) /* sanity check */
show_usage_and_exit(1); show_usage_and_exit(1);
opt_timeout = v; opt_timeout = v;
break; break;
case 't': case 't': // threads
v = atoi(arg); v = atoi(arg);
if (v < 0 || v > 9999) /* sanity check */ if (v < 0 || v > 9999) /* sanity check */
show_usage_and_exit(1); show_usage_and_exit(1);
opt_n_threads = v; opt_n_threads = v;
break; break;
case 'u': case 'u': // user
free(rpc_user); free(rpc_user);
rpc_user = strdup(arg); rpc_user = strdup(arg);
break; break;
case 'o': { /* --url */ case 'o': // url
{
char *ap, *hp; char *ap, *hp;
ap = strstr(arg, "://"); ap = strstr( arg, "://" );
ap = ap ? ap + 3 : arg; ap = ap ? ap + 3 : arg;
hp = strrchr(arg, '@'); hp = strrchr( arg, '@' );
if (hp) { if ( hp )
{
*hp = '\0'; *hp = '\0';
p = strchr(ap, ':'); p = strchr( ap, ':' );
if (p) { if ( p )
free(rpc_userpass); {
rpc_userpass = strdup(ap); free( rpc_userpass );
free(rpc_user); rpc_userpass = strdup( ap );
rpc_user = (char*) calloc(p - ap + 1, 1); free( rpc_user );
strncpy(rpc_user, ap, p - ap); rpc_user = (char*)calloc( p - ap + 1, 1 );
free(rpc_pass); strncpy( rpc_user, ap, p - ap );
rpc_pass = strdup(++p); free( rpc_pass );
if (*p) *p++ = 'x'; rpc_pass = strdup( ++p );
v = (int) strlen(hp + 1) + 1; if ( *p ) *p++ = 'x';
memmove(p + 1, hp + 1, v); v = (int)strlen( hp + 1 ) + 1;
memset(p + v, 0, hp - p); memmove( p + 1, hp + 1, v );
memset( p + v, 0, hp - p );
hp = p; hp = p;
} else { }
free(rpc_user); else
rpc_user = strdup(ap); {
free( rpc_user );
rpc_user = strdup( ap );
} }
*hp++ = '@'; *hp++ = '@';
} else }
else
hp = ap; hp = ap;
if ( ap != arg ) if ( ap != arg )
{ {
@@ -3048,23 +3055,26 @@ void parse_arg(int key, char *arg )
rpc_url = strdup(arg); rpc_url = strdup(arg);
strcpy(rpc_url + (ap - arg), hp); strcpy(rpc_url + (ap - arg), hp);
short_url = &rpc_url[ap - arg]; short_url = &rpc_url[ap - arg];
} else { }
if (*hp == '\0' || *hp == '/') { else
fprintf(stderr, "invalid URL -- '%s'\n", {
arg); if ( *hp == '\0' || *hp == '/' )
show_usage_and_exit(1); {
fprintf( stderr, "invalid URL -- '%s'\n", arg );
show_usage_and_exit( 1 );
} }
free(rpc_url); free( rpc_url );
rpc_url = (char*) malloc( strlen(hp) + 15 ); rpc_url = (char*) malloc( strlen(hp) + 15 );
sprintf( rpc_url, "stratum+tcp://%s", hp ); sprintf( rpc_url, "stratum+tcp://%s", hp );
short_url = &rpc_url[ sizeof("stratum+tcp://") - 1 ]; short_url = &rpc_url[ sizeof("stratum+tcp://") - 1 ];
} }
have_stratum = !opt_benchmark && !strncasecmp(rpc_url, "stratum", 7); have_stratum = !opt_benchmark && !strncasecmp( rpc_url, "stratum", 7 );
break; break;
} }
case 'O': /* --userpass */ case 'O': // userpass
p = strchr(arg, ':'); p = strchr(arg, ':');
if (!p) { if (!p)
{
fprintf(stderr, "invalid username:password pair -- '%s'\n", arg); fprintf(stderr, "invalid username:password pair -- '%s'\n", arg);
show_usage_and_exit(1); show_usage_and_exit(1);
} }
@@ -3077,15 +3087,15 @@ void parse_arg(int key, char *arg )
rpc_pass = strdup(++p); rpc_pass = strdup(++p);
strhide(p); strhide(p);
break; break;
case 'x': /* --proxy */ case 'x': // proxy
if (!strncasecmp(arg, "socks4://", 9)) if ( !strncasecmp( arg, "socks4://", 9 ) )
opt_proxy_type = CURLPROXY_SOCKS4; opt_proxy_type = CURLPROXY_SOCKS4;
else if (!strncasecmp(arg, "socks5://", 9)) else if ( !strncasecmp( arg, "socks5://", 9 ) )
opt_proxy_type = CURLPROXY_SOCKS5; opt_proxy_type = CURLPROXY_SOCKS5;
#if LIBCURL_VERSION_NUM >= 0x071200 #if LIBCURL_VERSION_NUM >= 0x071200
else if (!strncasecmp(arg, "socks4a://", 10)) else if ( !strncasecmp( arg, "socks4a://", 10 ) )
opt_proxy_type = CURLPROXY_SOCKS4A; opt_proxy_type = CURLPROXY_SOCKS4A;
else if (!strncasecmp(arg, "socks5h://", 10)) else if ( !strncasecmp( arg, "socks5h://", 10 ) )
opt_proxy_type = CURLPROXY_SOCKS5_HOSTNAME; opt_proxy_type = CURLPROXY_SOCKS5_HOSTNAME;
#endif #endif
else else
@@ -3093,42 +3103,42 @@ void parse_arg(int key, char *arg )
free(opt_proxy); free(opt_proxy);
opt_proxy = strdup(arg); opt_proxy = strdup(arg);
break; break;
case 1001: case 1001: // cert
free(opt_cert); free(opt_cert);
opt_cert = strdup(arg); opt_cert = strdup(arg);
break; break;
case 1002: case 1002: // no-color
use_colors = false; use_colors = false;
break; break;
case 1003: case 1003: // no-longpoll
want_longpoll = false; want_longpoll = false;
break; break;
case 1005: case 1005: // benchmark
opt_benchmark = true; opt_benchmark = true;
want_longpoll = false; want_longpoll = false;
want_stratum = false; want_stratum = false;
have_stratum = false; have_stratum = false;
break; break;
case 1006: case 1006: // cputest
// print_hash_tests(); // print_hash_tests();
exit(0); exit(0);
case 1007: case 1007: // no-stratum
want_stratum = false; want_stratum = false;
opt_extranonce = false; opt_extranonce = false;
break; break;
case 1008: case 1008: // time-limit
opt_time_limit = atoi(arg); opt_time_limit = atoi(arg);
break; break;
case 1009: case 1009: // no-redirect
opt_redirect = false; opt_redirect = false;
break; break;
case 1010: case 1010: // no-getwork
allow_getwork = false; allow_getwork = false;
break; break;
case 1011: case 1011: // no-gbt
have_gbt = false; have_gbt = false;
break; break;
case 1012: case 1012: // no-extranonce
opt_extranonce = false; opt_extranonce = false;
break; break;
case 1014: // hash-meter case 1014: // hash-meter
@@ -3138,11 +3148,12 @@ void parse_arg(int key, char *arg )
if ( arg ) coinbase_address = strdup( arg ); if ( arg ) coinbase_address = strdup( arg );
break; break;
case 1015: /* --coinbase-sig */ case 1015: /* --coinbase-sig */
if (strlen(arg) + 1 > sizeof(coinbase_sig)) { if ( strlen( arg ) + 1 > sizeof(coinbase_sig) )
fprintf(stderr, "coinbase signature too long\n"); {
show_usage_and_exit(1); fprintf( stderr, "coinbase signature too long\n" );
show_usage_and_exit( 1 );
} }
strcpy(coinbase_sig, arg); strcpy( coinbase_sig, arg );
break; break;
case 'f': case 'f':
d = atof(arg); d = atof(arg);
@@ -3156,11 +3167,13 @@ void parse_arg(int key, char *arg )
show_usage_and_exit(1); show_usage_and_exit(1);
opt_diff_factor = 1.0/d; opt_diff_factor = 1.0/d;
break; break;
case 'S': #ifdef HAVE_SYSLOG_H
case 'S': // syslog
use_syslog = true; use_syslog = true;
use_colors = false; use_colors = false;
break; break;
case 1020: #endif
case 1020: // cpu-affinity
p = strstr(arg, "0x"); p = strstr(arg, "0x");
if ( p ) if ( p )
ul = strtoull( p, NULL, 16 ); ul = strtoull( p, NULL, 16 );
@@ -3171,14 +3184,14 @@ void parse_arg(int key, char *arg )
#if AFFINITY_USES_UINT128 #if AFFINITY_USES_UINT128
// replicate the low 64 bits to make a full 128 bit mask if there are more // replicate the low 64 bits to make a full 128 bit mask if there are more
// than 64 CPUs, otherwise zero extend the upper half. // than 64 CPUs, otherwise zero extend the upper half.
opt_affinity = (uint128_t)ul; opt_affinity = (uint128_t)ul;
if ( num_cpus > 64 ) if ( num_cpus > 64 )
opt_affinity = (opt_affinity << 64 ) | opt_affinity; opt_affinity = (opt_affinity << 64 ) | opt_affinity;
#else #else
opt_affinity = ul; opt_affinity = ul;
#endif #endif
break; break;
case 1021: case 1021: // cpu-priority
v = atoi(arg); v = atoi(arg);
if (v < 0 || v > 5) /* sanity check */ if (v < 0 || v > 5) /* sanity check */
show_usage_and_exit(1); show_usage_and_exit(1);
@@ -3637,7 +3650,7 @@ int main(int argc, char *argv[])
if ( !check_cpu_capability() ) exit(1); if ( !check_cpu_capability() ) exit(1);
pthread_mutex_init( &stats_lock, NULL ); pthread_mutex_init( &stats_lock, NULL );
pthread_mutex_init( &g_work_lock, NULL ); pthread_rwlock_init( &g_work_lock, NULL );
pthread_mutex_init( &stratum.sock_lock, NULL ); pthread_mutex_init( &stratum.sock_lock, NULL );
pthread_mutex_init( &stratum.work_lock, NULL ); pthread_mutex_init( &stratum.work_lock, NULL );
@@ -3797,7 +3810,7 @@ int main(int argc, char *argv[])
return 1; return 1;
} }
} }
if (want_stratum) if ( have_stratum )
{ {
if ( opt_debug ) if ( opt_debug )
applog(LOG_INFO,"Creating stratum thread"); applog(LOG_INFO,"Creating stratum thread");

27
miner.h
View File

@@ -83,6 +83,8 @@ enum {
}; };
#endif #endif
extern bool is_power_of_2( int n );
static inline bool is_windows(void) static inline bool is_windows(void)
{ {
#ifdef WIN32 #ifdef WIN32
@@ -378,36 +380,25 @@ void cpu_brand_string( char* s );
float cpu_temp( int core ); float cpu_temp( int core );
*/ */
struct work { struct work
{
uint32_t target[8] __attribute__ ((aligned (64)));
uint32_t data[48] __attribute__ ((aligned (64))); uint32_t data[48] __attribute__ ((aligned (64)));
uint32_t target[8] __attribute__ ((aligned (64)));
double targetdiff; double targetdiff;
// double shareratio;
double sharediff; double sharediff;
double stratum_diff; double stratum_diff;
int height; int height;
char *txs; char *txs;
char *workid; char *workid;
char *job_id; char *job_id;
size_t xnonce2_len; size_t xnonce2_len;
unsigned char *xnonce2; unsigned char *xnonce2;
bool sapling; bool sapling;
bool stale; bool stale;
// x16rt
uint32_t merkleroothash[8];
uint32_t witmerkleroothash[8];
uint32_t denom10[8];
uint32_t denom100[8];
uint32_t denom1000[8];
uint32_t denom10000[8];
} __attribute__ ((aligned (64))); } __attribute__ ((aligned (64)));
struct stratum_job { struct stratum_job
{
unsigned char prevhash[32]; unsigned char prevhash[32];
unsigned char final_sapling_hash[32]; unsigned char final_sapling_hash[32];
char *job_id; char *job_id;
@@ -421,7 +412,7 @@ struct stratum_job {
unsigned char ntime[4]; unsigned char ntime[4];
double diff; double diff;
bool clean; bool clean;
// for x16rt // for x16rt-veil
unsigned char extra[64]; unsigned char extra[64];
unsigned char denom10[32]; unsigned char denom10[32];
unsigned char denom100[32]; unsigned char denom100[32];
@@ -756,7 +747,7 @@ extern double opt_diff_factor;
extern double opt_target_factor; extern double opt_target_factor;
extern bool opt_randomize; extern bool opt_randomize;
extern bool allow_mininginfo; extern bool allow_mininginfo;
extern pthread_mutex_t g_work_lock; extern pthread_rwlock_t g_work_lock;
extern time_t g_work_time; extern time_t g_work_time;
extern bool opt_stratum_stats; extern bool opt_stratum_stats;
extern int num_cpus; extern int num_cpus;

View File

@@ -375,10 +375,10 @@ static inline void memcpy_512( __m512i *dst, const __m512i *src, const int n )
// Generic for odd rotations // Generic for odd rotations
#define mm512_ror_x64( v, n ) _mm512_alignr_epi64( v, v, n ) #define mm512_ror_x64( v, n ) _mm512_alignr_epi64( v, v, n )
#define mm512_rol_x64( v, n ) _mm512_alignr_epi64( v, v, 8-n ) #define mm512_rol_x64( v, n ) _mm512_alignr_epi64( v, v, 8-(n) )
#define mm512_ror_x32( v, n ) _mm512_alignr_epi32( v, v, n ) #define mm512_ror_x32( v, n ) _mm512_alignr_epi32( v, v, n )
#define mm512_rol_x32( v, n ) _mm512_alignr_epi32( v, v, 16-n ) #define mm512_rol_x32( v, n ) _mm512_alignr_epi32( v, v, 16-(n) )
#define mm512_ror_1x16( v ) \ #define mm512_ror_1x16( v ) \
_mm512_permutexvar_epi16( m512_const_64( \ _mm512_permutexvar_epi16( m512_const_64( \

11
util.c
View File

@@ -81,6 +81,15 @@ struct thread_q {
pthread_cond_t cond; pthread_cond_t cond;
}; };
// Report whether n is a power of 2.
//
// Returns true when n has exactly one bit set (1, 2, 4, 8, ...).
// Any n <= 1, including 0 and negative values, is also reported as true,
// so callers that must reject those need to range check n themselves.
bool is_power_of_2( int n )
{
   // Nothing to test below 2, these values are accepted unconditionally.
   if ( n <= 1 ) return true;

   // For n >= 2, clearing the lowest set bit leaves zero only when that
   // bit was the sole bit set.
   return ( n & ( n - 1 ) ) == 0;
}
void applog2( int prio, const char *fmt, ... ) void applog2( int prio, const char *fmt, ... )
{ {
@@ -609,6 +618,8 @@ json_t *json_rpc_call(CURL *curl, const char *url,
goto err_out; goto err_out;
} }
// want_stratum is useless, and so is this code it seems. Nothing in
// hi appears to be set.
/* If X-Stratum was found, activate Stratum */ /* If X-Stratum was found, activate Stratum */
if (want_stratum && hi.stratum_url && if (want_stratum && hi.stratum_url &&
!strncasecmp(hi.stratum_url, "stratum+tcp://", 14)) { !strncasecmp(hi.stratum_url, "stratum+tcp://", 14)) {