Compare commits

...

6 Commits

Author SHA1 Message Date
Jay D Dee
cdd587537e v3.14.3 2020-06-18 17:30:26 -04:00
Jay D Dee
51a1d91abd v3.14.2 2020-05-30 21:20:44 -04:00
Jay D Dee
13563e2598 v3.14.1 2020-05-21 13:00:29 -04:00
Jay D Dee
9571f85d53 v3.14.0 2020-05-20 13:56:35 -04:00
Jay D Dee
0e69756634 v3.13.2-segwit-test 2020-05-18 18:17:27 -04:00
Jay D Dee
9653bca1e2 v3.13.1.1 2020-05-17 19:21:37 -04:00
39 changed files with 915 additions and 891 deletions

View File

@@ -46,6 +46,15 @@ Icelake is only available on some laptops. Mining with a laptop is not
recommended. The icelake build is included in anticipation of Intel eventually
releasing a desktop CPU with a microarchitecture newer than Skylake.
Notes about included DLL files:
Downloading DLL files from alternative sources presents an inherent
security risk if their source is unknown. All DLL files included have
been copied from the Ubuntu-20.04 instalation or compiled by me from
source code obtained from the author's official repository. The exact
procedure is documented in the build instructions for Windows:
https://github.com/JayDDee/cpuminer-opt/wiki/Compiling-from-source
If you like this software feel free to donate:
BTC: 12tdvfF7KmAsihBXQXynT6E6th2c2pByTT

View File

@@ -65,6 +65,49 @@ If not what makes it happen or not happen?
Change Log
----------
v3.14.3
#265: more mutex changes to reduce blocking with high thread count.
#267: fixed hodl algo potential memory alignment issue,
add warning when thread count is not valid for mining hodl algo.
v3.14.2
The second line of the Share Accepted log is no longer displayed,
new Xnonce log is added and other small log tweaks.
#265: Cleanup use of mutex.
v3.14.1
GBT and getwork log changes:
fixed missing TTF in New Block log,
ntime no longer byte-swapped for display in New Work log,
fixed zero effective hash rate in Periodic Report log,
deleted "Current block is..." log.
Renamed stratum "New Job" log to "New Work" to be consistent with the solo
version of the log. Added more data to both versions.
v3.14.0
Changes to solo mining:
- segwit is supported by getblocktemplate,
- longpolling is not working and is disabled,
- Periodic Report log is output,
- New Block log includes TTF estimates,
- Stratum thread no longer created when using getwork or GBT.
Fixed BUG log mining sha256d.
v3.13.1.1
Fixed Windows crash mining minotaur algo.
Fixed GCC 10 compile again.
Added -fno-common to testing to be consistent with GCC 10 default.
v3.13.1
Added minotaur algo for Ringcoin.

View File

@@ -105,17 +105,16 @@ int scanhash_generic( struct work *work, uint32_t max_nonce,
uint32_t hash[8] __attribute__((aligned(64)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
uint32_t n = pdata[19];
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce - 1;
uint32_t n = first_nonce;
const int thr_id = mythr->id;
const bool bench = opt_benchmark;
mm128_bswap32_80( edata, pdata );
do
{
edata[19] = n;
if ( likely( algo_gate.hash( hash, edata, thr_id ) ) )
if ( unlikely( valid_hash( hash, ptarget ) && !bench ) )
{
@@ -123,12 +122,125 @@ int scanhash_generic( struct work *work, uint32_t max_nonce,
submit_solution( work, hash, mythr );
}
n++;
} while ( n < max_nonce && !work_restart[thr_id].restart );
} while ( n < last_nonce && !work_restart[thr_id].restart );
*hashes_done = n - first_nonce;
pdata[19] = n;
return 0;
}
#if defined(__AVX2__)
//int scanhash_4way_64_64( struct work *work, uint32_t max_nonce,
// uint64_t *hashes_done, struct thr_info *mythr )
//int scanhash_4way_64_640( struct work *work, uint32_t max_nonce,
// uint64_t *hashes_done, struct thr_info *mythr )
int scanhash_4way_64in_32out( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t hash32[8*4] __attribute__ ((aligned (64)));
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
uint32_t *hash32_d7 = &(hash32[ 7*4 ]);
uint32_t *pdata = work->data;
const uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce - 4;
__m256i *noncev = (__m256i*)vdata + 9;
uint32_t n = first_nonce;
const int thr_id = mythr->id;
const uint32_t targ32_d7 = ptarget[7];
const bool bench = opt_benchmark;
mm256_bswap32_intrlv80_4x64( vdata, pdata );
*noncev = mm256_intrlv_blend_32(
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
do
{
if ( likely( algo_gate.hash( hash32, vdata, thr_id ) ) )
for ( int lane = 0; lane < 4; lane++ )
if ( unlikely( hash32_d7[ lane ] <= targ32_d7 && !bench ) )
{
extr_lane_4x32( lane_hash, hash32, lane, 256 );
if ( valid_hash( lane_hash, ptarget ) )
{
pdata[19] = bswap_32( n + lane );
submit_solution( work, lane_hash, mythr );
}
}
*noncev = _mm256_add_epi32( *noncev,
m256_const1_64( 0x0000000400000000 ) );
n += 4;
} while ( likely( ( n <= last_nonce ) && !work_restart[thr_id].restart ) );
pdata[19] = n;
*hashes_done = n - first_nonce;
return 0;
}
//int scanhash_8way_32_32( struct work *work, uint32_t max_nonce,
// uint64_t *hashes_done, struct thr_info *mythr )
#endif
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
//int scanhash_8way_64_64( struct work *work, uint32_t max_nonce,
// uint64_t *hashes_done, struct thr_info *mythr )
//int scanhash_8way_64_640( struct work *work, uint32_t max_nonce,
// uint64_t *hashes_done, struct thr_info *mythr )
int scanhash_8way_64in_32out( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t hash32[8*8] __attribute__ ((aligned (128)));
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
uint32_t *hash32_d7 = &(hash32[7*8]);
uint32_t *pdata = work->data;
const uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce - 8;
__m512i *noncev = (__m512i*)vdata + 9;
uint32_t n = first_nonce;
const int thr_id = mythr->id;
const uint32_t targ32_d7 = ptarget[7];
const bool bench = opt_benchmark;
mm512_bswap32_intrlv80_8x64( vdata, pdata );
*noncev = mm512_intrlv_blend_32(
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
do
{
if ( likely( algo_gate.hash( hash32, vdata, thr_id ) ) )
for ( int lane = 0; lane < 8; lane++ )
if ( unlikely( ( hash32_d7[ lane ] <= targ32_d7 ) && !bench ) )
{
extr_lane_8x32( lane_hash, hash32, lane, 256 );
if ( likely( valid_hash( lane_hash, ptarget ) ) )
{
pdata[19] = bswap_32( n + lane );
submit_solution( work, lane_hash, mythr );
}
}
*noncev = _mm512_add_epi32( *noncev,
m512_const1_64( 0x0000000800000000 ) );
n += 8;
} while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) );
pdata[19] = n;
*hashes_done = n - first_nonce;
return 0;
}
//int scanhash_16way_32_32( struct work *work, uint32_t max_nonce,
// uint64_t *hashes_done, struct thr_info *mythr )
#endif
int null_hash()
{
applog(LOG_WARNING,"SWERR: null_hash unsafe null function");

View File

@@ -110,10 +110,12 @@ inline bool set_excl ( set_t a, set_t b ) { return (a & b) == 0; }
typedef struct
{
// mandatory function, must be overwritten
// Mandatory functions, one of these is mandatory. If a generic scanhash
// is used a custom hash function must be registered, with a custom scanhash
// the custom hash function can be called directly and doesn't need to be
// registered in the gate.
int ( *scanhash ) ( struct work*, uint32_t, uint64_t*, struct thr_info* );
//int ( *hash ) ( void*, const void*, uint32_t ) ;
int ( *hash ) ( void*, const void*, int );
//optional, safe to use default in most cases
@@ -126,7 +128,7 @@ bool ( *miner_thread_init ) ( int );
void ( *get_new_work ) ( struct work*, struct work*, int, uint32_t* );
// Decode getwork blockheader
bool ( *work_decode ) ( const json_t*, struct work* );
bool ( *work_decode ) ( struct work* );
// Extra getwork data
void ( *decode_extra_data ) ( struct work*, uint64_t* );
@@ -201,19 +203,61 @@ void four_way_not_tested();
#define STD_WORK_DATA_SIZE 128
#define STD_WORK_CMP_SIZE 76
#define JR2_NONCE_INDEX 39 // 8 bit offset
//#define JR2_NONCE_INDEX 39 // 8 bit offset
// These indexes are only used with JSON RPC2 and are not gated.
#define JR2_WORK_CMP_INDEX_2 43
#define JR2_WORK_CMP_SIZE_2 33
//#define JR2_WORK_CMP_INDEX_2 43
//#define JR2_WORK_CMP_SIZE_2 33
// deprecated, use generic instead
int null_scanhash();
// Default generic, may be used in many cases.
// N-way is more complicated, requires many different implementations
// depending on architecture, input format, and output format.
// Naming convention is scanhash_[N]way_[input format]in_[output format]out
// N = number of lanes
// input/output format:
// 32: 32 bit interleaved parallel lanes
// 64: 64 bit interleaved parallel lanes
// 640: input only, not interleaved, contiguous serial 640 bit lanes.
// 256: output only, not interleaved, contiguous serial 256 bit lanes.
int scanhash_generic( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
#if defined(__AVX2__)
//int scanhash_4way_64in_64out( struct work *work, uint32_t max_nonce,
// uint64_t *hashes_done, struct thr_info *mythr );
//int scanhash_4way_64in_256out( struct work *work, uint32_t max_nonce,
// uint64_t *hashes_done, struct thr_info *mythr );
int scanhash_4way_64in_32out( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
//int scanhash_8way_32in_32out( struct work *work, uint32_t max_nonce,
// uint64_t *hashes_done, struct thr_info *mythr );
#endif
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
//int scanhash_8way_64in_64out( struct work *work, uint32_t max_nonce,
// uint64_t *hashes_done, struct thr_info *mythr );
//int scanhash_8way_64in_256out( struct work *work, uint32_t max_nonce,
// uint64_t *hashes_done, struct thr_info *mythr );
int scanhash_8way_64in_32out( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
//int scanhash_16way_32in_32out( struct work *work, uint32_t max_nonce,
// uint64_t *hashes_done, struct thr_info *mythr );
#endif
// displays warning
int null_hash ();
@@ -225,8 +269,8 @@ void std_get_new_work( struct work *work, struct work *g_work, int thr_id,
void sha256d_gen_merkle_root( char *merkle_root, struct stratum_ctx *sctx );
void SHA256_gen_merkle_root ( char *merkle_root, struct stratum_ctx *sctx );
bool std_le_work_decode( const json_t *val, struct work *work );
bool std_be_work_decode( const json_t *val, struct work *work );
bool std_le_work_decode( struct work *work );
bool std_be_work_decode( struct work *work );
bool std_le_submit_getwork_result( CURL *curl, struct work *work );
bool std_be_submit_getwork_result( CURL *curl, struct work *work );
@@ -261,7 +305,7 @@ int std_get_work_data_size();
// by calling the algo's register function.
bool register_algo_gate( int algo, algo_gate_t *gate );
// Called by algos toverride any default gate functions that are applicable
// Called by algos to verride any default gate functions that are applicable
// and do any other algo-specific initialization.
// The register functions for all the algos can be declared here to reduce
// compiler warnings but that's just more work for devs adding new algos.

View File

@@ -78,7 +78,6 @@ void decred_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
uint32_t extraheader[32] = { 0 };
int headersize = 0;
uint32_t* extradata = (uint32_t*) sctx->xnonce1;
size_t t;
int i;
// getwork over stratum, getwork merkle + header passed in coinb1
@@ -87,9 +86,6 @@ void decred_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
sizeof(extraheader) );
memcpy( extraheader, &sctx->job.coinbase[32], headersize );
// Increment extranonce2
for ( t = 0; t < sctx->xnonce2_size && !( ++sctx->job.xnonce2[t] ); t++ );
// Assemble block header
memset( g_work->data, 0, sizeof(g_work->data) );
g_work->data[0] = le32dec( sctx->job.version );

View File

@@ -99,9 +99,13 @@ void hodl_build_block_header( struct work* g_work, uint32_t version,
// called only by thread 0, saves a backup of g_work
void hodl_get_new_work( struct work* work, struct work* g_work)
{
work_free( &hodl_work );
work_copy( &hodl_work, g_work );
hodl_work.data[ algo_gate.nonce_index ] = ( clock() + rand() ) % 9999;
pthread_rwlock_rdlock( &g_work_lock );
work_free( &hodl_work );
work_copy( &hodl_work, g_work );
hodl_work.data[ algo_gate.nonce_index ] = ( clock() + rand() ) % 9999;
pthread_rwlock_unlock( &g_work_lock );
}
json_t *hodl_longpoll_rpc_call( CURL *curl, int *err, char* lp_url )
@@ -155,11 +159,10 @@ bool register_hodl_algo( algo_gate_t* gate )
applog( LOG_ERR, "Only CPUs with AES are supported, use legacy version.");
return false;
#endif
// if ( TOTAL_CHUNKS % opt_n_threads )
// {
// applog(LOG_ERR,"Thread count must be power of 2.");
// return false;
// }
if ( GARBAGE_SIZE % opt_n_threads )
applog( LOG_WARNING,"WARNING: Thread count must be power of 2. Miner may crash or produce invalid hash!" );
pthread_barrier_init( &hodl_barrier, NULL, opt_n_threads );
gate->optimizations = SSE42_OPT | AES_OPT | AVX2_OPT;
gate->scanhash = (void*)&hodl_scanhash;
@@ -171,7 +174,7 @@ bool register_hodl_algo( algo_gate_t* gate )
gate->resync_threads = (void*)&hodl_resync_threads;
gate->do_this_thread = (void*)&hodl_do_this_thread;
gate->work_cmp_size = 76;
hodl_scratchbuf = (unsigned char*)malloc( 1 << 30 );
hodl_scratchbuf = (unsigned char*)_mm_malloc( 1 << 30, 64 );
allow_getwork = false;
opt_target_factor = 8388608.0;
return ( hodl_scratchbuf != NULL );

View File

@@ -70,7 +70,7 @@ int scanhash_hodl_wolf( struct work* work, uint32_t max_nonce,
uint32_t *ptarget = work->target;
int threadNumber = mythr->id;
CacheEntry *Garbage = (CacheEntry*)hodl_scratchbuf;
CacheEntry Cache[AES_PARALLEL_N];
CacheEntry Cache[AES_PARALLEL_N] __attribute__ ((aligned (64)));
__m128i* data[AES_PARALLEL_N];
const __m128i* next[AES_PARALLEL_N];
uint32_t CollisionCount = 0;

View File

@@ -215,9 +215,6 @@ void phi2_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
size_t t;
algo_gate.gen_merkle_root( merkle_tree, sctx );
// Increment extranonce2
for ( t = 0; t < sctx->xnonce2_size && !( ++sctx->job.xnonce2[t] ); t++ );
// Assemble block header
algo_gate.build_block_header( g_work, le32dec( sctx->job.version ),
(uint32_t*) sctx->job.prevhash, (uint32_t*) merkle_tree,
le32dec( sctx->job.ntime ), le32dec(sctx->job.nbits), NULL );
@@ -225,7 +222,6 @@ void phi2_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
g_work->data[ 20+t ] = ((uint32_t*)sctx->job.extra)[t];
}
bool register_phi2_algo( algo_gate_t* gate )
{
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT;

View File

@@ -156,6 +156,8 @@ int scanhash_zr5( struct work *work, uint32_t max_nonce,
void zr5_get_new_work( struct work* work, struct work* g_work, int thr_id,
uint32_t* end_nonce_ptr )
{
pthread_rwlock_rdlock( &g_work_lock );
// ignore POK in first word
const int wkcmp_sz = 72; // (19-1) * sizeof(uint32_t)
uint32_t *nonceptr = work->data + algo_gate.nonce_index;
@@ -171,6 +173,8 @@ void zr5_get_new_work( struct work* work, struct work* g_work, int thr_id,
}
else
++(*nonceptr);
pthread_rwlock_unlock( &g_work_lock );
}
void zr5_display_pok( struct work* work )

View File

@@ -69,13 +69,9 @@ void lbry_build_block_header( struct work* g_work, uint32_t version,
void lbry_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
{
unsigned char merkle_root[64] = { 0 };
size_t t;
int i;
algo_gate.gen_merkle_root( merkle_root, sctx );
// Increment extranonce2
for ( t = 0; t < sctx->xnonce2_size && !( ++sctx->job.xnonce2[t] ); t++ );
// Assemble block header
memset( g_work->data, 0, sizeof(g_work->data) );
g_work->data[0] = le32dec( sctx->job.version );

View File

@@ -479,8 +479,8 @@ static inline void sha256d_ms(uint32_t *hash, uint32_t *W,
void sha256d_ms_4way(uint32_t *hash, uint32_t *data,
const uint32_t *midstate, const uint32_t *prehash);
static inline int scanhash_sha256d_4way(int thr_id, struct work *work,
uint32_t max_nonce, uint64_t *hashes_done)
static inline int scanhash_sha256d_4way( struct work *work,
uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
@@ -492,6 +492,7 @@ static inline int scanhash_sha256d_4way(int thr_id, struct work *work,
uint32_t n = pdata[19] - 1;
const uint32_t first_nonce = pdata[19];
const uint32_t Htarg = ptarget[7];
int thr_id = mythr->id;
int i, j;
memcpy(data, pdata + 16, 64);
@@ -521,10 +522,8 @@ static inline int scanhash_sha256d_4way(int thr_id, struct work *work,
if (swab32(hash[4 * 7 + i]) <= Htarg) {
pdata[19] = data[4 * 3 + i];
sha256d_80_swap(hash, pdata);
if (fulltest(hash, ptarget)) {
*hashes_done = n - first_nonce + 1;
return 1;
}
if ( fulltest( hash, ptarget ) && !opt_benchmark )
submit_solution( work, hash, mythr );
}
}
} while (n < max_nonce && !work_restart[thr_id].restart);
@@ -541,8 +540,8 @@ static inline int scanhash_sha256d_4way(int thr_id, struct work *work,
void sha256d_ms_8way(uint32_t *hash, uint32_t *data,
const uint32_t *midstate, const uint32_t *prehash);
static inline int scanhash_sha256d_8way(int thr_id, struct work *work,
uint32_t max_nonce, uint64_t *hashes_done)
static inline int scanhash_sha256d_8way( struct work *work,
uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
@@ -554,6 +553,7 @@ static inline int scanhash_sha256d_8way(int thr_id, struct work *work,
uint32_t n = pdata[19] - 1;
const uint32_t first_nonce = pdata[19];
const uint32_t Htarg = ptarget[7];
int thr_id = mythr->id;
int i, j;
memcpy(data, pdata + 16, 64);
@@ -583,10 +583,8 @@ static inline int scanhash_sha256d_8way(int thr_id, struct work *work,
if (swab32(hash[8 * 7 + i]) <= Htarg) {
pdata[19] = data[8 * 3 + i];
sha256d_80_swap(hash, pdata);
if (fulltest(hash, ptarget)) {
*hashes_done = n - first_nonce + 1;
return 1;
}
if ( fulltest( hash, ptarget ) && !opt_benchmark )
submit_solution( work, hash, mythr );
}
}
} while (n < max_nonce && !work_restart[thr_id].restart);
@@ -614,13 +612,11 @@ int scanhash_sha256d( struct work *work,
#ifdef HAVE_SHA256_8WAY
if (sha256_use_8way())
return scanhash_sha256d_8way(thr_id, work,
max_nonce, hashes_done);
return scanhash_sha256d_8way( work, max_nonce, hashes_done, mythr );
#endif
#ifdef HAVE_SHA256_4WAY
if (sha256_use_4way())
return scanhash_sha256d_4way(thr_id, work,
max_nonce, hashes_done);
return scanhash_sha256d_4way( work, max_nonce, hashes_done, mythr );
#endif
memcpy(data, pdata + 16, 64);
@@ -657,7 +653,7 @@ int scanhash_SHA256d( struct work *work, const uint32_t max_nonce,
uint32_t n = pdata[19] - 1;
const uint32_t first_nonce = pdata[19];
const uint32_t Htarg = ptarget[7];
int thr_id = mythr->id; // thr_id arg is deprecated
int thr_id = mythr->id;
memcpy( data, pdata, 80 );

View File

@@ -731,7 +731,7 @@ void skein512_8way_full( skein512_8way_context *sc, void *out, const void *data,
void
skein512_8way_prehash64( skein512_8way_context *sc, const void *data )
{
__m512i *vdata = (__m512*)data;
__m512i *vdata = (__m512i*)data;
__m512i *buf = sc->buf;
buf[0] = vdata[0];
buf[1] = vdata[1];

View File

@@ -7,7 +7,6 @@
#include <stdio.h>
#include "algo/blake/sph_blake.h"
#include "algo/bmw/sph_bmw.h"
#include "algo/groestl/sph_groestl.h"
#include "algo/jh/sph_jh.h"
#include "algo/keccak/sph_keccak.h"
#include "algo/skein/sph_skein.h"
@@ -15,7 +14,6 @@
#include "algo/luffa/luffa_for_sse2.h"
#include "algo/cubehash/cubehash_sse2.h"
#include "algo/simd/nist.h"
#include "algo/echo/sph_echo.h"
#include "algo/hamsi/sph_hamsi.h"
#include "algo/fugue/sph_fugue.h"
#include "algo/shabal/sph_shabal.h"
@@ -24,9 +22,11 @@
#if defined(__AES__)
#include "algo/echo/aes_ni/hash_api.h"
#include "algo/groestl/aes_ni/hash-groestl.h"
#else
#include "algo/echo/sph_echo.h"
#include "algo/groestl/sph_groestl.h"
#endif
// Config
#define MINOTAUR_ALGO_COUNT 16
@@ -39,8 +39,8 @@ struct TortureGarden {
hashState_echo echo;
hashState_groestl groestl;
#else
sph_groestl512_context groestl;
sph_echo512_context echo;
sph_groestl512_context groestl;
#endif
sph_blake512_context blake;
sph_bmw512_context bmw;
@@ -62,13 +62,13 @@ struct TortureGarden {
TortureNode *childLeft;
TortureNode *childRight;
} nodes[22];
};
} __attribute__ ((aligned (64)));
// Get a 64-byte hash for given 64-byte input, using given TortureGarden contexts and given algo index
static void get_hash( void *output, const void *input, TortureGarden *garden,
unsigned int algo )
{
unsigned char _ALIGN(64) hash[64];
unsigned char hash[64] __attribute__ ((aligned (64)));
switch (algo) {
case 0:
@@ -170,7 +170,7 @@ static void get_hash( void *output, const void *input, TortureGarden *garden,
static void traverse_garden( TortureGarden *garden, void *hash,
TortureNode *node )
{
unsigned char _ALIGN(64) partialHash[64];
unsigned char partialHash[64] __attribute__ ((aligned (64)));
get_hash(partialHash, hash, garden, node->algo);
if ( partialHash[63] % 2 == 0 )
@@ -195,9 +195,9 @@ static inline void link_nodes( TortureNode *parent, TortureNode *childLeft,
parent->childRight = childRight;
}
static TortureGarden garden;
static __thread TortureGarden garden;
void initialize_torture_garden()
bool initialize_torture_garden()
{
// Create torture garden nodes. Note that both sides of 19 and 20 lead to 21, and 21 has no children (to make traversal complete).
link_nodes(&garden.nodes[0], &garden.nodes[1], &garden.nodes[2]);
@@ -223,12 +223,13 @@ void initialize_torture_garden()
link_nodes(&garden.nodes[20], &garden.nodes[21], &garden.nodes[21]);
garden.nodes[21].childLeft = NULL;
garden.nodes[21].childRight = NULL;
return true;
}
// Produce a 32-byte hash from 80-byte input data
int minotaur_hash( void *output, const void *input )
int minotaur_hash( void *output, const void *input, int thr_id )
{
unsigned char _ALIGN(64) hash[64];
unsigned char hash[64] __attribute__ ((aligned (64)));
// Find initial sha512 hash
SHA512_Init( &garden.sha512 );
@@ -251,7 +252,7 @@ bool register_minotaur_algo( algo_gate_t* gate )
{
gate->hash = (void*)&minotaur_hash;
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT;
initialize_torture_garden();
gate->miner_thread_init = (void*)&initialize_torture_garden;
return true;
};

View File

@@ -135,18 +135,16 @@ void x16rt_getAlgoString( const uint32_t *timeHash, char *output)
void veil_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
{
uint32_t merkleroothash[8];
uint32_t witmerkleroothash[8];
uint32_t denom10[8];
uint32_t denom100[8];
uint32_t denom1000[8];
uint32_t denom10000[8];
int i;
uchar merkle_tree[64] = { 0 };
size_t t;
algo_gate.gen_merkle_root( merkle_tree, sctx );
// Increment extranonce2
for ( t = 0; t < sctx->xnonce2_size && !( ++sctx->job.xnonce2[t] ); t++ );
// Assemble block header
// algo_gate.build_block_header( g_work, le32dec( sctx->job.version ),
// (uint32_t*) sctx->job.prevhash, (uint32_t*) merkle_tree,
// le32dec( sctx->job.ntime ), le32dec(sctx->job.nbits) );
int i;
memset( g_work->data, 0, sizeof(g_work->data) );
g_work->data[0] = le32dec( sctx->job.version );
@@ -164,35 +162,35 @@ void veil_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
g_work->data[31] = 0x00000280;
for ( i = 0; i < 8; i++ )
g_work->merkleroothash[7 - i] = be32dec((uint32_t *)merkle_tree + i);
merkleroothash[7 - i] = be32dec((uint32_t *)merkle_tree + i);
for ( i = 0; i < 8; i++ )
g_work->witmerkleroothash[7 - i] = be32dec((uint32_t *)merkle_tree + i);
witmerkleroothash[7 - i] = be32dec((uint32_t *)merkle_tree + i);
for ( i = 0; i < 8; i++ )
g_work->denom10[i] = le32dec((uint32_t *)sctx->job.denom10 + i);
denom10[i] = le32dec((uint32_t *)sctx->job.denom10 + i);
for ( i = 0; i < 8; i++ )
g_work->denom100[i] = le32dec((uint32_t *)sctx->job.denom100 + i);
denom100[i] = le32dec((uint32_t *)sctx->job.denom100 + i);
for ( i = 0; i < 8; i++ )
g_work->denom1000[i] = le32dec((uint32_t *)sctx->job.denom1000 + i);
denom1000[i] = le32dec((uint32_t *)sctx->job.denom1000 + i);
for ( i = 0; i < 8; i++ )
g_work->denom10000[i] = le32dec((uint32_t *)sctx->job.denom10000 + i);
denom10000[i] = le32dec((uint32_t *)sctx->job.denom10000 + i);
uint32_t pofnhash[8];
memset(pofnhash, 0x00, 32);
char denom10_str [ 2 * sizeof( g_work->denom10 ) + 1 ];
char denom100_str [ 2 * sizeof( g_work->denom100 ) + 1 ];
char denom1000_str [ 2 * sizeof( g_work->denom1000 ) + 1 ];
char denom10000_str [ 2 * sizeof( g_work->denom10000 ) + 1 ];
char merkleroot_str [ 2 * sizeof( g_work->merkleroothash ) + 1 ];
char witmerkleroot_str[ 2 * sizeof( g_work->witmerkleroothash ) + 1 ];
char denom10_str [ 2 * sizeof( denom10 ) + 1 ];
char denom100_str [ 2 * sizeof( denom100 ) + 1 ];
char denom1000_str [ 2 * sizeof( denom1000 ) + 1 ];
char denom10000_str [ 2 * sizeof( denom10000 ) + 1 ];
char merkleroot_str [ 2 * sizeof( merkleroothash ) + 1 ];
char witmerkleroot_str[ 2 * sizeof( witmerkleroothash ) + 1 ];
char pofn_str [ 2 * sizeof( pofnhash ) + 1 ];
cbin2hex( denom10_str, (char*) g_work->denom10, 32 );
cbin2hex( denom100_str, (char*) g_work->denom100, 32 );
cbin2hex( denom1000_str, (char*) g_work->denom1000, 32 );
cbin2hex( denom10000_str, (char*) g_work->denom10000, 32 );
cbin2hex( merkleroot_str, (char*) g_work->merkleroothash, 32 );
cbin2hex( witmerkleroot_str, (char*) g_work->witmerkleroothash, 32 );
cbin2hex( denom10_str, (char*) denom10, 32 );
cbin2hex( denom100_str, (char*) denom100, 32 );
cbin2hex( denom1000_str, (char*) denom1000, 32 );
cbin2hex( denom10000_str, (char*) denom10000, 32 );
cbin2hex( merkleroot_str, (char*) merkleroothash, 32 );
cbin2hex( witmerkleroot_str, (char*) witmerkleroothash, 32 );
cbin2hex( pofn_str, (char*) pofnhash, 32 );
if ( true )

View File

@@ -58,7 +58,7 @@ union _sonoa_8way_context_overlay
typedef union _sonoa_8way_context_overlay sonoa_8way_context_overlay;
int sonoa_8way_hash( void *state, const void *input, int thrid )
int sonoa_8way_hash( void *state, const void *input, int thr_id )
{
uint64_t vhash[8*8] __attribute__ ((aligned (128)));
uint64_t vhashA[8*8] __attribute__ ((aligned (64)));
@@ -186,7 +186,7 @@ int sonoa_8way_hash( void *state, const void *input, int thrid )
#endif
if ( work_restart[thrid].restart ) return 0;
if ( work_restart[thr_id].restart ) return 0;
// 2
bmw512_8way_full( &ctx.bmw, vhash, vhash, 64 );
@@ -302,7 +302,7 @@ int sonoa_8way_hash( void *state, const void *input, int thrid )
hamsi512_8way_update( &ctx.hamsi, vhash, 64 );
hamsi512_8way_close( &ctx.hamsi, vhash );
if ( work_restart[thrid].restart ) return 0;
if ( work_restart[thr_id].restart ) return 0;
// 3
bmw512_8way_full( &ctx.bmw, vhash, vhash, 64 );
@@ -432,7 +432,7 @@ int sonoa_8way_hash( void *state, const void *input, int thrid )
sph_fugue512_full( &ctx.fugue, hash6, hash6, 64 );
sph_fugue512_full( &ctx.fugue, hash7, hash7, 64 );
if ( work_restart[thrid].restart ) return 0;
if ( work_restart[thr_id].restart ) return 0;
// 4
intrlv_8x64_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
@@ -630,7 +630,7 @@ int sonoa_8way_hash( void *state, const void *input, int thrid )
#endif
if ( work_restart[thrid].restart ) return 0;
if ( work_restart[thr_id].restart ) return 0;
// 5
bmw512_8way_full( &ctx.bmw, vhash, vhash, 64 );
@@ -783,7 +783,7 @@ int sonoa_8way_hash( void *state, const void *input, int thrid )
sph_whirlpool512_full( &ctx.whirlpool, hash6, hash6, 64 );
sph_whirlpool512_full( &ctx.whirlpool, hash7, hash7, 64 );
if ( work_restart[thrid].restart ) return 0;
if ( work_restart[thr_id].restart ) return 0;
// 6
intrlv_8x64_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
@@ -952,7 +952,7 @@ int sonoa_8way_hash( void *state, const void *input, int thrid )
sph_whirlpool512_full( &ctx.whirlpool, hash6, hash6, 64 );
sph_whirlpool512_full( &ctx.whirlpool, hash7, hash7, 64 );
if ( work_restart[thrid].restart ) return 0;
if ( work_restart[thr_id].restart ) return 0;
// 7
intrlv_8x64_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
@@ -1117,49 +1117,6 @@ int sonoa_8way_hash( void *state, const void *input, int thrid )
return 1;
}
int scanhash_sonoa_8way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t hash[8*16] __attribute__ ((aligned (128)));
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
uint32_t *hashd7 = &(hash[7<<3]);
uint32_t *pdata = work->data;
const uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce - 8;
__m512i *noncev = (__m512i*)vdata + 9; // aligned
uint32_t n = first_nonce;
const int thr_id = mythr->id;
const uint32_t targ32 = ptarget[7];
mm512_bswap32_intrlv80_8x64( vdata, pdata );
*noncev = mm512_intrlv_blend_32(
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
do
{
if ( sonoa_8way_hash( hash, vdata, thr_id ) )
for ( int lane = 0; lane < 8; lane++ )
if unlikely( ( hashd7[ lane ] <= targ32 ) )
{
extr_lane_8x32( lane_hash, hash, lane, 256 );
if ( likely( valid_hash( lane_hash, ptarget ) && !opt_benchmark ) )
{
pdata[19] = bswap_32( n + lane );
submit_solution( work, lane_hash, mythr );
}
}
*noncev = _mm512_add_epi32( *noncev,
m512_const1_64( 0x0000000800000000 ) );
n += 8;
} while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) );
pdata[19] = n;
*hashes_done = n - first_nonce;
return 0;
}
#elif defined(SONOA_4WAY)
@@ -1186,7 +1143,7 @@ union _sonoa_4way_context_overlay
typedef union _sonoa_4way_context_overlay sonoa_4way_context_overlay;
int sonoa_4way_hash( void *state, const void *input, int thrid )
int sonoa_4way_hash( void *state, const void *input, int thr_id )
{
uint64_t hash0[8] __attribute__ ((aligned (64)));
uint64_t hash1[8] __attribute__ ((aligned (64)));
@@ -1250,7 +1207,7 @@ int sonoa_4way_hash( void *state, const void *input, int thrid )
echo_full( &ctx.echo, (BitSequence *)hash3, 512,
(const BitSequence *)hash3, 64 );
if ( work_restart[thrid].restart ) return 0;
if ( work_restart[thr_id].restart ) return 0;
// 2
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
@@ -1310,7 +1267,7 @@ int sonoa_4way_hash( void *state, const void *input, int thrid )
hamsi512_4way_update( &ctx.hamsi, vhash, 64 );
hamsi512_4way_close( &ctx.hamsi, vhash );
if ( work_restart[thrid].restart ) return 0;
if ( work_restart[thr_id].restart ) return 0;
// 3
bmw512_4way_init( &ctx.bmw );
@@ -1375,7 +1332,7 @@ int sonoa_4way_hash( void *state, const void *input, int thrid )
sph_fugue512_full( &ctx.fugue, hash2, hash2, 64 );
sph_fugue512_full( &ctx.fugue, hash3, hash3, 64 );
if ( work_restart[thrid].restart ) return 0;
if ( work_restart[thr_id].restart ) return 0;
// 4
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
@@ -1472,7 +1429,7 @@ int sonoa_4way_hash( void *state, const void *input, int thrid )
shavite512_2way_init( &ctx.shavite );
shavite512_2way_update_close( &ctx.shavite, vhashB, vhashB, 64 );
if ( work_restart[thrid].restart ) return 0;
if ( work_restart[thr_id].restart ) return 0;
// 5
rintrlv_2x128_4x64( vhash, vhashA, vhashB, 512 );
@@ -1557,7 +1514,7 @@ int sonoa_4way_hash( void *state, const void *input, int thrid )
sph_whirlpool512_full( &ctx.whirlpool, hash2, hash2, 64 );
sph_whirlpool512_full( &ctx.whirlpool, hash3, hash3, 64 );
if ( work_restart[thrid].restart ) return 0;
if ( work_restart[thr_id].restart ) return 0;
// 6
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
@@ -1650,7 +1607,7 @@ int sonoa_4way_hash( void *state, const void *input, int thrid )
sph_whirlpool512_full( &ctx.whirlpool, hash2, hash2, 64 );
sph_whirlpool512_full( &ctx.whirlpool, hash3, hash3, 64 );
if ( work_restart[thrid].restart ) return 0;
if ( work_restart[thr_id].restart ) return 0;
// 7
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
@@ -1745,46 +1702,4 @@ int sonoa_4way_hash( void *state, const void *input, int thrid )
return 1;
}
int scanhash_sonoa_4way( struct work *work, const uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t hash[4*16] __attribute__ ((aligned (64)));
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
uint32_t *hashd7 = &( hash[7<<2] );
uint32_t *pdata = work->data;
const uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce - 4;
const uint32_t targ32 = ptarget[7];
uint32_t n = first_nonce;
__m256i *noncev = (__m256i*)vdata + 9;
const int thr_id = mythr->id;
mm256_bswap32_intrlv80_4x64( vdata, pdata );
*noncev = mm256_intrlv_blend_32(
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
do
{
if ( sonoa_4way_hash( hash, vdata, thr_id ) )
for ( int lane = 0; lane < 4; lane++ )
if ( unlikely( hashd7[ lane ] <= targ32 ) )
{
extr_lane_4x32( lane_hash, hash, lane, 256 );
if ( likely( valid_hash( lane_hash, ptarget ) && !opt_benchmark ) )
{
pdata[19] = bswap_32( n + lane );
submit_solution( work, lane_hash, mythr );
}
}
*noncev = _mm256_add_epi32( *noncev,
m256_const1_64( 0x0000000400000000 ) );
n += 4;
} while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) );
pdata[19] = n;
*hashes_done = n - first_nonce;
return 0;
}
#endif

View File

@@ -3,15 +3,14 @@
bool register_sonoa_algo( algo_gate_t* gate )
{
#if defined (SONOA_8WAY)
gate->scanhash = (void*)&scanhash_sonoa_8way;
// gate->hash = (void*)&sonoa_8way_hash;
gate->scanhash = (void*)&scanhash_8way_64in_32out;
gate->hash = (void*)&sonoa_8way_hash;
#elif defined (SONOA_4WAY)
gate->scanhash = (void*)&scanhash_sonoa_4way;
// gate->hash = (void*)&sonoa_4way_hash;
gate->scanhash = (void*)&scanhash_4way_64in_32out;
gate->hash = (void*)&sonoa_4way_hash;
#else
init_sonoa_ctx();
gate->scanhash = (void*)&scanhash_sonoa;
// gate->hash = (void*)&sonoa_hash;
gate->hash = (void*)&sonoa_hash;
#endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT;
return true;

View File

@@ -14,21 +14,15 @@ bool register_sonoa_algo( algo_gate_t* gate );
#if defined(SONOA_8WAY)
int sonoa_8way_hash( void *state, const void *input, int thrid );
int scanhash_sonoa_8way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
int sonoa_8way_hash( void *state, const void *input, int thr_id );
#elif defined(SONOA_4WAY)
int sonoa_4way_hash( void *state, const void *input, int thrid );
int scanhash_sonoa_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
int sonoa_4way_hash( void *state, const void *input, int thr_id );
#else
int sonoa_hash( void *state, const void *input, int thrid );
int scanhash_sonoa( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
int sonoa_hash( void *state, const void *input, int thr_id );
void init_sonoa_ctx();
#endif

View File

@@ -83,7 +83,7 @@ void init_sonoa_ctx()
sph_haval256_5_init(&sonoa_ctx.haval);
};
int sonoa_hash( void *state, const void *input, int thrid )
int sonoa_hash( void *state, const void *input, int thr_id )
{
uint8_t hash[128] __attribute__ ((aligned (64)));
sonoa_ctx_holder ctx __attribute__ ((aligned (64)));
@@ -132,7 +132,7 @@ int sonoa_hash( void *state, const void *input, int thrid )
sph_echo512_close(&ctx.echo, hash);
#endif
if ( work_restart[thrid].restart ) return 0;
if ( work_restart[thr_id].restart ) return 0;
//
sph_bmw512_init( &ctx.bmw);
@@ -190,7 +190,7 @@ int sonoa_hash( void *state, const void *input, int thrid )
sph_hamsi512(&ctx.hamsi, hash, 64);
sph_hamsi512_close(&ctx.hamsi, hash);
if ( work_restart[thrid].restart ) return 0;
if ( work_restart[thr_id].restart ) return 0;
//
sph_bmw512_init( &ctx.bmw);
@@ -252,7 +252,7 @@ int sonoa_hash( void *state, const void *input, int thrid )
sph_fugue512(&ctx.fugue, hash, 64);
sph_fugue512_close(&ctx.fugue, hash);
if ( work_restart[thrid].restart ) return 0;
if ( work_restart[thr_id].restart ) return 0;
//
sph_bmw512_init( &ctx.bmw);
@@ -336,7 +336,7 @@ int sonoa_hash( void *state, const void *input, int thrid )
sph_shavite512(&ctx.shavite, hash, 64);
sph_shavite512_close(&ctx.shavite, hash);
if ( work_restart[thrid].restart ) return 0;
if ( work_restart[thr_id].restart ) return 0;
//
sph_bmw512_init( &ctx.bmw);
@@ -410,7 +410,7 @@ int sonoa_hash( void *state, const void *input, int thrid )
sph_whirlpool(&ctx.whirlpool, hash, 64);
sph_whirlpool_close(&ctx.whirlpool, hash);
if ( work_restart[thrid].restart ) return 0;
if ( work_restart[thr_id].restart ) return 0;
//
sph_bmw512_init( &ctx.bmw);
sph_bmw512(&ctx.bmw, hash, 64);
@@ -487,7 +487,7 @@ int sonoa_hash( void *state, const void *input, int thrid )
sph_whirlpool(&ctx.whirlpool, hash, 64);
sph_whirlpool_close(&ctx.whirlpool, hash);
if ( work_restart[thrid].restart ) return 0;
if ( work_restart[thr_id].restart ) return 0;
//
sph_bmw512_init( &ctx.bmw);
@@ -569,34 +569,4 @@ int sonoa_hash( void *state, const void *input, int thrid )
return 1;
}
int scanhash_sonoa( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr)
{
uint32_t edata[20] __attribute__((aligned(64)));
uint32_t hash64[8] __attribute__((aligned(64)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
uint32_t n = pdata[19];
const uint32_t first_nonce = pdata[19];
const int thr_id = mythr->id;
const bool bench = opt_benchmark;
mm128_bswap32_80( edata, pdata );
do
{
edata[19] = n;
if ( sonoa_hash( hash64, edata, thr_id ) )
if ( unlikely( valid_hash( hash64, ptarget ) && !bench ) )
{
pdata[19] = bswap_32( n );
submit_solution( work, hash64, mythr );
}
n++;
} while ( n < max_nonce && !work_restart[thr_id].restart );
*hashes_done = n - first_nonce;
pdata[19] = n;
return 0;
}
#endif

View File

@@ -57,7 +57,7 @@ union _x17_8way_context_overlay
} __attribute__ ((aligned (64)));
typedef union _x17_8way_context_overlay x17_8way_context_overlay;
void x17_8way_hash( void *state, const void *input )
int x17_8way_hash( void *state, const void *input, int thr_id )
{
uint64_t vhash[8*8] __attribute__ ((aligned (128)));
uint64_t vhashA[8*8] __attribute__ ((aligned (64)));
@@ -230,50 +230,8 @@ void x17_8way_hash( void *state, const void *input )
haval256_5_8way_init( &ctx.haval );
haval256_5_8way_update( &ctx.haval, vhashA, 64 );
haval256_5_8way_close( &ctx.haval, state );
}
int scanhash_x17_8way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t hash32[8*8] __attribute__ ((aligned (128)));
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
uint32_t *hash32_d7 = &(hash32[7*8]);
uint32_t *pdata = work->data;
const uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce - 8;
__m512i *noncev = (__m512i*)vdata + 9;
uint32_t n = first_nonce;
const int thr_id = mythr->id;
const uint32_t targ32_d7 = ptarget[7];
const bool bench = opt_benchmark;
mm512_bswap32_intrlv80_8x64( vdata, pdata );
*noncev = mm512_intrlv_blend_32(
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
do
{
x17_8way_hash( hash32, vdata );
for ( int lane = 0; lane < 8; lane++ )
if ( unlikely( ( hash32_d7[ lane ] <= targ32_d7 ) && !bench ) )
{
extr_lane_8x32( lane_hash, hash32, lane, 256 );
if ( likely( valid_hash( lane_hash, ptarget ) ) )
{
pdata[19] = bswap_32( n + lane );
submit_solution( work, lane_hash, mythr );
}
}
*noncev = _mm512_add_epi32( *noncev,
m512_const1_64( 0x0000000800000000 ) );
n += 8;
} while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) );
pdata[19] = n;
*hashes_done = n - first_nonce;
return 0;
return 1;
}
#elif defined(X17_4WAY)
@@ -300,7 +258,7 @@ union _x17_4way_context_overlay
};
typedef union _x17_4way_context_overlay x17_4way_context_overlay;
void x17_4way_hash( void *state, const void *input )
int x17_4way_hash( void *state, const void *input, int thr_id )
{
uint64_t vhash[8*4] __attribute__ ((aligned (64)));
uint64_t vhashA[8*4] __attribute__ ((aligned (64)));
@@ -399,49 +357,8 @@ void x17_4way_hash( void *state, const void *input )
haval256_5_4way_init( &ctx.haval );
haval256_5_4way_update( &ctx.haval, vhashB, 64 );
haval256_5_4way_close( &ctx.haval, state );
}
int scanhash_x17_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t hash32[8*4] __attribute__ ((aligned (64)));
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
uint32_t *hash32_d7 = &(hash32[ 7*4 ]);
uint32_t *pdata = work->data;
const uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce - 4;
__m256i *noncev = (__m256i*)vdata + 9;
uint32_t n = first_nonce;
const int thr_id = mythr->id;
const uint32_t targ32_d7 = ptarget[7];
const bool bench = opt_benchmark;
mm256_bswap32_intrlv80_4x64( vdata, pdata );
*noncev = mm256_intrlv_blend_32(
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
do
{
x17_4way_hash( hash32, vdata );
for ( int lane = 0; lane < 4; lane++ )
if ( unlikely( hash32_d7[ lane ] <= targ32_d7 && !bench ) )
{
extr_lane_4x32( lane_hash, hash32, lane, 256 );
if ( valid_hash( lane_hash, ptarget ) )
{
pdata[19] = bswap_32( n + lane );
submit_solution( work, lane_hash, mythr );
}
}
*noncev = _mm256_add_epi32( *noncev,
m256_const1_64( 0x0000000400000000 ) );
n += 4;
} while ( likely( ( n <= last_nonce ) && !work_restart[thr_id].restart ) );
pdata[19] = n;
*hashes_done = n - first_nonce;
return 0;
return 1;
}
#endif

View File

@@ -3,13 +3,12 @@
bool register_x17_algo( algo_gate_t* gate )
{
#if defined (X17_8WAY)
gate->scanhash = (void*)&scanhash_x17_8way;
gate->scanhash = (void*)&scanhash_8way_64in_32out;
gate->hash = (void*)&x17_8way_hash;
#elif defined (X17_4WAY)
gate->scanhash = (void*)&scanhash_x17_4way;
gate->scanhash = (void*)&scanhash_4way_64in_32out;
gate->hash = (void*)&x17_4way_hash;
#else
gate->scanhash = (void*)&scanhash_x17;
gate->hash = (void*)&x17_hash;
#endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT;

View File

@@ -14,20 +14,15 @@ bool register_x17_algo( algo_gate_t* gate );
#if defined(X17_8WAY)
void x17_8way_hash( void *state, const void *input );
int scanhash_x17_8way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
int x17_8way_hash( void *state, const void *input, int thr_id );
#elif defined(X17_4WAY)
void x17_4way_hash( void *state, const void *input );
int scanhash_x17_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
int x17_4way_hash( void *state, const void *input, int thr_id );
#endif
void x17_hash( void *state, const void *input );
int scanhash_x17( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
int x17_hash( void *state, const void *input, int thr_id );
#endif

View File

@@ -56,7 +56,7 @@ union _x17_context_overlay
};
typedef union _x17_context_overlay x17_context_overlay;
void x17_hash(void *output, const void *input)
int x17_hash(void *output, const void *input, int thr_id )
{
// unsigned char hash[64 * 4] __attribute__((aligned(64))) = {0};
unsigned char hash[64] __attribute__((aligned(64)));
@@ -143,36 +143,8 @@ void x17_hash(void *output, const void *input)
sph_haval256_5_init(&ctx.haval);
sph_haval256_5( &ctx.haval, (const void*)hash, 64 );
sph_haval256_5_close( &ctx.haval, output );
}
int scanhash_x17( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr)
{
uint32_t edata[20] __attribute__((aligned(64)));
uint32_t hash64[8] __attribute__((aligned(64)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
uint32_t n = pdata[19] - 1;
const uint32_t first_nonce = pdata[19];
const int thr_id = mythr->id;
const bool bench = opt_benchmark;
mm128_bswap32_80( edata, pdata );
do
{
edata[19] = n;
x17_hash( hash64, edata );
if ( unlikely( valid_hash( hash64, ptarget ) && !bench ) )
{
pdata[19] = bswap_32( n );
submit_solution( work, hash64, mythr );
}
n++;
} while ( n < max_nonce && !work_restart[thr_id].restart );
*hashes_done = n - first_nonce;
pdata[19] = n;
return 0;
return 1;
}
#endif

View File

@@ -57,7 +57,7 @@ union _xevan_8way_context_overlay
} __attribute__ ((aligned (64)));
typedef union _xevan_8way_context_overlay xevan_8way_context_overlay;
void xevan_8way_hash( void *output, const void *input )
int xevan_8way_hash( void *output, const void *input, int thr_id )
{
uint64_t vhash[16<<3] __attribute__ ((aligned (128)));
uint64_t vhashA[16<<3] __attribute__ ((aligned (64)));
@@ -395,50 +395,8 @@ void xevan_8way_hash( void *output, const void *input )
haval256_5_8way_init( &ctx.haval );
haval256_5_8way_update( &ctx.haval, vhashA, dataLen );
haval256_5_8way_close( &ctx.haval, output );
}
int scanhash_xevan_8way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t hash[8*8] __attribute__ ((aligned (128)));
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
uint32_t *hashd7 = &(hash[7*8]);
uint32_t *pdata = work->data;
const uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce - 8;
__m512i *noncev = (__m512i*)vdata + 9;
uint32_t n = first_nonce;
const int thr_id = mythr->id;
const uint32_t targ32 = ptarget[7];
const bool bench = opt_benchmark;
mm512_bswap32_intrlv80_8x64( vdata, pdata );
*noncev = mm512_intrlv_blend_32(
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
do
{
xevan_8way_hash( hash, vdata );
for ( int lane = 0; lane < 8; lane++ )
if ( unlikely( ( hashd7[ lane ] <= targ32 ) && !bench ) )
{
extr_lane_8x32( lane_hash, hash, lane, 256 );
if ( likely( valid_hash( lane_hash, ptarget ) ) )
{
pdata[19] = bswap_32( n + lane );
submit_solution( work, lane_hash, mythr );
}
}
*noncev = _mm512_add_epi32( *noncev,
m512_const1_64( 0x0000000800000000 ) );
n += 8;
} while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) );
pdata[19] = n;
*hashes_done = n - first_nonce;
return 0;
return 1;
}
#elif defined(XEVAN_4WAY)
@@ -465,7 +423,7 @@ union _xevan_4way_context_overlay
};
typedef union _xevan_4way_context_overlay xevan_4way_context_overlay;
void xevan_4way_hash( void *output, const void *input )
int xevan_4way_hash( void *output, const void *input, int thr_id )
{
uint64_t hash0[16] __attribute__ ((aligned (64)));
uint64_t hash1[16] __attribute__ ((aligned (64)));
@@ -666,49 +624,8 @@ void xevan_4way_hash( void *output, const void *input )
haval256_5_4way_init( &ctx.haval );
haval256_5_4way_update( &ctx.haval, vhashA, dataLen );
haval256_5_4way_close( &ctx.haval, output );
}
int scanhash_xevan_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t hash[16*4] __attribute__ ((aligned (128)));
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
uint32_t *hashd7 = &(hash[7<<2]);
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
int thr_id = mythr->id;
__m256i *noncev = (__m256i*)vdata + 9;
const uint32_t targ32 = ptarget[7];
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce - 4;
uint32_t n = first_nonce;
const bool bench = opt_benchmark;
if ( bench ) ptarget[7] = 0x0cff;
mm256_bswap32_intrlv80_4x64( vdata, pdata );
*noncev = mm256_intrlv_blend_32(
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
do {
xevan_4way_hash( hash, vdata );
for ( int lane = 0; lane < 4; lane++ )
if ( unlikely( hashd7[ lane ] <= targ32 ) && ! bench )
{
extr_lane_4x32( lane_hash, hash, lane, 256 );
if ( valid_hash( lane_hash, ptarget ) )
{
pdata[19] = bswap_32( n + lane );
submit_solution( work, lane_hash, mythr );
}
}
*noncev = _mm256_add_epi32( *noncev,
m256_const1_64( 0x0000000400000000 ) );
n += 4;
} while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) );
pdata[19] = n;
*hashes_done = n - first_nonce;
return 0;
return 1;
}
#endif

View File

@@ -3,14 +3,13 @@
bool register_xevan_algo( algo_gate_t* gate )
{
#if defined (XEVAN_8WAY)
gate->scanhash = (void*)&scanhash_xevan_8way;
gate->scanhash = (void*)&scanhash_8way_64in_32out;
gate->hash = (void*)&xevan_8way_hash;
#elif defined (XEVAN_4WAY)
gate->scanhash = (void*)&scanhash_xevan_4way;
gate->scanhash = (void*)&scanhash_4way_64in_32out;
gate->hash = (void*)&xevan_4way_hash;
#else
init_xevan_ctx();
gate->scanhash = (void*)&scanhash_xevan;
gate->hash = (void*)&xevan_hash;
#endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT;

View File

@@ -14,26 +14,15 @@ bool register_xevan_algo( algo_gate_t* gate );
#if defined(XEVAN_8WAY)
void xevan_8way_hash( void *state, const void *input );
int xevan_8way_hash( void *state, const void *input, int thr_id );
int scanhash_xevan_8way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
#elif defined(XEVAN_4WAY)
void xevan_4way_hash( void *state, const void *input );
int scanhash_xevan_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
//void init_xevan_4way_ctx();
int xevan_4way_hash( void *state, const void *input, int thr_id );
#else
void xevan_hash( void *state, const void *input );
int scanhash_xevan( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
int xevan_hash( void *state, const void *input, int trh_id );
void init_xevan_ctx();
#endif

View File

@@ -83,7 +83,7 @@ void init_xevan_ctx()
#endif
};
void xevan_hash(void *output, const void *input)
int xevan_hash(void *output, const void *input, int thr_id )
{
uint32_t _ALIGN(64) hash[32]; // 128 bytes required
const int dataLen = 128;
@@ -218,36 +218,8 @@ void xevan_hash(void *output, const void *input)
sph_haval256_5_close(&ctx.haval, hash);
memcpy(output, hash, 32);
}
int scanhash_xevan( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr)
{
uint32_t edata[20] __attribute__((aligned(64)));
uint32_t hash64[8] __attribute__((aligned(64)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
uint32_t n = pdata[19];
const uint32_t first_nonce = pdata[19];
const int thr_id = mythr->id;
const bool bench = opt_benchmark;
mm128_bswap32_80( edata, pdata );
do
{
edata[19] = n;
xevan_hash( hash64, edata );
if ( unlikely( valid_hash( hash64, ptarget ) && !bench ) )
{
pdata[19] = bswap_32( n );
submit_solution( work, hash64, mythr );
}
n++;
} while ( n < max_nonce && !work_restart[thr_id].restart );
pdata[19] = n;
*hashes_done = n - first_nonce;
return 0;
return 1;
}
#endif

View File

@@ -445,7 +445,7 @@ bool register_yescrypt_algo( algo_gate_t* gate )
YESCRYPT_P = 1;
applog( LOG_NOTICE,"Yescrypt parameters: N= %d, R= %d.", YESCRYPT_N,
applog( LOG_NOTICE,"Yescrypt parameters: N= %d, R= %d", YESCRYPT_N,
YESCRYPT_R );
if ( yescrypt_client_key )
applog( LOG_NOTICE,"Key= \"%s\"\n", yescrypt_client_key );

View File

@@ -139,7 +139,7 @@ bool register_yespower_algo( algo_gate_t* gate )
yespower_params.perslen = 0;
}
applog( LOG_NOTICE,"Yespower parameters: N= %d, R= %d.", yespower_params.N,
applog( LOG_NOTICE,"Yespower parameters: N= %d, R= %d", yespower_params.N,
yespower_params.r );
if ( yespower_params.pers )
applog( LOG_NOTICE,"Key= \"%s\"\n", yespower_params.pers );
@@ -264,7 +264,7 @@ bool register_power2b_algo( algo_gate_t* gate )
yespower_params.pers = "Now I am become Death, the destroyer of worlds";
yespower_params.perslen = 46;
applog( LOG_NOTICE,"yespower-b2b parameters: N= %d, R= %d.", yespower_params.N,
applog( LOG_NOTICE,"yespower-b2b parameters: N= %d, R= %d", yespower_params.N,
yespower_params.r );
applog( LOG_NOTICE,"Key= \"%s\"", yespower_params.pers );
applog( LOG_NOTICE,"Key length= %d\n", yespower_params.perslen );

View File

@@ -76,7 +76,7 @@ typedef struct {
unsigned char uc[32];
} yespower_binary_t __attribute__ ((aligned (64)));
yespower_params_t yespower_params;
extern yespower_params_t yespower_params;
//SHA256_CTX sha256_prehash_ctx;
extern __thread SHA256_CTX sha256_prehash_ctx;

View File

@@ -9,15 +9,15 @@ rm cpuminer-avx512-sha-vaes cpuminer-avx512 cpuminer-avx2 cpuminer-aes-avx cpumi
make distclean || echo clean
rm -f config.status
./autogen.sh || echo done
CFLAGS="-O3 -march=icelake-client -Wall" ./configure --with-curl
make -j 16
CFLAGS="-O3 -march=icelake-client -Wall -fno-common" ./configure --with-curl
make -j 8
strip -s cpuminer.exe
mv cpuminer.exe cpuminer-avx512-sha-vaes.exe
strip -s cpuminer
mv cpuminer cpuminer-avx512-sha-vaes
CFLAGS="-O3 -march=skylake-avx512 -Wall" ./configure --with-curl
make -j 16
CFLAGS="-O3 -march=skylake-avx512 -Wall -fno-common" ./configure --with-curl
make -j 8
strip -s cpuminer.exe
mv cpuminer.exe cpuminer-avx512.exe
strip -s cpuminer
@@ -26,8 +26,8 @@ mv cpuminer cpuminer-avx512
make clean || echo clean
rm -f config.status
# GCC 9 doesn't include AES with core-avx2
CFLAGS="-O3 -march=core-avx2 -maes -Wall" ./configure --with-curl
make -j 16
CFLAGS="-O3 -march=core-avx2 -maes -Wall -fno-common" ./configure --with-curl
make -j 8
strip -s cpuminer.exe
mv cpuminer.exe cpuminer-avx2.exe
strip -s cpuminer
@@ -35,17 +35,17 @@ mv cpuminer cpuminer-avx2
make clean || echo clean
rm -f config.status
CFLAGS="-O3 -march=corei7-avx -maes -Wall" ./configure --with-curl
make -j 16
CFLAGS="-O3 -march=corei7-avx -maes -Wall -fno-common" ./configure --with-curl
make -j 8
strip -s cpuminer.exe
mv cpuminer.exe cpuminer-avx.exe
strip -s cpuminer
mv cpuminer cpuminer-aes-avx
mv cpuminer cpuminer-avx
make clean || echo clean
rm -f config.status
CFLAGS="-O3 -maes -msse4.2 -Wall" ./configure --with-curl
make -j 16
CFLAGS="-O3 -maes -msse4.2 -Wall -fno-common" ./configure --with-curl
make -j 8
strip -s cpuminer.exe
mv cpuminer.exe cpuminer-aes-sse42.exe
strip -s cpuminer
@@ -53,8 +53,8 @@ mv cpuminer cpuminer-aes-sse42
#make clean || echo clean
#rm -f config.status
#CFLAGS="-O3 -march=corei7 -Wall" ./configure --with-curl
#make -j 16
#CFLAGS="-O3 -march=corei7 -Wall -fno-common" ./configure --with-curl
#make -j 8
#strip -s cpuminer.exe
#mv cpuminer.exe cpuminer-sse42.exe
#strip -s cpuminer
@@ -62,8 +62,8 @@ mv cpuminer cpuminer-aes-sse42
#make clean || echo clean
#rm -f config.status
#CFLAGS="-O3 -march=core2 -Wall" ./configure --with-curl
#make -j 16
#CFLAGS="-O3 -march=core2 -Wall -fno-common" ./configure --with-curl
#make -j 8
#strip -s cpuminer.exe
#mv cpuminer.exe cpuminer-ssse3.exe
#strip -s cpuminer
@@ -71,8 +71,8 @@ mv cpuminer cpuminer-aes-sse42
make clean || echo clean
rm -f config.status
CFLAGS="-O3 -msse2 -Wall" ./configure --with-curl
make -j 16
CFLAGS="-O3 -msse2 -Wall -fno-common" ./configure --with-curl
make -j 8
strip -s cpuminer.exe
mv cpuminer.exe cpuminer-sse2.exe
strip -s cpuminer
@@ -80,8 +80,8 @@ mv cpuminer cpuminer-sse2
make clean || echo done
rm -f config.status
CFLAGS="-O3 -march=znver1 -Wall" ./configure --with-curl
make -j 16
CFLAGS="-O3 -march=znver1 -Wall -fno-common" ./configure --with-curl
make -j 8
strip -s cpuminer.exe
mv cpuminer.exe cpuminer-zen.exe
strip -s cpuminer
@@ -89,8 +89,8 @@ mv cpuminer cpuminer-zen
make clean || echo done
rm -f config.status
CFLAGS="-O3 -march=native -Wall" ./configure --with-curl
make -j 16
CFLAGS="-O3 -march=native -Wall -fno-common" ./configure --with-curl
make -j 8
strip -s cpuminer.exe
strip -s cpuminer

27
build-no-common.sh Executable file
View File

@@ -0,0 +1,27 @@
#!/bin/bash
#if [ "$OS" = "Windows_NT" ]; then
# ./mingw64.sh
# exit 0
#fi
# Linux build
make distclean || echo clean
rm -f config.status
./autogen.sh || echo done
# Ubuntu 10.04 (gcc 4.4)
# extracflags="-O3 -march=native -Wall -D_REENTRANT -funroll-loops -fvariable-expansion-in-unroller -fmerge-all-constants -fbranch-target-load-optimize2 -fsched2-use-superblocks -falign-loops=16 -falign-functions=16 -falign-jumps=16 -falign-labels=16"
# Debian 7.7 / Ubuntu 14.04 (gcc 4.7+)
#extracflags="$extracflags -Ofast -flto -fuse-linker-plugin -ftree-loop-if-convert-stores"
#CFLAGS="-O3 -march=native -Wall" ./configure --with-curl --with-crypto=$HOME/usr
CFLAGS="-O3 -march=native -Wall -fno-common" ./configure --with-curl
#CFLAGS="-O3 -march=native -Wall" CXXFLAGS="$CFLAGS -std=gnu++11" ./configure --with-curl
make -j 4
strip -s cpuminer

View File

@@ -3,8 +3,8 @@
# imake clean and rm all the targetted executables.
# tips to users.
rm cpuminer-avx512-sha-vaes cpuminer-avx512 cpuminer-avx2 cpuminer-aes-avx cpuminer-aes-sse42 cpuminer-sse2 cpuminer-zen > /dev/null
rm cpuminer-avx512-sha-vaes cpuminer-avx512 cpuminer-avx2 cpuminer-avx cpuminer-aes-sse42 cpuminer-sse2 cpuminer-zen > /dev/null
rm cpuminer-avx512-sha-vaes.exe cpuminer-avx512.exe cpuminer-avx2.exe cpuminer-aes-avx.exe cpuminer-aes-sse42.exe cpuminer-sse2.exe cpuminer-zen.exe > /dev/null
rm cpuminer-avx512-sha-vaes.exe cpuminer-avx512.exe cpuminer-avx2.exe cpuminer-avx.exe cpuminer-aes-sse42.exe cpuminer-sse2.exe cpuminer-zen.exe > /dev/null
make distclean > /dev/null

20
configure vendored
View File

@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.13.1.
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.14.3.
#
#
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
@@ -577,8 +577,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='cpuminer-opt'
PACKAGE_TARNAME='cpuminer-opt'
PACKAGE_VERSION='3.13.1'
PACKAGE_STRING='cpuminer-opt 3.13.1'
PACKAGE_VERSION='3.14.3'
PACKAGE_STRING='cpuminer-opt 3.14.3'
PACKAGE_BUGREPORT=''
PACKAGE_URL=''
@@ -1332,7 +1332,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
\`configure' configures cpuminer-opt 3.13.1 to adapt to many kinds of systems.
\`configure' configures cpuminer-opt 3.14.3 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1404,7 +1404,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
short | recursive ) echo "Configuration of cpuminer-opt 3.13.1:";;
short | recursive ) echo "Configuration of cpuminer-opt 3.14.3:";;
esac
cat <<\_ACEOF
@@ -1509,7 +1509,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
cpuminer-opt configure 3.13.1
cpuminer-opt configure 3.14.3
generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2012,7 +2012,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
It was created by cpuminer-opt $as_me 3.13.1, which was
It was created by cpuminer-opt $as_me 3.14.3, which was
generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@
@@ -2993,7 +2993,7 @@ fi
# Define the identity of the package.
PACKAGE='cpuminer-opt'
VERSION='3.13.1'
VERSION='3.14.3'
cat >>confdefs.h <<_ACEOF
@@ -6690,7 +6690,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
This file was extended by cpuminer-opt $as_me 3.13.1, which was
This file was extended by cpuminer-opt $as_me 3.14.3, which was
generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -6756,7 +6756,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
cpuminer-opt config.status 3.13.1
cpuminer-opt config.status 3.14.3
configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\"

View File

@@ -1,4 +1,4 @@
AC_INIT([cpuminer-opt], [3.13.1])
AC_INIT([cpuminer-opt], [3.14.3])
AC_PREREQ([2.59c])
AC_CANONICAL_SYSTEM

File diff suppressed because it is too large Load Diff

42
miner.h
View File

@@ -83,6 +83,8 @@ enum {
};
#endif
extern bool is_power_of_2( int n );
static inline bool is_windows(void)
{
#ifdef WIN32
@@ -313,6 +315,10 @@ size_t address_to_script( unsigned char *out, size_t outsz, const char *addr );
int timeval_subtract( struct timeval *result, struct timeval *x,
struct timeval *y);
// Segwit BEGIN
extern void memrev(unsigned char *p, size_t len);
// Segwit END
// Bitcoin formula for converting difficulty to an equivalent
// number of hashes.
//
@@ -324,12 +330,12 @@ int timeval_subtract( struct timeval *result, struct timeval *x,
#define EXP16 65536.
#define EXP32 4294967296.
const long double exp32; // 2**32
const long double exp48; // 2**48
const long double exp64; // 2**64
const long double exp96; // 2**96
const long double exp128; // 2**128
const long double exp160; // 2**160
extern const long double exp32; // 2**32
extern const long double exp48; // 2**48
extern const long double exp64; // 2**64
extern const long double exp96; // 2**96
extern const long double exp128; // 2**128
extern const long double exp160; // 2**160
bool fulltest( const uint32_t *hash, const uint32_t *target );
bool valid_hash( const void*, const void* );
@@ -374,36 +380,25 @@ void cpu_brand_string( char* s );
float cpu_temp( int core );
*/
struct work {
struct work
{
uint32_t target[8] __attribute__ ((aligned (64)));
uint32_t data[48] __attribute__ ((aligned (64)));
uint32_t target[8] __attribute__ ((aligned (64)));
double targetdiff;
// double shareratio;
double sharediff;
double stratum_diff;
int height;
char *txs;
char *workid;
char *job_id;
size_t xnonce2_len;
unsigned char *xnonce2;
bool sapling;
bool stale;
// x16rt
uint32_t merkleroothash[8];
uint32_t witmerkleroothash[8];
uint32_t denom10[8];
uint32_t denom100[8];
uint32_t denom1000[8];
uint32_t denom10000[8];
} __attribute__ ((aligned (64)));
struct stratum_job {
struct stratum_job
{
unsigned char prevhash[32];
unsigned char final_sapling_hash[32];
char *job_id;
@@ -417,7 +412,7 @@ struct stratum_job {
unsigned char ntime[4];
double diff;
bool clean;
// for x16rt
// for x16rt-veil
unsigned char extra[64];
unsigned char denom10[32];
unsigned char denom100[32];
@@ -752,6 +747,7 @@ extern double opt_diff_factor;
extern double opt_target_factor;
extern bool opt_randomize;
extern bool allow_mininginfo;
extern pthread_rwlock_t g_work_lock;
extern time_t g_work_time;
extern bool opt_stratum_stats;
extern int num_cpus;

View File

@@ -375,10 +375,10 @@ static inline void memcpy_512( __m512i *dst, const __m512i *src, const int n )
// Generic for odd rotations
#define mm512_ror_x64( v, n ) _mm512_alignr_epi64( v, v, n )
#define mm512_rol_x64( v, n ) _mm512_alignr_epi64( v, v, 8-n )
#define mm512_rol_x64( v, n ) _mm512_alignr_epi64( v, v, 8-(n) )
#define mm512_ror_x32( v, n ) _mm512_alignr_epi32( v, v, n )
#define mm512_rol_x32( v, n ) _mm512_alignr_epi32( v, v, 16-n )
#define mm512_rol_x32( v, n ) _mm512_alignr_epi32( v, v, 16-(n) )
#define mm512_ror_1x16( v ) \
_mm512_permutexvar_epi16( m512_const_64( \

24
util.c
View File

@@ -81,6 +81,15 @@ struct thread_q {
pthread_cond_t cond;
};
bool is_power_of_2( int n )
{
while ( n > 1 )
{
if ( n % 2 != 0 ) return false;
n = n / 2;
}
return true;
}
void applog2( int prio, const char *fmt, ... )
{
@@ -609,6 +618,8 @@ json_t *json_rpc_call(CURL *curl, const char *url,
goto err_out;
}
// want_stratum is useless, and so is this code it seems. Nothing in
// hi appears to be set.
/* If X-Stratum was found, activate Stratum */
if (want_stratum && hi.stratum_url &&
!strncasecmp(hi.stratum_url, "stratum+tcp://", 14)) {
@@ -747,6 +758,19 @@ err_out:
return cfg;
}
// Segwit BEGIN
void memrev(unsigned char *p, size_t len)
{
unsigned char c, *q;
for (q = p + len - 1; p < q; p++, q--) {
c = *p;
*p = *q;
*q = c;
}
}
// Segwit END
void cbin2hex(char *out, const char *in, size_t len)
{
if (out) {

View File

@@ -44,14 +44,14 @@ cp $LOCAL_LIB/curl/lib/.libs/libcurl-4.dll release/
rm -f config.status
./autogen.sh || echo done
CFLAGS="-O3 -march=icelake-client -Wall" ./configure $CONFIGURE_ARGS
make -j 16
make -j 8
strip -s cpuminer.exe
mv cpuminer.exe release/cpuminer-avx512-sha-vaes.exe
make clean || echo clean
rm -f config.status
CFLAGS="-O3 -march=znver1 -Wall" ./configure $CONFIGURE_ARGS
make -j 16
make -j 8
strip -s cpuminer.exe
mv cpuminer.exe release/cpuminer-zen.exe
@@ -60,7 +60,7 @@ make clean || echo clean
rm -f config.status
CFLAGS="-O3 -march=skylake-avx512 -Wall" ./configure $CONFIGURE_ARGS
#CFLAGS="-O3 -march=skylake-avx512 -Wall -fno-asynchronous-unwind-tables" ./configure $CONFIGURE_ARGS
make -j 16
make -j 8
strip -s cpuminer.exe
mv cpuminer.exe release/cpuminer-avx512.exe
@@ -68,7 +68,7 @@ make clean || echo clean
rm -f config.status
# GCC 9 doesn't include AES in -march=core-avx2
CFLAGS="-O3 -march=core-avx2 -maes -Wall" ./configure $CONFIGURE_ARGS
make -j 16
make -j 8
strip -s cpuminer.exe
mv cpuminer.exe release/cpuminer-avx2.exe
@@ -76,7 +76,7 @@ make clean || echo clean
rm -f config.status
# -march=corei7-avx still includes aes, but just in case
CFLAGS="-O3 -march=corei7-avx -maes -Wall" ./configure $CONFIGURE_ARGS
make -j 16
make -j 8
strip -s cpuminer.exe
mv cpuminer.exe release/cpuminer-avx.exe
@@ -85,7 +85,7 @@ make clean || echo clean
rm -f config.status
CFLAGS="-O3 -march=westmere -Wall" ./configure $CONFIGURE_ARGS
#CFLAGS="-O3 -maes -msse4.2 -Wall" ./configure $CONFIGURE_ARGS
make -j 16
make -j 8
strip -s cpuminer.exe
mv cpuminer.exe release/cpuminer-aes-sse42.exe
@@ -107,7 +107,7 @@ mv cpuminer.exe release/cpuminer-aes-sse42.exe
make clean || echo clean
rm -f config.status
CFLAGS="-O3 -msse2 -Wall" ./configure $CONFIGURE_ARGS
make -j 16
make -j 8
strip -s cpuminer.exe
mv cpuminer.exe release/cpuminer-sse2.exe
make clean || echo clean