This commit is contained in:
Jay D Dee
2019-09-26 22:37:26 -04:00
parent a042fb7612
commit 01550d94a2
47 changed files with 1240 additions and 1544 deletions

View File

@@ -18,7 +18,6 @@ dist_man_MANS = cpuminer.1
cpuminer_SOURCES = \
cpu-miner.c \
util.c \
uint256.cpp \
api.c \
sysinfos.c \
algo-gate-api.c\
@@ -263,6 +262,8 @@ cpuminer_SOURCES = \
algo/x16/x16r-gate.c \
algo/x16/x16r.c \
algo/x16/x16r-4way.c \
algo/x16/x16rv2.c \
algo/x16/x16rv2-4way.c \
algo/x16/x16rt.c \
algo/x16/x16rt-4way.c \
algo/x16/hex.c \

View File

@@ -24,7 +24,7 @@ Requirements
1. A x86_64 architecture CPU with a minimum of SSE2 support. This includes
Intel Core2 and newer and AMD equivalents. In order to take advantage of AES_NI
optimizations a CPU with AES_NI is required. This includes Intel Westbridge
optimizations a CPU with AES_NI is required. This includes Intel Westmere
and newer and AMD equivalents. Further optimizations are available on some
algorithms for CPUs with AVX and AVX2, Sandybridge and Haswell respectively.
@@ -87,8 +87,9 @@ Supported Algorithms
neoscrypt NeoScrypt(128, 2, 1)
nist5 Nist5
pentablake Pentablake
phi1612 phi, LUX coin (original algo)
phi2 LUX coin (new algo)
phi1612 phi
phi2 Luxcoin (LUX)
phi2-lux identical to phi2
pluck Pluck:128 (Supcoin)
polytimos Ninja
quark Quark
@@ -120,7 +121,8 @@ Supported Algorithms
x13sm3 hsr (Hshare)
x14 X14
x15 X15
x16r Ravencoin (RVN)
x16r Ravencoin (RVN) (original algo)
x16rv2 Ravencoin (RVN) (new algo)
x16rt Gincoin (GIN)
x16rt_veil Veil (VEIL)
x16s Pigeoncoin (PGN)
@@ -155,14 +157,15 @@ Benchmark testing does not work for x11evo.
Bugs
----
Users are encouraged to post their bug reports on the Bitcoin Talk
forum at:
Users are encouraged to post their bug reports using git issues or on the
Bitcoin Talk forum at:
https://bitcointalk.org/index.php?topic=1326803.0
All problem reports must be accompanied by a proper definition.
All problem reports must be accompanied by a proper problem definition.
This should include how the problem occurred, the command line and
output from the miner showing the startup and any errors.
output from the miner showing the startup messages and any errors.
A history is also useful, i.e. did it work before?
Donations
---------

View File

@@ -38,6 +38,14 @@ supported.
Change Log
----------
v3.9.8
Changes to log output to provide data more relevant to actual mining
performance.
phi2 can now handle pools with a mix of coins that use and don't use roots.
phi2-lux added as an alias for phi2 as they are identical except for roots.
Add x16rv2 algo for Ravencoin fork.
v3.9.7
Command line option changes:

View File

@@ -122,7 +122,6 @@ void init_algo_gate( algo_gate_t* gate )
gate->stratum_gen_work = (void*)&std_stratum_gen_work;
gate->build_stratum_request = (void*)&std_le_build_stratum_request;
gate->malloc_txs_request = (void*)&std_malloc_txs_request;
gate->set_target = (void*)&std_set_target;
gate->submit_getwork_result = (void*)&std_le_submit_getwork_result;
gate->build_block_header = (void*)&std_build_block_header;
gate->build_extraheader = (void*)&std_build_extraheader;
@@ -234,6 +233,7 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
case ALGO_X14: register_x14_algo ( gate ); break;
case ALGO_X15: register_x15_algo ( gate ); break;
case ALGO_X16R: register_x16r_algo ( gate ); break;
case ALGO_X16RV2: register_x16rv2_algo ( gate ); break;
case ALGO_X16RT: register_x16rt_algo ( gate ); break;
case ALGO_X16RT_VEIL: register_x16rt_veil_algo ( gate ); break;
case ALGO_X16S: register_x16s_algo ( gate ); break;
@@ -337,7 +337,7 @@ const char* const algo_alias_map[][2] =
{ "myriad", "myr-gr" },
{ "neo", "neoscrypt" },
{ "phi", "phi1612" },
// { "sia", "blake2b" },
{ "phi2-lux", "phi2" },
{ "sib", "x11gost" },
{ "timetravel8", "timetravel" },
{ "veil", "x16rt-veil" },
@@ -365,40 +365,3 @@ void get_algo_alias( char** algo_or_alias )
#undef ALIAS
#undef PROPER
bool submit_solution( struct work *work, void *hash,
struct thr_info *thr )
{
work_set_target_ratio( work, hash );
if ( submit_work( thr, work ) )
{
if ( !opt_quiet )
applog( LOG_BLUE, "Share %d submitted by thread %d, job %s.",
accepted_share_count + rejected_share_count + 1,
thr->id, work->job_id );
return true;
}
else
applog( LOG_WARNING, "Failed to submit share." );
return false;
}
bool submit_lane_solution( struct work *work, void *hash,
struct thr_info *thr, int lane )
{
work_set_target_ratio( work, hash );
if ( submit_work( thr, work ) )
{
if ( !opt_quiet )
// applog( LOG_BLUE, "Share %d submitted by thread %d, lane %d.",
// accepted_share_count + rejected_share_count + 1,
// thr->id, lane );
applog( LOG_BLUE, "Share %d submitted by thread %d, lane %d, job %s.",
accepted_share_count + rejected_share_count + 1, thr->id,
lane, work->job_id );
return true;
}
else
applog( LOG_WARNING, "Failed to submit share." );
return false;
}

View File

@@ -132,7 +132,6 @@ void ( *decode_extra_data ) ( struct work*, uint64_t* );
void ( *wait_for_diff ) ( struct stratum_ctx* );
int64_t ( *get_max64 ) ();
bool ( *work_decode ) ( const json_t*, struct work* );
void ( *set_target) ( struct work*, double );
bool ( *submit_getwork_result ) ( CURL*, struct work* );
void ( *gen_merkle_root ) ( char*, struct stratum_ctx* );
void ( *build_extraheader ) ( struct work*, struct stratum_ctx* );
@@ -193,15 +192,6 @@ void four_way_not_tested();
// always returns failure
int null_scanhash();
// Allow algos to submit from scanhash loop.
bool submit_solution( struct work *work, void *hash,
struct thr_info *thr );
bool submit_lane_solution( struct work *work, void *hash,
struct thr_info *thr, int lane );
bool submit_work( struct thr_info *thr, const struct work *work_in );
// displays warning
void null_hash ();
void null_hash_suw();
@@ -232,10 +222,6 @@ int64_t get_max64_0x3fffffLL();
int64_t get_max64_0x1ffff();
int64_t get_max64_0xffffLL();
void std_set_target( struct work *work, double job_diff );
void alt_set_target( struct work* work, double job_diff );
void scrypt_set_target( struct work *work, double job_diff );
bool std_le_work_decode( const json_t *val, struct work *work );
bool std_be_work_decode( const json_t *val, struct work *work );
bool jr2_work_decode( const json_t *val, struct work *work );

View File

@@ -85,8 +85,9 @@ bool register_argon2_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_argon2;
gate->hash = (void*)&argon2hash;
gate->gen_merkle_root = (void*)&SHA256_gen_merkle_root;
gate->set_target = (void*)&scrypt_set_target;
gate->get_max64 = (void*)&argon2_get_max64;
opt_target_factor = 65536.0;
return true;
};

View File

@@ -67,8 +67,8 @@ bool register_argon2d_crds_algo( algo_gate_t* gate )
{
gate->scanhash = (void*)&scanhash_argon2d_crds;
gate->hash = (void*)&argon2d_crds_hash;
gate->set_target = (void*)&scrypt_set_target;
gate->optimizations = SSE2_OPT | AVX2_OPT | AVX512_OPT;
opt_target_factor = 65536.0;
return true;
}
@@ -135,8 +135,8 @@ bool register_argon2d_dyn_algo( algo_gate_t* gate )
{
gate->scanhash = (void*)&scanhash_argon2d_dyn;
gate->hash = (void*)&argon2d_dyn_hash;
gate->set_target = (void*)&scrypt_set_target;
gate->optimizations = SSE2_OPT | AVX2_OPT | AVX512_OPT;
opt_target_factor = 65536.0;
return true;
}
@@ -184,9 +184,9 @@ int64_t get_max64_0x1ff() { return 0x1ff; }
bool register_argon2d4096_algo( algo_gate_t* gate )
{
gate->scanhash = (void*)&scanhash_argon2d4096;
gate->set_target = (void*)&scrypt_set_target;
gate->get_max64 = (void*)&get_max64_0x1ff;
gate->optimizations = SSE2_OPT | AVX2_OPT | AVX512_OPT;
opt_target_factor = 65536.0;
return true;
}

View File

@@ -5,8 +5,8 @@ int64_t bmw512_get_max64() { return 0x7ffffLL; }
bool register_bmw512_algo( algo_gate_t* gate )
{
gate->optimizations = AVX2_OPT;
gate->set_target = (void*)&alt_set_target;
gate->get_max64 = (void*)&bmw512_get_max64;
opt_target_factor = 256.0;
#if defined (BMW512_4WAY)
gate->scanhash = (void*)&scanhash_bmw512_4way;
gate->hash = (void*)&bmw512hash_4way;

View File

@@ -94,19 +94,14 @@ int scanhash_groestl( struct work *work, uint32_t max_nonce,
return 0;
}
void groestl_set_target( struct work* work, double job_diff )
{
work_set_target( work, job_diff / (256.0 * opt_diff_factor) );
}
bool register_dmd_gr_algo( algo_gate_t* gate )
{
init_groestl_ctx();
gate->optimizations = SSE2_OPT | AES_OPT;
gate->scanhash = (void*)&scanhash_groestl;
gate->hash = (void*)&groestlhash;
gate->set_target = (void*)&groestl_set_target;
gate->get_max64 = (void*)&get_max64_0x3ffff;
opt_target_factor = 256.0;
return true;
};

View File

@@ -15,11 +15,6 @@ pthread_barrier_t hodl_barrier;
// need to be passed.
unsigned char *hodl_scratchbuf = NULL;
void hodl_set_target( struct work* work, double diff )
{
diff_to_target(work->target, diff / 8388608.0 );
}
void hodl_le_build_stratum_request( char* req, struct work* work,
struct stratum_ctx *sctx )
{
@@ -170,7 +165,6 @@ bool register_hodl_algo( algo_gate_t* gate )
gate->scanhash = (void*)&hodl_scanhash;
gate->get_new_work = (void*)&hodl_get_new_work;
gate->longpoll_rpc_call = (void*)&hodl_longpoll_rpc_call;
gate->set_target = (void*)&hodl_set_target;
gate->build_stratum_request = (void*)&hodl_le_build_stratum_request;
gate->malloc_txs_request = (void*)&hodl_malloc_txs_request;
gate->build_block_header = (void*)&hodl_build_block_header;
@@ -179,6 +173,7 @@ bool register_hodl_algo( algo_gate_t* gate )
gate->work_cmp_size = 76;
hodl_scratchbuf = (unsigned char*)malloc( 1 << 30 );
allow_getwork = false;
opt_target_factor = 8388608.0;
return ( hodl_scratchbuf != NULL );
}

View File

@@ -12,7 +12,7 @@ bool register_jha_algo( algo_gate_t* gate )
gate->hash = (void*)&jha_hash;
#endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
gate->set_target = (void*)&scrypt_set_target;
opt_target_factor = 65536.0;
return true;
};

View File

@@ -1,18 +1,13 @@
#include "keccak-gate.h"
void keccak_set_target( struct work* work, double job_diff )
{
work_set_target( work, job_diff / (128.0 * opt_diff_factor) );
}
int64_t keccak_get_max64() { return 0x7ffffLL; }
bool register_keccak_algo( algo_gate_t* gate )
{
gate->optimizations = AVX2_OPT;
gate->gen_merkle_root = (void*)&SHA256_gen_merkle_root;
gate->set_target = (void*)&keccak_set_target;
gate->get_max64 = (void*)&keccak_get_max64;
opt_target_factor = 128.0;
#if defined (KECCAK_4WAY)
gate->scanhash = (void*)&scanhash_keccak_4way;
gate->hash = (void*)&keccakhash_4way;
@@ -23,17 +18,12 @@ bool register_keccak_algo( algo_gate_t* gate )
return true;
};
void keccakc_set_target( struct work* work, double job_diff )
{
work_set_target( work, job_diff / (256.0 * opt_diff_factor) );
}
bool register_keccakc_algo( algo_gate_t* gate )
{
gate->optimizations = AVX2_OPT;
gate->gen_merkle_root = (void*)&sha256d_gen_merkle_root;
gate->set_target = (void*)&keccakc_set_target;
gate->get_max64 = (void*)&keccak_get_max64;
opt_target_factor = 256.0;
#if defined (KECCAK_4WAY)
gate->scanhash = (void*)&scanhash_keccak_4way;
gate->hash = (void*)&keccakhash_4way;

View File

@@ -71,7 +71,7 @@ bool register_lyra2rev3_algo( algo_gate_t* gate )
#endif
gate->optimizations = SSE2_OPT | SSE42_OPT | AVX2_OPT;
gate->miner_thread_init = (void*)&lyra2rev3_thread_init;
gate->set_target = (void*)&alt_set_target;
opt_target_factor = 256.0;
return true;
};
@@ -105,7 +105,7 @@ bool register_lyra2rev2_algo( algo_gate_t* gate )
#endif
gate->optimizations = SSE2_OPT | AES_OPT | SSE42_OPT | AVX2_OPT;
gate->miner_thread_init = (void*)&lyra2rev2_thread_init;
gate->set_target = (void*)&alt_set_target;
opt_target_factor = 256.0;
return true;
};
@@ -128,7 +128,7 @@ bool register_lyra2z_algo( algo_gate_t* gate )
#endif
gate->optimizations = SSE42_OPT | AVX2_OPT;
gate->get_max64 = (void*)&get_max64_0xffffLL;
gate->set_target = (void*)&alt_set_target;
opt_target_factor = 256.0;
return true;
};
@@ -148,7 +148,7 @@ bool register_lyra2h_algo( algo_gate_t* gate )
#endif
gate->optimizations = SSE42_OPT | AVX2_OPT;
gate->get_max64 = (void*)&get_max64_0xffffLL;
gate->set_target = (void*)&alt_set_target;
opt_target_factor = 256.0;
return true;
};
@@ -168,8 +168,8 @@ bool register_allium_algo( algo_gate_t* gate )
gate->hash = (void*)&allium_hash;
#endif
gate->optimizations = SSE2_OPT | AES_OPT | SSE42_OPT | AVX2_OPT;
gate->set_target = (void*)&alt_set_target;
gate->get_max64 = (void*)&allium_get_max64_0xFFFFLL;
opt_target_factor = 256.0;
return true;
};
@@ -182,6 +182,7 @@ int phi2_get_work_data_size() { return phi2_use_roots ? 144 : 128; }
void phi2_decode_extra_data( struct work *work )
{
phi2_use_roots = false;
if ( work->data[0] & ( 1<<30 ) ) phi2_use_roots = true;
else for ( int i = 20; i < 36; i++ )
{
@@ -213,8 +214,8 @@ bool register_phi2_algo( algo_gate_t* gate )
gate->get_work_data_size = (void*)&phi2_get_work_data_size;
gate->decode_extra_data = (void*)&phi2_decode_extra_data;
gate->build_extraheader = (void*)&phi2_build_extraheader;
gate->set_target = (void*)&alt_set_target;
gate->get_max64 = (void*)&get_max64_0xffffLL;
opt_target_factor = 256.0;
#if defined(PHI2_4WAY)
gate->scanhash = (void*)&scanhash_phi2_4way;
#else

View File

@@ -118,11 +118,6 @@ int64_t lyra2re_get_max64 ()
return 0xffffLL;
}
void lyra2re_set_target ( struct work* work, double job_diff )
{
work_set_target(work, job_diff / (128.0 * opt_diff_factor) );
}
bool register_lyra2re_algo( algo_gate_t* gate )
{
init_lyra2re_ctx();
@@ -130,7 +125,7 @@ bool register_lyra2re_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_lyra2re;
gate->hash = (void*)&lyra2re_hash;
gate->get_max64 = (void*)&lyra2re_get_max64;
gate->set_target = (void*)&lyra2re_set_target;
opt_target_factor = 128.0;
return true;
};

View File

@@ -53,11 +53,6 @@ int scanhash_lyra2z330( struct work *work, uint32_t max_nonce,
return 0;
}
void lyra2z330_set_target( struct work* work, double job_diff )
{
work_set_target( work, job_diff / (256.0 * opt_diff_factor) );
}
bool lyra2z330_thread_init()
{
const int64_t ROW_LEN_INT64 = BLOCK_LEN_INT64 * 256; // nCols
@@ -76,7 +71,7 @@ bool register_lyra2z330_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_lyra2z330;
gate->hash = (void*)&lyra2z330_hash;
gate->get_max64 = (void*)&get_max64_0xffffLL;
gate->set_target = (void*)&lyra2z330_set_target;
opt_target_factor = 256.0;
return true;
};

View File

@@ -323,9 +323,9 @@ bool register_m7m_algo( algo_gate_t *gate )
gate->build_stratum_request = (void*)&std_be_build_stratum_request;
gate->work_decode = (void*)&std_be_work_decode;
gate->submit_getwork_result = (void*)&std_be_submit_getwork_result;
gate->set_target = (void*)&scrypt_set_target;
gate->get_max64 = (void*)&get_max64_0x1ffff;
gate->set_work_data_endian = (void*)&set_work_data_big_endian;
opt_target_factor = 65536.0;
return true;
}

View File

@@ -10,8 +10,8 @@ bool register_hmq1725_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_hmq1725;
gate->hash = (void*)&hmq1725hash;
#endif
gate->set_target = (void*)&scrypt_set_target;
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
opt_target_factor = 65536.0;
return true;
};

View File

@@ -409,14 +409,3 @@ int scanhash_hmq1725( struct work *work, uint32_t max_nonce,
pdata[19] = n;
return 0;
}
/*
bool register_hmq1725_algo( algo_gate_t* gate )
{
init_hmq1725_ctx();
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
gate->set_target = (void*)&scrypt_set_target;
gate->scanhash = (void*)&scanhash_hmq1725;
gate->hash = (void*)&hmq1725hash;
return true;
};
*/

View File

@@ -41,6 +41,7 @@ void lbry_le_build_stratum_request( char *req, struct work *work,
free(xnonce2str);
}
/*
void lbry_build_block_header( struct work* g_work, uint32_t version,
uint32_t *prevhash, uint32_t *merkle_root,
uint32_t ntime, uint32_t nbits )
@@ -63,6 +64,7 @@ void lbry_build_block_header( struct work* g_work, uint32_t version,
g_work->data[ LBRY_NBITS_INDEX ] = nbits;
g_work->data[28] = 0x80000000;
}
*/
void lbry_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
{
@@ -92,11 +94,6 @@ void lbry_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
g_work->data[28] = 0x80000000;
}
void lbry_set_target( struct work* work, double job_diff )
{
work_set_target( work, job_diff / (256.0 * opt_diff_factor) );
}
int64_t lbry_get_max64() { return 0x1ffffLL; }
int lbry_get_work_data_size() { return LBRY_WORK_DATA_SIZE; }
@@ -119,11 +116,11 @@ bool register_lbry_algo( algo_gate_t* gate )
gate->build_stratum_request = (void*)&lbry_le_build_stratum_request;
// gate->build_block_header = (void*)&build_block_header;
gate->build_extraheader = (void*)&lbry_build_extraheader;
gate->set_target = (void*)&lbry_set_target;
gate->ntime_index = LBRY_NTIME_INDEX;
gate->nbits_index = LBRY_NBITS_INDEX;
gate->nonce_index = LBRY_NONCE_INDEX;
gate->get_work_data_size = (void*)&lbry_get_work_data_size;
opt_target_factor = 256.0;
return true;
}

View File

@@ -1089,13 +1089,13 @@ bool register_neoscrypt_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_neoscrypt;
gate->hash = (void*)&neoscrypt;
gate->get_max64 = (void*)&get_neoscrypt_max64;
gate->set_target = (void*)&scrypt_set_target;
gate->wait_for_diff = (void*)&neoscrypt_wait_for_diff;
gate->build_stratum_request = (void*)&std_be_build_stratum_request;
gate->work_decode = (void*)&std_be_work_decode;
gate->submit_getwork_result = (void*)&std_be_submit_getwork_result;
gate->set_work_data_endian = (void*)&set_work_data_big_endian;
gate->get_work_data_size = (void*)&neoscrypt_get_work_data_size;
opt_target_factor = 65536.0;
return true;
};

View File

@@ -503,8 +503,8 @@ bool register_pluck_algo( algo_gate_t* gate )
gate->miner_thread_init = (void*)&pluck_miner_thread_init;
gate->scanhash = (void*)&scanhash_pluck;
gate->hash = (void*)&pluck_hash;
gate->set_target = (void*)&scrypt_set_target;
gate->get_max64 = (void*)&pluck_get_max64;
opt_target_factor = 65536.0;
return true;
};

View File

@@ -783,8 +783,9 @@ bool register_scrypt_algo( algo_gate_t* gate )
gate->miner_thread_init =(void*)&scrypt_miner_thread_init;
gate->scanhash = (void*)&scanhash_scrypt;
// gate->hash = (void*)&scrypt_1024_1_1_256_24way;
gate->set_target = (void*)&scrypt_set_target;
gate->get_max64 = (void*)&scrypt_get_max64;
opt_target_factor = 65536.0;
if ( !opt_param_n )
{

View File

@@ -240,8 +240,8 @@ bool register_scryptjane_algo( algo_gate_t* gate )
{
gate->scanhash = (void*)&scanhash_scryptjane;
gate->hash = (void*)&scryptjanehash;
gate->set_target = (void*)&scrypt_set_target;
gate->get_max64 = (void*)&get_max64_0x40LL;
opt_target_factor = 65536.0;
// figure out if arg in N or Nfactor
if ( !opt_param_n )

View File

@@ -120,19 +120,13 @@ int scanhash_fresh( struct work *work,
return 0;
}
void fresh_set_target( struct work* work, double job_diff )
{
work_set_target( work, job_diff / (256.0 * opt_diff_factor) );
}
bool register_fresh_algo( algo_gate_t* gate )
{
algo_not_tested();
gate->scanhash = (void*)&scanhash_fresh;
gate->hash = (void*)&freshhash;
gate->set_target = (void*)&fresh_set_target;
gate->get_max64 = (void*)&get_max64_0x3ffff;
opt_target_factor = 256.0;
return true;
};

View File

@@ -1,10 +1,5 @@
#include "timetravel-gate.h"
void tt8_set_target( struct work* work, double job_diff )
{
work_set_target( work, job_diff / (256.0 * opt_diff_factor) );
}
bool register_timetravel_algo( algo_gate_t* gate )
{
#ifdef TIMETRAVEL_4WAY
@@ -16,9 +11,9 @@ bool register_timetravel_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_timetravel;
gate->hash = (void*)&timetravel_hash;
#endif
gate->set_target = (void*)&tt8_set_target;
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
gate->get_max64 = (void*)&get_max64_0xffffLL;
opt_target_factor = 256.0;
return true;
};

View File

@@ -1,10 +1,5 @@
#include "timetravel10-gate.h"
void tt10_set_target( struct work* work, double job_diff )
{
work_set_target( work, job_diff / (256.0 * opt_diff_factor) );
}
bool register_timetravel10_algo( algo_gate_t* gate )
{
#ifdef TIMETRAVEL10_4WAY
@@ -16,9 +11,9 @@ bool register_timetravel10_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_timetravel10;
gate->hash = (void*)&timetravel10_hash;
#endif
gate->set_target = (void*)&tt10_set_target;
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
gate->get_max64 = (void*)&get_max64_0xffffLL;
opt_target_factor = 256.0;
return true;
};

View File

@@ -249,7 +249,6 @@ bool register_drop_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_drop;
gate->hash = (void*)&droplp_hash_pok;
gate->get_new_work = (void*)&drop_get_new_work;
gate->set_target = (void*)&scrypt_set_target;
gate->build_stratum_request = (void*)&std_be_build_stratum_request;
gate->work_decode = (void*)&std_be_work_decode;
gate->submit_getwork_result = (void*)&std_be_submit_getwork_result;
@@ -257,6 +256,7 @@ bool register_drop_algo( algo_gate_t* gate )
gate->decode_extra_data = (void*)&drop_display_pok;
gate->get_work_data_size = (void*)&drop_get_work_data_size;
gate->work_cmp_size = 72;
opt_target_factor = 65536.0;
return true;
};

View File

@@ -68,21 +68,7 @@ void x16r_4way_hash( void* output, const void* input )
int size = 80;
dintrlv_4x64( hash0, hash1, hash2, hash3, input, 640 );
/*
if ( s_ntime == UINT32_MAX )
{
const uint8_t* tmp = (uint8_t*) in0;
x16_r_s_getAlgoString( &tmp[4], hashOrder );
}
*/
// Input data is both 64 bit interleaved (input)
// and deinterleaved in inp0-3.
// If the first function uses 64 bit data it is not required to interleave inp
// first. It may use the interleaved data, whichever is most convenient, i.e. 4-way 64 bit.
// All other functions assume data is deinterleaved in hash0-3
// All functions must exit with data deinterleaved in hash0-3.
// Alias in0-3 points to either inp0-3 or hash0-3 according to
// its hashOrder position. Size is also set accordingly.
for ( int i = 0; i < 16; i++ )
{
const char elem = hashOrder[i];

View File

@@ -42,8 +42,23 @@ bool register_x16r_algo( algo_gate_t* gate )
gate->hash = (void*)&x16r_hash;
#endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
gate->set_target = (void*)&alt_set_target;
x16_r_s_getAlgoString = (void*)&x16r_getAlgoString;
opt_target_factor = 256.0;
return true;
};
// Register the x16rv2 entry points with the algo gate.
//
// The 4-way scanhash/hash pair is installed when X16R_4WAY is defined,
// otherwise the scalar pair is used. Also selects the x16r hash-order
// generator and sets the global target factor to 256.
//
// Always returns true.
bool register_x16rv2_algo( algo_gate_t* gate )
{
   x16_r_s_getAlgoString = (void*)&x16r_getAlgoString;
   opt_target_factor     = 256.0;
   gate->optimizations   = SSE2_OPT | AES_OPT | AVX2_OPT;
#ifdef X16R_4WAY
   gate->scanhash = (void*)&scanhash_x16rv2_4way;
   gate->hash     = (void*)&x16rv2_4way_hash;
#else
   gate->scanhash = (void*)&scanhash_x16rv2;
   gate->hash     = (void*)&x16rv2_hash;
#endif
   return true;
};
@@ -57,8 +72,8 @@ bool register_x16s_algo( algo_gate_t* gate )
gate->hash = (void*)&x16r_hash;
#endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
gate->set_target = (void*)&alt_set_target;
x16_r_s_getAlgoString = (void*)&x16s_getAlgoString;
opt_target_factor = 256.0;
return true;
};
@@ -189,7 +204,7 @@ bool register_x16rt_algo( algo_gate_t* gate )
gate->hash = (void*)&x16rt_hash;
#endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
gate->set_target = (void*)&alt_set_target;
opt_target_factor = 256.0;
return true;
};
@@ -203,8 +218,8 @@ bool register_x16rt_veil_algo( algo_gate_t* gate )
gate->hash = (void*)&x16rt_hash;
#endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
gate->set_target = (void*)&alt_set_target;
gate->build_extraheader = (void*)&veil_build_extraheader;
opt_target_factor = 256.0;
return true;
};
@@ -212,19 +227,13 @@ bool register_x16rt_veil_algo( algo_gate_t* gate )
//
// HEX
void hex_set_target( struct work* work, double job_diff )
{
work_set_target( work, job_diff / (128.0 * opt_diff_factor) );
}
bool register_hex_algo( algo_gate_t* gate )
{
gate->scanhash = (void*)&scanhash_hex;
gate->hash = (void*)&hex_hash;
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
gate->gen_merkle_root = (void*)&SHA256_gen_merkle_root;
gate->set_target = (void*)&hex_set_target;
opt_target_factor = 128.0;
return true;
};
@@ -244,8 +253,8 @@ bool register_x21s_algo( algo_gate_t* gate )
gate->miner_thread_init = (void*)&x21s_thread_init;
#endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | SHA_OPT;
gate->set_target = (void*)&alt_set_target;
x16_r_s_getAlgoString = (void*)&x16s_getAlgoString;
opt_target_factor = 256.0;
return true;
};

View File

@@ -38,6 +38,7 @@ void x16rt_getAlgoString( const uint32_t *timeHash, char *output );
void x16rt_getTimeHash( const uint32_t timeStamp, void* timeHash );
bool register_x16r_algo( algo_gate_t* gate );
bool register_x16rv2_algo( algo_gate_t* gate );
bool register_x16s_algo( algo_gate_t* gate );
bool register_x16rt_algo( algo_gate_t* gate );
bool register_hex__algo( algo_gate_t* gate );
@@ -49,6 +50,10 @@ void x16r_4way_hash( void *state, const void *input );
int scanhash_x16r_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
void x16rv2_4way_hash( void *state, const void *input );
int scanhash_x16rv2_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
void x16rt_4way_hash( void *state, const void *input );
int scanhash_x16rt_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
@@ -64,6 +69,10 @@ void x16r_hash( void *state, const void *input );
int scanhash_x16r( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
void x16rv2_hash( void *state, const void *input );
int scanhash_x16rv2( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
void x16rt_hash( void *state, const void *input );
int scanhash_x16rt( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );

384
algo/x16/x16rv2-4way.c Normal file
View File

@@ -0,0 +1,384 @@
/**
* x16rv2 algo implementation (4-way)
*
* Implementation by tpruvot@github Jan 2018
* Optimized by JayDDee@github Jan 2018
*/
#include "x16r-gate.h"
#if defined (X16R_4WAY)
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "algo/blake/blake-hash-4way.h"
#include "algo/bmw/bmw-hash-4way.h"
#include "algo/groestl/aes_ni/hash-groestl.h"
#include "algo/groestl/aes_ni/hash-groestl.h"
#include "algo/skein/skein-hash-4way.h"
#include "algo/jh/jh-hash-4way.h"
#include "algo/keccak/keccak-hash-4way.h"
#include "algo/shavite/sph_shavite.h"
#include "algo/luffa/luffa-hash-2way.h"
#include "algo/cubehash/cubehash_sse2.h"
#include "algo/simd/simd-hash-2way.h"
#include "algo/echo/aes_ni/hash_api.h"
#include "algo/hamsi/hamsi-hash-4way.h"
#include "algo/fugue/sph_fugue.h"
#include "algo/shabal/shabal-hash-4way.h"
#include "algo/whirlpool/sph_whirlpool.h"
#include "algo/sha/sha-hash-4way.h"
#include "algo/tiger/sph_tiger.h"
// Per-thread record of the ntime value that hashOrder was last generated
// for. Starts at UINT32_MAX; scanhash_x16rv2_4way compares it against the
// current work's ntime and regenerates hashOrder when they differ.
static __thread uint32_t s_ntime = UINT32_MAX;
// Per-thread hash order string. x16rv2_4way_hash reads one character per
// round to pick the hash function; the extra byte leaves room for a
// terminator since the string is also printed with %s.
static __thread char hashOrder[X16R_HASH_FUNC_COUNT + 1] = { 0 };
// Working state for the hash functions used by x16rv2_4way_hash.
// Each round initialises the member it needs before using it, so all the
// contexts can share the same storage.
typedef union _x16rv2_4way_context_overlay
{
   blake512_4way_context   blake;
   bmw512_4way_context     bmw;
   hashState_echo          echo;
   hashState_groestl       groestl;
   skein512_4way_context   skein;
   jh512_4way_context      jh;
   keccak512_4way_context  keccak;
   luffa_2way_context      luffa;
   cubehashParam           cube;
   sph_shavite512_context  shavite;
   simd_2way_context       simd;
   hamsi512_4way_context   hamsi;
   sph_fugue512_context    fugue;
   shabal512_4way_context  shabal;
   sph_whirlpool_context   whirlpool;
   sha512_4way_context     sha512;
   sph_tiger_context       tiger;
} x16rv2_4way_context_overlay;
// Pad the 24 byte tiger hash to 64 bytes by clearing the trailing 40 bytes.
//
// hash: buffer of at least 16 32-bit words. Words 0-5 (the tiger digest) are
//       left untouched, words 6-15 are set to zero.
//
// Declared static: a plain "inline" definition does not emit an external
// symbol in C99 and later, so any call the compiler chose not to inline
// would fail to link.
static inline void padtiger512( uint32_t* hash )
{
   for ( int i = 6; i < 16; i++ ) hash[i] = 0;
}
void x16rv2_4way_hash( void* output, const void* input )
{
uint32_t hash0[24] __attribute__ ((aligned (64)));
uint32_t hash1[24] __attribute__ ((aligned (64)));
uint32_t hash2[24] __attribute__ ((aligned (64)));
uint32_t hash3[24] __attribute__ ((aligned (64)));
uint32_t vhash[24*4] __attribute__ ((aligned (64)));
x16rv2_4way_context_overlay ctx;
void *in0 = (void*) hash0;
void *in1 = (void*) hash1;
void *in2 = (void*) hash2;
void *in3 = (void*) hash3;
int size = 80;
dintrlv_4x64( hash0, hash1, hash2, hash3, input, 640 );
for ( int i = 0; i < 16; i++ )
{
const char elem = hashOrder[i];
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
switch ( algo )
{
case BLAKE:
blake512_4way_init( &ctx.blake );
if ( i == 0 )
blake512_4way( &ctx.blake, input, size );
else
{
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
blake512_4way( &ctx.blake, vhash, size );
}
blake512_4way_close( &ctx.blake, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
break;
case BMW:
bmw512_4way_init( &ctx.bmw );
if ( i == 0 )
bmw512_4way( &ctx.bmw, input, size );
else
{
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
bmw512_4way( &ctx.bmw, vhash, size );
}
bmw512_4way_close( &ctx.bmw, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
break;
case GROESTL:
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash0,
(const char*)in0, size<<3 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash1,
(const char*)in1, size<<3 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash2,
(const char*)in2, size<<3 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash3,
(const char*)in3, size<<3 );
break;
case SKEIN:
skein512_4way_init( &ctx.skein );
if ( i == 0 )
skein512_4way( &ctx.skein, input, size );
else
{
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
skein512_4way( &ctx.skein, vhash, size );
}
skein512_4way_close( &ctx.skein, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
break;
case JH:
jh512_4way_init( &ctx.jh );
if ( i == 0 )
jh512_4way( &ctx.jh, input, size );
else
{
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
jh512_4way( &ctx.jh, vhash, size );
}
jh512_4way_close( &ctx.jh, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
break;
case KECCAK:
sph_tiger_init( &ctx.tiger );
sph_tiger( &ctx.tiger, in0, size );
sph_tiger_close( &ctx.tiger, hash0 );
sph_tiger_init( &ctx.tiger );
sph_tiger( &ctx.tiger, in1, size );
sph_tiger_close( &ctx.tiger, hash1 );
sph_tiger_init( &ctx.tiger );
sph_tiger( &ctx.tiger, in2, size );
sph_tiger_close( &ctx.tiger, hash2 );
sph_tiger_init( &ctx.tiger );
sph_tiger( &ctx.tiger, in3, size );
sph_tiger_close( &ctx.tiger, hash3 );
for ( int i = (24/4); i < (64/4); i++ )
hash0[i] = hash1[i] = hash2[i] = hash3[i] = 0;
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
keccak512_4way_init( &ctx.keccak );
keccak512_4way( &ctx.keccak, vhash, 64 );
keccak512_4way_close( &ctx.keccak, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
break;
case LUFFA:
sph_tiger_init( &ctx.tiger );
sph_tiger( &ctx.tiger, in0, size );
sph_tiger_close( &ctx.tiger, hash0 );
sph_tiger_init( &ctx.tiger );
sph_tiger( &ctx.tiger, in1, size );
sph_tiger_close( &ctx.tiger, hash1 );
for ( int i = (24/4); i < (64/4); i++ )
hash0[i] = hash1[i] = 0;
intrlv_2x128( vhash, hash0, hash1, 512 );
luffa_2way_init( &ctx.luffa, 512 );
luffa_2way_update_close( &ctx.luffa, vhash, vhash, 64 );
dintrlv_2x128( hash0, hash1, vhash, 512 );
sph_tiger_init( &ctx.tiger );
sph_tiger( &ctx.tiger, in2, size );
sph_tiger_close( &ctx.tiger, hash2 );
sph_tiger_init( &ctx.tiger );
sph_tiger( &ctx.tiger, in3, size );
sph_tiger_close( &ctx.tiger, hash3 );
for ( int i = (24/4); i < (64/4); i++ )
hash2[i] = hash3[i] = 0;
intrlv_2x128( vhash, hash2, hash3, 512 );
luffa_2way_init( &ctx.luffa, 512 );
luffa_2way_update_close( &ctx.luffa, vhash, vhash, 64 );
dintrlv_2x128( hash2, hash3, vhash, 512 );
break;
case CUBEHASH:
cubehashInit( &ctx.cube, 512, 16, 32 );
cubehashUpdateDigest( &ctx.cube, (byte*) hash0,
(const byte*)in0, size );
cubehashInit( &ctx.cube, 512, 16, 32 );
cubehashUpdateDigest( &ctx.cube, (byte*) hash1,
(const byte*)in1, size );
cubehashInit( &ctx.cube, 512, 16, 32 );
cubehashUpdateDigest( &ctx.cube, (byte*) hash2,
(const byte*)in2, size );
cubehashInit( &ctx.cube, 512, 16, 32 );
cubehashUpdateDigest( &ctx.cube, (byte*) hash3,
(const byte*)in3, size );
break;
case SHAVITE:
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, in0, size );
sph_shavite512_close( &ctx.shavite, hash0 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, in1, size );
sph_shavite512_close( &ctx.shavite, hash1 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, in2, size );
sph_shavite512_close( &ctx.shavite, hash2 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, in3, size );
sph_shavite512_close( &ctx.shavite, hash3 );
break;
case SIMD:
intrlv_2x128( vhash, in0, in1, size<<3 );
simd_2way_init( &ctx.simd, 512 );
simd_2way_update_close( &ctx.simd, vhash, vhash, size<<3 );
dintrlv_2x128( hash0, hash1, vhash, 512 );
intrlv_2x128( vhash, in2, in3, size<<3 );
simd_2way_init( &ctx.simd, 512 );
simd_2way_update_close( &ctx.simd, vhash, vhash, size<<3 );
dintrlv_2x128( hash2, hash3, vhash, 512 );
break;
case ECHO:
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence *)hash0,
(const BitSequence*)in0, size<<3 );
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence *)hash1,
(const BitSequence*)in1, size<<3 );
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence *)hash2,
(const BitSequence*)in2, size<<3 );
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence *)hash3,
(const BitSequence*)in3, size<<3 );
break;
case HAMSI:
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
hamsi512_4way_init( &ctx.hamsi );
hamsi512_4way( &ctx.hamsi, vhash, size );
hamsi512_4way_close( &ctx.hamsi, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
break;
case FUGUE:
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, in0, size );
sph_fugue512_close( &ctx.fugue, hash0 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, in1, size );
sph_fugue512_close( &ctx.fugue, hash1 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, in2, size );
sph_fugue512_close( &ctx.fugue, hash2 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, in3, size );
sph_fugue512_close( &ctx.fugue, hash3 );
break;
case SHABAL:
intrlv_4x32( vhash, in0, in1, in2, in3, size<<3 );
shabal512_4way_init( &ctx.shabal );
shabal512_4way( &ctx.shabal, vhash, size );
shabal512_4way_close( &ctx.shabal, vhash );
dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 512 );
break;
case WHIRLPOOL:
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, in0, size );
sph_whirlpool_close( &ctx.whirlpool, hash0 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, in1, size );
sph_whirlpool_close( &ctx.whirlpool, hash1 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, in2, size );
sph_whirlpool_close( &ctx.whirlpool, hash2 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, in3, size );
sph_whirlpool_close( &ctx.whirlpool, hash3 );
break;
case SHA_512:
sph_tiger_init( &ctx.tiger );
sph_tiger( &ctx.tiger, in0, size );
sph_tiger_close( &ctx.tiger, hash0 );
sph_tiger_init( &ctx.tiger );
sph_tiger( &ctx.tiger, in1, size );
sph_tiger_close( &ctx.tiger, hash1 );
sph_tiger_init( &ctx.tiger );
sph_tiger( &ctx.tiger, in2, size );
sph_tiger_close( &ctx.tiger, hash2 );
sph_tiger_init( &ctx.tiger );
sph_tiger( &ctx.tiger, in3, size );
sph_tiger_close( &ctx.tiger, hash3 );
for ( int i = (24/4); i < (64/4); i++ )
hash0[i] = hash1[i] = hash2[i] = hash3[i] = 0;
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
sha512_4way_init( &ctx.sha512 );
sha512_4way( &ctx.sha512, vhash, 64 );
sha512_4way_close( &ctx.sha512, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
break;
}
size = 64;
}
memcpy( output, hash0, 32 );
memcpy( output+32, hash1, 32 );
memcpy( output+64, hash2, 32 );
memcpy( output+96, hash3, 32 );
}
int scanhash_x16rv2_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr)
{
uint32_t hash[4*16] __attribute__ ((aligned (64)));
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
uint32_t endiandata[20] __attribute__((aligned(64)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t Htarg = ptarget[7];
const uint32_t first_nonce = pdata[19];
uint32_t n = first_nonce;
int thr_id = mythr->id; // thr_id arg is deprecated
__m256i *noncev = (__m256i*)vdata + 9; // aligned
volatile uint8_t *restart = &(work_restart[thr_id].restart);
casti_m256i( endiandata, 0 ) = mm256_bswap_32( casti_m256i( pdata, 0 ) );
casti_m256i( endiandata, 1 ) = mm256_bswap_32( casti_m256i( pdata, 1 ) );
casti_m128i( endiandata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
if ( s_ntime != endiandata[17] )
{
uint32_t ntime = swab32(pdata[17]);
x16_r_s_getAlgoString( (const uint8_t*) (&endiandata[1]), hashOrder );
s_ntime = ntime;
if ( opt_debug && !thr_id )
applog( LOG_DEBUG, "hash order %s (%08x)", hashOrder, ntime );
}
if ( opt_benchmark )
ptarget[7] = 0x0cff;
uint64_t *edata = (uint64_t*)endiandata;
intrlv_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
do
{
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
x16rv2_4way_hash( hash, vdata );
pdata[19] = n;
for ( int i = 0; i < 4; i++ ) if ( (hash+(i<<3))[7] <= Htarg )
if( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
{
pdata[19] = n+i;
submit_lane_solution( work, hash+(i<<3), mythr, i );
}
n += 4;
} while ( ( n < max_nonce ) && !(*restart) );
*hashes_done = n - first_nonce + 1;
return 0;
}
#endif

247
algo/x16/x16rv2.c Normal file
View File

@@ -0,0 +1,247 @@
/**
* x16rv2 algo implementation
*
* Implementation by tpruvot@github Jan 2018
* Optimized by JayDDee@github Jan 2018
*/
#include "x16r-gate.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "algo/blake/sph_blake.h"
#include "algo/bmw/sph_bmw.h"
#include "algo/groestl/sph_groestl.h"
#include "algo/jh/sph_jh.h"
#include "algo/keccak/sph_keccak.h"
#include "algo/skein/sph_skein.h"
#include "algo/shavite/sph_shavite.h"
#include "algo/luffa/luffa_for_sse2.h"
#include "algo/cubehash/cubehash_sse2.h"
#include "algo/simd/nist.h"
#include "algo/echo/sph_echo.h"
#include "algo/hamsi/sph_hamsi.h"
#include "algo/fugue/sph_fugue.h"
#include "algo/shabal/sph_shabal.h"
#include "algo/whirlpool/sph_whirlpool.h"
#include <openssl/sha.h>
#include "algo/tiger/sph_tiger.h"
#if defined(__AES__)
#include "algo/echo/aes_ni/hash_api.h"
#include "algo/groestl/aes_ni/hash-groestl.h"
#endif
// Per-thread cache of the hash order. s_ntime holds the byte swapped ntime
// for which hashOrder was last generated; UINT32_MAX means not yet set.
static __thread uint32_t s_ntime = UINT32_MAX;
// NUL terminated string of X16R_HASH_FUNC_COUNT characters, one per round,
// each selecting the hash function used in that round.
static __thread char hashOrder[X16R_HASH_FUNC_COUNT + 1] = { 0 };
// Overlay of the contexts of every hash function used by x16rv2. The
// functions run strictly one after another and each is initialized right
// before use, so a single union is shared instead of separate contexts.
union _x16rv2_context_overlay
{
// echo and groestl use the AES-NI implementations when AES is available,
// otherwise the portable sph versions.
#if defined(__AES__)
hashState_echo echo;
hashState_groestl groestl;
#else
sph_groestl512_context groestl;
sph_echo512_context echo;
#endif
sph_blake512_context blake;
sph_bmw512_context bmw;
sph_skein512_context skein;
sph_jh512_context jh;
sph_keccak512_context keccak;
hashState_luffa luffa;
cubehashParam cube;
sph_shavite512_context shavite;
hashState_sd simd;
sph_hamsi512_context hamsi;
sph_fugue512_context fugue;
sph_shabal512_context shabal;
sph_whirlpool_context whirlpool;
SHA512_CTX sha512;
// tiger pre-hashes the input of the keccak, luffa and sha512 rounds.
sph_tiger_context tiger;
};
typedef union _x16rv2_context_overlay x16rv2_context_overlay;
// Pad the 24 byte tiger hash to 64 bytes by zeroing words 6..15 of hash.
// hash must point to at least 16 32-bit words; words 0..5 are untouched.
//
// static is required: a plain inline definition provides no external
// definition in C99 and later, so any call the compiler chooses not to
// inline (e.g. at -O0) would fail to link.
static inline void padtiger512( uint32_t *hash )
{
   for ( int i = (24/4); i < (64/4); i++ )
      hash[i] = 0;
}
void x16rv2_hash( void* output, const void* input )
{
uint32_t _ALIGN(128) hash[16];
x16rv2_context_overlay ctx;
void *in = (void*) input;
int size = 80;
/*
if ( s_ntime == UINT32_MAX )
{
const uint8_t* in8 = (uint8_t*) input;
x16_r_s_getAlgoString( &in8[4], hashOrder );
}
*/
for ( int i = 0; i < 16; i++ )
{
const char elem = hashOrder[i];
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
switch ( algo )
{
case BLAKE:
sph_blake512_init( &ctx.blake );
sph_blake512( &ctx.blake, in, size );
sph_blake512_close( &ctx.blake, hash );
break;
case BMW:
sph_bmw512_init( &ctx.bmw );
sph_bmw512(&ctx.bmw, in, size);
sph_bmw512_close(&ctx.bmw, hash);
break;
case GROESTL:
#if defined(__AES__)
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash,
(const char*)in, size<<3 );
#else
sph_groestl512_init( &ctx.groestl );
sph_groestl512( &ctx.groestl, in, size );
sph_groestl512_close(&ctx.groestl, hash);
#endif
break;
case SKEIN:
sph_skein512_init( &ctx.skein );
sph_skein512( &ctx.skein, in, size );
sph_skein512_close( &ctx.skein, hash );
break;
case JH:
sph_jh512_init( &ctx.jh );
sph_jh512(&ctx.jh, in, size );
sph_jh512_close(&ctx.jh, hash );
break;
case KECCAK:
sph_tiger_init( &ctx.tiger );
sph_tiger( &ctx.tiger, in, size );
sph_tiger_close( &ctx.tiger, hash );
padtiger512( hash );
sph_keccak512_init( &ctx.keccak );
sph_keccak512( &ctx.keccak, hash, 64 );
sph_keccak512_close( &ctx.keccak, hash );
break;
case LUFFA:
sph_tiger_init( &ctx.tiger );
sph_tiger( &ctx.tiger, in, size );
sph_tiger_close( &ctx.tiger, hash );
padtiger512( hash );
init_luffa( &ctx.luffa, 512 );
update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
(const BitSequence*)hash, 64 );
break;
case CUBEHASH:
cubehashInit( &ctx.cube, 512, 16, 32 );
cubehashUpdateDigest( &ctx.cube, (byte*) hash,
(const byte*)in, size );
break;
case SHAVITE:
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, in, size );
sph_shavite512_close( &ctx.shavite, hash );
break;
case SIMD:
init_sd( &ctx.simd, 512 );
update_final_sd( &ctx.simd, (BitSequence *)hash,
(const BitSequence*)in, size<<3 );
break;
case ECHO:
#if defined(__AES__)
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence *)hash,
(const BitSequence*)in, size<<3 );
#else
sph_echo512_init( &ctx.echo );
sph_echo512( &ctx.echo, in, size );
sph_echo512_close( &ctx.echo, hash );
#endif
break;
case HAMSI:
sph_hamsi512_init( &ctx.hamsi );
sph_hamsi512( &ctx.hamsi, in, size );
sph_hamsi512_close( &ctx.hamsi, hash );
break;
case FUGUE:
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, in, size );
sph_fugue512_close( &ctx.fugue, hash );
break;
case SHABAL:
sph_shabal512_init( &ctx.shabal );
sph_shabal512( &ctx.shabal, in, size );
sph_shabal512_close( &ctx.shabal, hash );
break;
case WHIRLPOOL:
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, in, size );
sph_whirlpool_close( &ctx.whirlpool, hash );
break;
case SHA_512:
sph_tiger_init( &ctx.tiger );
sph_tiger( &ctx.tiger, in, size );
sph_tiger_close( &ctx.tiger, hash );
padtiger512( hash );
SHA512_Init( &ctx.sha512 );
SHA512_Update( &ctx.sha512, hash, 64 );
SHA512_Final( (unsigned char*) hash, &ctx.sha512 );
break;
}
in = (void*) hash;
size = 64;
}
memcpy(output, hash, 32);
}
int scanhash_x16rv2( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t _ALIGN(128) hash32[8];
uint32_t _ALIGN(128) endiandata[20];
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t Htarg = ptarget[7];
const uint32_t first_nonce = pdata[19];
int thr_id = mythr->id; // thr_id arg is deprecated
uint32_t nonce = first_nonce;
volatile uint8_t *restart = &(work_restart[thr_id].restart);
casti_m128i( endiandata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
casti_m128i( endiandata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
casti_m128i( endiandata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
casti_m128i( endiandata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
casti_m128i( endiandata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
if ( s_ntime != pdata[17] )
{
uint32_t ntime = swab32(pdata[17]);
x16_r_s_getAlgoString( (const uint8_t*) (&endiandata[1]), hashOrder );
s_ntime = ntime;
if ( opt_debug && !thr_id )
applog( LOG_DEBUG, "hash order %s (%08x)", hashOrder, ntime );
}
if ( opt_benchmark )
ptarget[7] = 0x0cff;
do
{
be32enc( &endiandata[19], nonce );
x16rv2_hash( hash32, endiandata );
if ( hash32[7] <= Htarg )
if (fulltest( hash32, ptarget ) && !opt_benchmark )
{
pdata[19] = nonce;
submit_solution( work, hash32, mythr );
}
nonce++;
} while ( nonce < max_nonce && !(*restart) );
pdata[19] = nonce;
*hashes_done = pdata[19] - first_nonce + 1;
return 0;
}

View File

@@ -1,10 +1,5 @@
#include "xevan-gate.h"
void xevan_set_target( struct work* work, double job_diff )
{
work_set_target( work, job_diff / (256.0 * opt_diff_factor) );
}
bool register_xevan_algo( algo_gate_t* gate )
{
#if defined (XEVAN_4WAY)
@@ -17,8 +12,8 @@ bool register_xevan_algo( algo_gate_t* gate )
gate->hash = (void*)&xevan_hash;
#endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
gate->set_target = (void*)&xevan_set_target;
gate->get_max64 = (void*)&get_max64_0xffffLL;
opt_target_factor = 256.0;
return true;
};

View File

@@ -26,9 +26,9 @@ bool register_x20r_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_x20r;
gate->hash = (void*)&x20r_hash;
#endif
gate->set_target = (void*)&alt_set_target;
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
x20_r_s_getAlgoString = (void*)&x20r_getAlgoString;
opt_target_factor = 256.;
return true;
};

View File

@@ -431,7 +431,7 @@ void yescrypt_gate_base(algo_gate_t *gate )
gate->optimizations = SSE2_OPT | SHA_OPT;
gate->scanhash = (void*)&scanhash_yescrypt;
gate->hash = (void*)&yescrypt_hash;
gate->set_target = (void*)&scrypt_set_target;
opt_target_factor = 65536.0;
}
bool register_yescrypt_algo( algo_gate_t* gate )
@@ -458,11 +458,10 @@ bool register_yescrypt_algo( algo_gate_t* gate )
YESCRYPT_P = 1;
applog(LOG_NOTICE,"Yescrypt parameters: N= %d, R= %d.", YESCRYPT_N,
YESCRYPT_R );
applog( LOG_NOTICE,"Yescrypt parameters: N= %d, R= %d.", YESCRYPT_N,
YESCRYPT_R );
if ( yescrypt_client_key )
applog(LOG_NOTICE,"Key= ""%s"", len= %d.\n", yescrypt_client_key,
yescrypt_client_key_len );
applog( LOG_NOTICE,"Key= \"%s\"\n", yescrypt_client_key );
return true;
}

View File

@@ -96,17 +96,16 @@ bool register_yespower_algo( algo_gate_t* gate )
yespower_params.perslen = 0;
}
applog(LOG_NOTICE,"Yespower parameters: N= %d, R= %d.", yespower_params.N,
yespower_params.r );
applog( LOG_NOTICE,"Yespower parameters: N= %d, R= %d.", yespower_params.N,
yespower_params.r );
if ( yespower_params.pers )
applog(LOG_NOTICE,"Key= ""%s"", len= %d.\n", yespower_params.pers,
(int)yespower_params.perslen );
applog( LOG_NOTICE,"Key= \"%s\"\n", yespower_params.pers );
gate->optimizations = SSE2_OPT;
gate->get_max64 = (void*)&yespower_get_max64;
gate->scanhash = (void*)&scanhash_yespower;
gate->hash = (void*)&yespower_hash;
gate->set_target = (void*)&scrypt_set_target;
opt_target_factor = 65536.0;
return true;
};
@@ -121,7 +120,7 @@ bool register_yespowerr16_algo( algo_gate_t* gate )
gate->get_max64 = (void*)&yespower_get_max64;
gate->scanhash = (void*)&scanhash_yespower;
gate->hash = (void*)&yespower_hash;
gate->set_target = (void*)&scrypt_set_target;
opt_target_factor = 65536.0;
return true;
};
@@ -140,13 +139,13 @@ bool register_yescrypt_05_algo( algo_gate_t* gate )
{
gate->optimizations = SSE2_OPT | SHA_OPT;
gate->scanhash = (void*)&scanhash_yespower;
gate->set_target = (void*)&scrypt_set_target;
gate->get_max64 = (void*)&yescrypt_05_get_max64;
yespower_params.version = YESPOWER_0_5;
yespower_params.N = 2048;
yespower_params.r = 8;
yespower_params.pers = NULL;
yespower_params.perslen = 0;
opt_target_factor = 65536.0;
return true;
}
@@ -154,13 +153,13 @@ bool register_yescryptr8_05_algo( algo_gate_t* gate )
{
gate->optimizations = SSE2_OPT | SHA_OPT;
gate->scanhash = (void*)&scanhash_yespower;
gate->set_target = (void*)&scrypt_set_target;
gate->get_max64 = (void*)&yescrypt_05_get_max64;
yespower_params.version = YESPOWER_0_5;
yespower_params.N = 2048;
yespower_params.r = 8;
yespower_params.pers = "Client Key";
yespower_params.perslen = 10;
opt_target_factor = 65536.0;
return true;
}
@@ -168,13 +167,13 @@ bool register_yescryptr16_05_algo( algo_gate_t* gate )
{
gate->optimizations = SSE2_OPT | SHA_OPT;
gate->scanhash = (void*)&scanhash_yespower;
gate->set_target = (void*)&scrypt_set_target;
gate->get_max64 = (void*)&yescryptr16_05_get_max64;
yespower_params.version = YESPOWER_0_5;
yespower_params.N = 4096;
yespower_params.r = 16;
yespower_params.pers = NULL;
yespower_params.perslen = 0;
opt_target_factor = 65536.0;
return true;
}
@@ -182,13 +181,13 @@ bool register_yescryptr32_05_algo( algo_gate_t* gate )
{
gate->optimizations = SSE2_OPT | SHA_OPT;
gate->scanhash = (void*)&scanhash_yespower;
gate->set_target = (void*)&scrypt_set_target;
gate->get_max64 = (void*)&yescryptr16_05_get_max64;
yespower_params.version = YESPOWER_0_5;
yespower_params.N = 4096;
yespower_params.r = 32;
yespower_params.pers = "WaviBanana";
yespower_params.perslen = 10;
opt_target_factor = 65536.0;
return true;
}

20
configure vendored
View File

@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.9.7.
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.9.8.
#
#
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
@@ -577,8 +577,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='cpuminer-opt'
PACKAGE_TARNAME='cpuminer-opt'
PACKAGE_VERSION='3.9.7'
PACKAGE_STRING='cpuminer-opt 3.9.7'
PACKAGE_VERSION='3.9.8'
PACKAGE_STRING='cpuminer-opt 3.9.8'
PACKAGE_BUGREPORT=''
PACKAGE_URL=''
@@ -1332,7 +1332,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
\`configure' configures cpuminer-opt 3.9.7 to adapt to many kinds of systems.
\`configure' configures cpuminer-opt 3.9.8 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1404,7 +1404,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
short | recursive ) echo "Configuration of cpuminer-opt 3.9.7:";;
short | recursive ) echo "Configuration of cpuminer-opt 3.9.8:";;
esac
cat <<\_ACEOF
@@ -1509,7 +1509,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
cpuminer-opt configure 3.9.7
cpuminer-opt configure 3.9.8
generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2012,7 +2012,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
It was created by cpuminer-opt $as_me 3.9.7, which was
It was created by cpuminer-opt $as_me 3.9.8, which was
generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@
@@ -2993,7 +2993,7 @@ fi
# Define the identity of the package.
PACKAGE='cpuminer-opt'
VERSION='3.9.7'
VERSION='3.9.8'
cat >>confdefs.h <<_ACEOF
@@ -6690,7 +6690,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
This file was extended by cpuminer-opt $as_me 3.9.7, which was
This file was extended by cpuminer-opt $as_me 3.9.8, which was
generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -6756,7 +6756,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
cpuminer-opt config.status 3.9.7
cpuminer-opt config.status 3.9.8
configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\"

View File

@@ -1,4 +1,4 @@
AC_INIT([cpuminer-opt], [3.9.7])
AC_INIT([cpuminer-opt], [3.9.8])
AC_PREREQ([2.59c])
AC_CANONICAL_SYSTEM

View File

@@ -142,10 +142,10 @@ size_t rpc2_bloblen = 0;
uint32_t rpc2_target = 0;
char *rpc2_job_id = NULL;
double opt_diff_factor = 1.0;
double opt_target_factor = 1.0;
uint32_t zr5_pok = 0;
bool opt_stratum_stats = false;
bool opt_hash_meter = false;
uint32_t accepted_share_count = 0ULL;
uint32_t rejected_share_count = 0ULL;
uint32_t solved_block_count = 0ULL;
@@ -826,39 +826,60 @@ void scale_hash_for_display ( double* hashrate, char* units )
{ *units = 'P'; *hashrate /= 1e15; }
}
// Format an elapsed time of t seconds as "<hours>h<mm>m<ss>s", for example
// 3661 becomes "1h01m01s". Hours are not limited to two digits.
//
// s - destination buffer supplied by the caller. The longest possible
//     result for a 64 bit t is 23 characters plus the terminating NUL.
// t - elapsed time in seconds.
void format_hms( char *s, uint64_t t )
{
   const uint64_t sec = t % 60;
   const uint64_t min = ( t / 60 ) % 60;
   const uint64_t hrs = t / 3600;

   // uint64_t is not unsigned long on every ABI (e.g. 64 bit Windows), so
   // "%lu" would mismatch its arguments there. Convert explicitly to a type
   // with a portable conversion specifier.
   sprintf( s, "%lluh%02llum%02llus", (unsigned long long)hrs,
            (unsigned long long)min, (unsigned long long)sec );
}
// Bitcoin formula for converting a share's difficulty to an equivalent
// number of hashes.
//
// https://en.bitcoin.it/wiki/Difficulty
//
// H = D * 2**48 / 0xffff
// = D * 2**32
// hash = diff * 2**48 / 0xffff
// = diff * 2**32
//
// That formula doesn't seem to be accurate but an adjustment to the
// constant produces correct results.
// The formula seems to work fine when calculating TTF based on difficulty
// (network or share) and the miner's calculated hash rate.
//
// The formula used is:
// time = diff * 2**32 / hashrate
//
// hash = sharediff * 2**48 / 0x3fff
// = sharediff * 2**30
// = sharediff * diff2hash
// But it's off by a factor of 4 when calculating the effective hash rate
// based on share difficulty and time.
//
// The modified formula used is:
//
// hash = diff * 2**48 / 0x3fff
// = diff * 2**30
//
// Two constants are defined for the same purpose and used as appropriate
// to produce expected, presumed correct, results. Not very scientific
// but the results speak for themselves.
//
// diff_to_hash = 2**32 = 0x100000000 = 4294967296;
// diff_to_hash_hack = 2**30 = 0x40000000 = 1073741824;
const uint64_t diff2hash = 0x40000000ULL;
const double diff_to_hash = 4294967296.;
const double diff_to_hash_hack = 1073741824.;
static struct timeval five_min_start;
static double shash_sum = 0.;
static double bhash_sum = 0.;
static double time_sum = 0.;
static double latency_sum = 0.;
static uint64_t submit_sum = 0;
static uint64_t reject_sum = 0;
static uint32_t last_bloc_height = 0;
// Record of one submitted share, queued at submit time and consumed when
// the share's result is reported.
struct share_stats_t
{
// Time the share was submitted. tv_sec == 0 marks a free slot.
struct timeval submit_time;
// Network difficulty at the time of submission.
double net_diff;
// Difficulty of the submitted share.
double share_diff;
// Job id of the work that produced the share.
// NOTE(review): confirm this is still populated by the submit path.
char job_id[32];
};
// with more and more parallelism the chances of submitting multiple
@@ -876,20 +897,16 @@ static inline int stats_ptr_incr( int p )
static int share_result( int result, struct work *null_work,
const char *reason )
{
double share_time = 0., share_hash = 0., block_hash = 0., share_size = 0.;
double share_time = 0., share_hash = 0., block_hash = 0., share_ratio = 0.;
double hashcount = 0., hashrate = 0.;
uint64_t latency = 0;
int latency = 0;
struct share_stats_t my_stats = {0};
struct timeval ack_time, latency_tv, et;
char hr[32];
char hr_units[4] = {0};
char shr[32];
char shr_units[4] = {0};
char diffstr[32];
const char *sres = NULL;
bool solved = false;
// Mutex while we grab asnapshot of the global counters.
// Mutex while we grab a snapshot of the global counters.
pthread_mutex_lock( &stats_lock );
// When submit_work detects a buffer overflow it discards the stats for
@@ -930,9 +947,9 @@ static int share_result( int result, struct work *null_work,
}
// calculate share hashrate and size
share_hash = my_stats.share_diff * diff2hash;
block_hash = my_stats.net_diff * diff2hash;
share_size = block_hash == 0. ? 0. : share_hash / block_hash * 100.;
share_hash = my_stats.share_diff * diff_to_hash_hack;
block_hash = my_stats.net_diff * diff_to_hash_hack;
share_ratio = block_hash == 0. ? 0. : share_hash / block_hash * 100.;
// check result
result ? accepted_share_count++ : rejected_share_count++;
@@ -943,8 +960,6 @@ static int share_result( int result, struct work *null_work,
// update counters for 5 minute summary report
pthread_mutex_lock( &stats_lock );
shash_sum += share_hash;
bhash_sum += block_hash;
time_sum += share_time;
submit_sum ++;
reject_sum += (uint64_t)!result;
@@ -952,73 +967,22 @@ static int share_result( int result, struct work *null_work,
pthread_mutex_unlock( &stats_lock );
double share_hash_rate = share_time == 0. ? 0. : share_hash / share_time;
double scaled_shr;
scaled_shr = share_hash_rate;
scale_hash_for_display ( &scaled_shr, shr_units );
if ( use_colors )
{
sres = ( solved ? ( CL_MAG "BLOCK SOLVED" CL_WHT )
: result ? ( CL_GRN "Accepted" CL_WHT )
: ( CL_RED "Rejected" CL_WHT ) );
// colour code the share diff to highlight high value.
if ( solved )
sprintf( diffstr, "%s%.3g%s", CL_MAG, my_stats.share_diff, CL_WHT );
else if ( my_stats.share_diff > ( my_stats.net_diff * 0.1 ) )
sprintf( diffstr, "%s%.3g%s", CL_GRN, my_stats.share_diff, CL_WHT );
else if ( my_stats.share_diff > ( my_stats.net_diff * 0.01 ) )
sprintf( diffstr, "%s%.3g%s", CL_CYN, my_stats.share_diff, CL_WHT );
else
sprintf( diffstr, "%.3g", my_stats.share_diff );
if ( hashrate ) // don't colour share hash rate without reference rate.
{
if ( share_hash_rate > 768. * hashrate )
sprintf( shr, "%s%.2f %sH/s%s", CL_MAG, scaled_shr, shr_units,
CL_WHT );
else if ( share_hash_rate > 32. * hashrate )
sprintf( shr, "%s%.2f %sH/s%s", CL_GRN, scaled_shr, shr_units,
CL_WHT );
else if ( share_hash_rate > 2.0 * hashrate )
sprintf( shr, "%s%.2f %sH/s%s", CL_CYN, scaled_shr, shr_units,
CL_WHT );
else if ( share_hash_rate > 0.5 * hashrate )
sprintf( shr, "%.2f %sH/s", scaled_shr, shr_units );
else
sprintf( shr, "%s%.2f %sH/s%s", CL_YLW, scaled_shr, shr_units,
CL_WHT );
}
else
sprintf( shr, "%.2f %sH/s", scaled_shr, shr_units );
}
else // monochrome
{
sres = ( solved ? "BLOCK SOLVED" : result ? "Accepted" : "Rejected" );
sprintf( diffstr, "%.3g", my_stats.share_diff );
sprintf( shr, "%.2f %sH/s", scaled_shr, shr_units );
}
sres = solved ? "BLOCK SOLVED" : ( result ? "Accepted" : "Rejected" );
scale_hash_for_display ( &hashrate, hr_units );
if ( hashrate < 10. )
sprintf(hr, "%.4f", hashrate );
else
sprintf(hr, "%.2f", hashrate );
sprintf( diffstr, "%.3g", my_stats.share_diff );
applog( LOG_NOTICE, "%s, diff %s, %.3f secs, A/R/B: %d/%d/%d.",
sres, diffstr, share_time, accepted_share_count,
applog( LOG_NOTICE, "%s, %.3f secs (%dms), A/R/B: %d/%d/%d.",
sres, share_time, latency, accepted_share_count,
rejected_share_count, solved_block_count );
if ( have_stratum && result && !opt_quiet )
{
applog( LOG_NOTICE, "Miner %s %sH/s, Share %s, Latency %d ms.",
hr, hr_units, shr, latency );
applog( LOG_NOTICE, "Height %d, job %s, %.5f%% block share.",
stratum.bloc_height, my_stats.job_id, share_size );
applog(LOG_INFO,"- - - - - - - - - - - - - - - - - - - - - - - - - - -");
}
applog2( LOG_INFO, "Share diff %s (%5f%%), block %d",
diffstr, share_ratio, stratum.bloc_height );
if ( reason )
applog( LOG_WARNING, "reject reason: %s.", reason );
@@ -1570,16 +1534,16 @@ static bool get_work(struct thr_info *thr, struct work *work)
struct work *work_heap;
if (opt_benchmark)
{
{
uint32_t ts = (uint32_t) time(NULL);
// why 74? std cmp_size is 76, std data is 128
// why 74? std cmp_size is 76, std data is 128
for ( int n = 0; n < 74; n++ ) ( (char*)work->data )[n] = n;
work->data[algo_gate.ntime_index] = swab32(ts); // ntime
work->data[algo_gate.ntime_index] = swab32(ts); // ntime
// this overwrites much of the for loop init
memset( work->data + algo_gate.nonce_index, 0x00, 52); // nonce..nonce+52
// this overwrites much of the for loop init
memset( work->data + algo_gate.nonce_index, 0x00, 52); // nonce..nonce+52
work->data[20] = 0x80000000; // extraheader not used for jr2
work->data[31] = 0x00000280; // extraheader not used for jr2
return true;
@@ -1606,26 +1570,10 @@ static bool get_work(struct thr_info *thr, struct work *work)
return true;
}
bool submit_work(struct thr_info *thr, const struct work *work_in)
static bool submit_work( struct thr_info *thr, const struct work *work_in )
{
struct workio_cmd *wc;
// collect some share stats
pthread_mutex_lock( &stats_lock );
// if buffer full discard stats and don't increment pointer.
// We're on the clock so let share_result report it.
if ( share_stats[ s_put_ptr ].submit_time.tv_sec == 0 )
{
gettimeofday( &share_stats[ s_put_ptr ].submit_time, NULL );
share_stats[ s_put_ptr ].share_diff = work_in->sharediff;
share_stats[ s_put_ptr ].net_diff = net_diff;
strcpy( share_stats[ s_put_ptr ].job_id, work_in->job_id );
s_put_ptr = stats_ptr_incr( s_put_ptr );
}
pthread_mutex_unlock( &stats_lock );
/* fill out work request message */
wc = (struct workio_cmd *) calloc(1, sizeof(*wc));
if (!wc)
@@ -1646,6 +1594,76 @@ err_out:
return false;
}
// Convert a 256 bit unsigned integer, stored as four 64 bit limbs with the
// least significant limb first, to the nearest representable double.
static inline double u256_to_double( const uint64_t* u )
{
   const double two_pow_64 = 4294967296.0 * 4294967296.0;

   // Horner evaluation starting from the most significant limb.
   double acc = u[3];
   for ( int limb = 2; limb >= 0; limb-- )
      acc = u[limb] + two_pow_64 * acc;
   return acc;
}
// Compute the difficulty of a found share and queue a stats record for it.
//
// work - the work the share belongs to; work->sharediff is updated.
// hash - the share's 256 bit hash.
//
// The share difficulty is targetdiff * target / hash. It is set to zero
// when opt_showdiff is off or the hash converts to zero.
void work_set_target_ratio( struct work* work, uint32_t* hash )
{
   double share_diff = 0.;

   if ( opt_showdiff )
   {
      const double hash_val = u256_to_double( (const uint64_t*)hash );
      if ( hash_val > 0. )
         share_diff = work->targetdiff *
             u256_to_double( (const uint64_t*)( work->target ) ) / hash_val;
   }
   work->sharediff = share_diff;

   // collect some share stats
   pthread_mutex_lock( &stats_lock );

   // A slot with a non-zero submit time is still in use, meaning the buffer
   // is full. Discard the stats and leave the put pointer alone.
   // We're on the clock so let share_result report it.
   const int slot = s_put_ptr;
   if ( share_stats[ slot ].submit_time.tv_sec == 0 )
   {
      gettimeofday( &share_stats[ slot ].submit_time, NULL );
      share_stats[ slot ].share_diff = work->sharediff;
      share_stats[ slot ].net_diff = net_diff;
      s_put_ptr = stats_ptr_incr( slot );
   }

   pthread_mutex_unlock( &stats_lock );
}
// Submit a found share and record its stats.
//
// work - the work containing the winning nonce.
// hash - the share's hash, used to compute the share difficulty.
// thr  - the submitting thread.
//
// Returns true if the share was handed off for submission, false if
// submit_work failed (a warning is logged).
bool submit_solution( struct work *work, void *hash,
                      struct thr_info *thr )
{
   if ( !submit_work( thr, work ) )
   {
      applog( LOG_WARNING, "Failed to submit share." );
      return false;
   }

   work_set_target_ratio( work, hash );
   if ( !opt_quiet )
      applog( LOG_BLUE, "Share %d submitted by thread %d",
              accepted_share_count + rejected_share_count + 1, thr->id );
   return true;
}
// Submit a share found by one lane of a multi-way scanner and record its
// stats.
//
// work - the work containing the winning nonce.
// hash - the lane's hash, used to compute the share difficulty.
// thr  - the submitting thread.
// lane - index of the lane that found the share, only used for logging.
//
// Returns true if the share was handed off for submission, false if
// submit_work failed (a warning is logged).
bool submit_lane_solution( struct work *work, void *hash,
                           struct thr_info *thr, int lane )
{
   if ( !submit_work( thr, work ) )
   {
      applog( LOG_WARNING, "Failed to submit share." );
      return false;
   }

   work_set_target_ratio( work, hash );
   if ( !opt_quiet )
      applog( LOG_BLUE, "Share %d submitted by thread %d, lane %d",
              accepted_share_count + rejected_share_count + 1, thr->id, lane );
   return true;
}
bool rpc2_stratum_job( struct stratum_ctx *sctx, json_t *params )
{
bool ret = false;
@@ -1733,22 +1751,6 @@ void SHA256_gen_merkle_root( char* merkle_root, struct stratum_ctx* sctx )
}
}
// default
void std_set_target( struct work* work, double job_diff )
{
work_set_target( work, job_diff / opt_diff_factor );
}
// most scrypt based algos
void scrypt_set_target( struct work* work, double job_diff )
{
work_set_target( work, job_diff / (65536.0 * opt_diff_factor) );
}
// another popular choice.
void alt_set_target( struct work* work, double job_diff )
{
work_set_target( work, job_diff / (256.0 * opt_diff_factor) );
}
// Default is do_nothing (assumed LE)
void set_work_data_big_endian( struct work *work )
{
@@ -1911,39 +1913,27 @@ static void *miner_thread( void *userdata )
drop_policy();
}
// CPU thread affinity
/*
if ( num_cpus > 64 )
{
// opt_affinity ignored with more than 64 cpus.
if (opt_debug)
applog( LOG_DEBUG, "Binding thread %d to cpu %d",
thr_id, thr_id % num_cpus );
affine_to_cpu_mask( thr_id, -1 );
}
else
*/
if ( num_cpus > 1 )
{
#if AFFINITY_USES_UINT128
// Default affinity
if ( (opt_affinity == (uint128_t)(-1) ) && opt_n_threads > 1 )
{
// Default affinity
if ( (opt_affinity == (uint128_t)(-1) ) && opt_n_threads > 1 )
{
if ( opt_debug )
applog( LOG_DEBUG, "Binding thread %d to cpu %d.",
thr_id, thr_id % num_cpus,
u128_hi64( (uint128_t)1 << (thr_id % num_cpus) ),
u128_lo64( (uint128_t)1 << (thr_id % num_cpus) ) );
affine_to_cpu_mask( thr_id, (uint128_t)1 << (thr_id % num_cpus) );
}
}
#else
if ( (opt_affinity == -1LL) && opt_n_threads > 1 )
{
if ( (opt_affinity == -1LL) && opt_n_threads > 1 )
{
if (opt_debug)
applog( LOG_DEBUG, "Binding thread %d to cpu %d.",
thr_id, thr_id % num_cpus, 1LL << (thr_id % num_cpus)) ;
affine_to_cpu_mask( thr_id, 1ULL << (thr_id % num_cpus) );
}
}
#endif
else // Custom affinity
{
@@ -2016,7 +2006,7 @@ static void *miner_thread( void *userdata )
if (!wanna_mine(thr_id))
{
sleep(5);
continue;
continue;
}
// adjust max_nonce to meet target scan time
if (have_stratum)
@@ -2048,7 +2038,7 @@ static void *miner_thread( void *userdata )
"Mining timeout of %ds reached, exiting...", opt_time_limit);
proper_exit(0);
}
if (remain < max64) max64 = remain;
if ( remain < max64 ) max64 = remain;
}
// max64
uint32_t work_nonce = *( algo_gate.get_nonceptr( work.data ) );
@@ -2118,31 +2108,26 @@ static void *miner_thread( void *userdata )
else
{
// collect and reset global counters
double hash = shash_sum; shash_sum = 0.;
double bhash = bhash_sum; bhash_sum = 0.;
double time = time_sum; time_sum = 0.;
uint64_t submits = submit_sum; submit_sum = 0;
uint64_t rejects = reject_sum; reject_sum = 0;
uint64_t latency = latency_sum; latency_sum = 0;
int latency = latency_sum; latency_sum = 0;
memcpy( &five_min_start, &time_now, sizeof time_now );
pthread_mutex_unlock( &stats_lock );
double ghrate = global_hashrate;
double scaled_ghrate = ghrate;
double shrate = time == 0. ? 0. : hash / time;
double shrate = time == 0. ? 0. : diff_to_hash * work.targetdiff
* (double)submits / time;
double scaled_shrate = shrate;
double avg_share = bhash == 0. ? 0. : hash / bhash * 100.;
uint64_t avg_latency = 0;
int avg_latency = 0;
double latency_pc = 0.;
double rejects_pc = 0.;
double submit_rate = 0.;
char shr[32];
char shr_units[4] = {0};
char ghr[32];
char ghr_units[4] = {0};
int temp = cpu_temp(0);
char tempstr[32];
if ( submits )
avg_latency = latency / submits;
@@ -2150,73 +2135,44 @@ static void *miner_thread( void *userdata )
if ( time != 0. )
{
submit_rate = (double)submits*60. / time;
rejects_pc = (double)rejects / (time*10.);
latency_pc = (double)latency / ( time*10.);
}
scale_hash_for_display( &scaled_shrate, shr_units );
scale_hash_for_display( &scaled_ghrate, ghr_units );
sprintf( ghr, "%.2f %sH/s", scaled_ghrate, ghr_units );
sprintf( ghr, "%.2f %sh/s", scaled_ghrate, ghr_units );
if ( use_colors )
sprintf( shr, "%.2f %sh/s", scaled_shrate, shr_units );
applog( LOG_NOTICE,
"Submitted %d shares in %dm%02ds (%.2f /min), %ld rejected",
(uint64_t)submits, et.tv_sec / 60, et.tv_sec % 60,
submit_rate, rejects );
applog2( LOG_INFO, "Share eqv: %s, miner ref: %s", shr, ghr );
#if ((defined(_WIN64) || defined(__WINDOWS__)))
applog2( LOG_INFO, "Network latency %d ms (%.2f%%)",
avg_latency, latency_pc );
#else
int temp = cpu_temp(0);
char tempstr[32];
if ( use_colors && ( temp >= 70 ) )
{
if ( shrate > (128.*ghrate) )
sprintf( shr, "%s%.2f %sH/s%s", CL_MAG, scaled_shrate,
shr_units, CL_WHT );
else if ( shrate > (16.*ghrate) )
sprintf( shr, "%s%.2f %sH/s%s", CL_GRN, scaled_shrate,
shr_units, CL_WHT );
else if ( shrate > 2.0*ghrate )
sprintf( shr, "%s%.2f %sH/s%s", CL_CYN, scaled_shrate,
shr_units, CL_WHT );
else if ( shrate > 0.5*ghrate )
sprintf( shr, "%.2f %sH/s", scaled_shrate, shr_units );
else
sprintf( shr, "%s%.2f %sH/s%s", CL_YLW, scaled_shrate,
shr_units, CL_WHT );
if ( temp >= 80 ) sprintf( tempstr, "%s%d C%s",
CL_RED, temp, CL_WHT );
else if (temp >=70 ) sprintf( tempstr, "%s%d C%s",
CL_YLW, temp, CL_WHT );
else sprintf( tempstr, "%d C", temp );
if ( temp >= 80 ) sprintf( tempstr, "%sCPU temp %d C%s",
CL_WHT CL_RED, temp, CL_N );
else sprintf( tempstr, "%sCPU temp %d C%s",
CL_WHT CL_YLW, temp, CL_N );
}
else
{
sprintf( shr, "%.2f %sH/s", scaled_shrate, shr_units );
sprintf( tempstr, "%d C", temp );
}
sprintf( tempstr, "CPU temp %d C", temp );
applog(LOG_NOTICE,"Submitted %d shares in %dm%02ds.",
(uint64_t)submits, et.tv_sec / 60, et.tv_sec % 60 );
applog(LOG_NOTICE,"%d rejects (%.2f%%), %.5f%% block share.",
rejects, rejects_pc, avg_share );
applog(LOG_NOTICE,"Avg hashrate: Miner %s, Share %s.", ghr, shr );
#if ((defined(_WIN64) || defined(__WINDOWS__)))
applog(LOG_NOTICE,"Shares/min: %.2f, latency %d ms (%.2f%%).",
submit_rate, avg_latency, latency_pc );
#else
applog(LOG_NOTICE,"Shares/min: %.2f, latency %d ms (%.2f%%), temp: %s.",
submit_rate, avg_latency, latency_pc, tempstr );
applog2( LOG_INFO, "Network latency %d ms (%.2f%%), %s",
avg_latency, latency_pc, tempstr );
#endif
/*
applog(LOG_NOTICE,"Submitted %d shares in %dm%02ds, %.5f%% block share.",
(uint64_t)submits, et.tv_sec / 60, et.tv_sec % 60, avg_share );
#if ((defined(_WIN64) || defined(__WINDOWS__)))
applog(LOG_NOTICE,"Share hashrate %s, latency %d ms (%.2f%%).",
shr, avg_latency, latency_pc );
#else
applog(LOG_NOTICE,"Share hashrate %s, latency %d ms (%.2f%%), temp %s.",
shr, avg_latency, latency_pc, tempstr );
#endif
*/
applog(LOG_INFO,"- - - - - - - - - - - - - - - - - - - - - - - - - - -");
}
} // 5 minute summary
// display hashrate
if ( !opt_quiet )
@@ -2240,10 +2196,11 @@ static void *miner_thread( void *userdata )
else // no fractions of a hash
sprintf( hc, "%.0f", hashcount );
sprintf( hr, "%.2f", hashrate );
applog( LOG_INFO, "CPU #%d: %s %sH, %s %sH/s",
applog( LOG_INFO, "CPU #%d: %s %sh, %s %sh/s",
thr_id, hc, hc_units, hr, hr_units );
}
}
/*
if ( thr_id == 0 && !opt_benchmark )
{
hashcount = 0.;
@@ -2262,10 +2219,9 @@ static void *miner_thread( void *userdata )
else // no fractions of a hash
sprintf( hc, "%.0f", hashcount );
sprintf( hr, "%.2f", hashrate );
applog( LOG_NOTICE, "Miner perf: %s %sH, %s %sH/s.",
hc, hc_units, hr, hr_units );
}
}
*/
}
// Display benchmark total
@@ -2594,43 +2550,54 @@ void std_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
// Refresh the caller's work item (g_work) from the current stratum job held
// in sctx, set its target, and log a summary when the stratum difficulty or
// the block height has changed.
// NOTE(review): this listing appears to show superseded and replacement
// lines of a change side by side (the repeated abin2hex / applog / free
// lines and the two consecutive "if ( stratum_diff != ..." heads) — confirm
// against the real file before relying on the exact statement sequence.
void std_stratum_gen_work( struct stratum_ctx *sctx, struct work *g_work )
{
// Job fields are copied out of sctx while work_lock is held.
pthread_mutex_lock( &sctx->work_lock );
free( g_work->job_id );
g_work->job_id = strdup( sctx->job.job_id );
g_work->xnonce2_len = sctx->xnonce2_size;
// NOTE(review): the realloc result is assigned straight back and then used
// by memcpy without a NULL check.
g_work->xnonce2 = (uchar*) realloc( g_work->xnonce2, sctx->xnonce2_size );
memcpy( g_work->xnonce2, sctx->job.xnonce2, sctx->xnonce2_size );
algo_gate.build_extraheader( g_work, sctx );
net_diff = algo_gate.calc_network_diff( g_work );
algo_gate.set_work_data_endian( g_work );
pthread_mutex_unlock( &sctx->work_lock );
// if ( !opt_quiet )
// applog( LOG_BLUE,"New job %s.", g_work->job_id );
// Target is derived from the job difficulty scaled down by both factors.
work_set_target( g_work, sctx->job.diff
/ ( opt_target_factor * opt_diff_factor ) );
if ( opt_debug )
{
unsigned char *xnonce2str = abin2hex( g_work->xnonce2,
unsigned char *xnonce2str = abin2hex( g_work->xnonce2,
g_work->xnonce2_len );
applog( LOG_DEBUG, "DEBUG: job_id='%s' extranonce2=%s ntime=%08x",
applog( LOG_DEBUG, "DEBUG: job_id='%s' extranonce2=%s ntime=%08x",
g_work->job_id, xnonce2str, swab32( g_work->data[17] ) );
free( xnonce2str );
free( xnonce2str );
}
algo_gate.set_target( g_work, sctx->job.diff );
if ( stratum_diff != sctx->job.diff )
// Log new block
if ( ( stratum_diff != sctx->job.diff )
|| ( last_bloc_height != sctx->bloc_height ) )
{
// char sdiff[32] = { 0 };
// store for api stats
stratum_diff = sctx->job.diff;
if ( !opt_quiet && opt_showdiff && g_work->targetdiff != stratum_diff )
{
// snprintf( sdiff, 32, " (%.5f)", g_work->targetdiff );
applog( LOG_BLUE, "Stratum difficulty set to %g", stratum_diff );
// sdiff );
}
stratum_diff = sctx->job.diff;
// NOTE(review): the condition above reads sctx->bloc_height while the
// stored and logged value comes from the global stratum — presumably sctx
// points at stratum; confirm.
last_bloc_height = stratum.bloc_height;
double target_diff = g_work->targetdiff;
double hr = global_hashrate;
char shr[32];
char hr_units[4] = {0};
char block_ttf[32];
char share_ttf[32];
// Both TTF values are difficulty * diff_to_hash / hashrate, formatted by
// format_hms before hr is rescaled for display.
// NOTE(review): hr is the divisor here; confirm global_hashrate cannot be
// zero on the first job.
format_hms( block_ttf, net_diff * diff_to_hash / hr );
format_hms( share_ttf, target_diff * diff_to_hash / hr );
scale_hash_for_display ( &hr, hr_units );
sprintf( shr, "%.2f %sh/s", hr, hr_units );
applog( LOG_BLUE, "%s %s block %d", short_url,
algo_names[opt_algo], stratum.bloc_height );
applog2( LOG_INFO, "Diff: net %g, stratum %g, target %g",
net_diff, stratum_diff, target_diff );
applog2( LOG_INFO, "TTF @ %s: block %s, share %s.",
shr, block_ttf, share_ttf );
}
}
@@ -2714,6 +2681,7 @@ static void *stratum_thread(void *userdata )
if ( last_bloc_height != stratum.bloc_height )
{
last_bloc_height = stratum.bloc_height;
/*
if ( !opt_quiet )
{
if ( net_diff > 0. )
@@ -2726,9 +2694,11 @@ static void *stratum_thread(void *userdata )
short_url, algo_names[opt_algo],
stratum.bloc_height, g_work.job_id );
}
*/
}
else if ( !opt_quiet )
applog( LOG_BLUE,"New job %s.", g_work.job_id );
// else if ( !opt_quiet )
// applog( LOG_BLUE,"New job %s.", g_work.job_id );
}
else if (opt_debug && !opt_quiet)
{

33
miner.h
View File

@@ -310,6 +310,7 @@ struct thr_api {
#define CL_WHT "\x1B[01;37m" /* white */
void applog(int prio, const char *fmt, ...);
void applog2(int prio, const char *fmt, ...);
void restart_threads(void);
extern json_t *json_rpc_call( CURL *curl, const char *url, const char *userpass,
const char *rpc_req, int *curl_err, int flags );
@@ -331,6 +332,24 @@ extern void diff_to_target(uint32_t *target, double diff);
double hash_target_ratio( uint32_t* hash, uint32_t* target );
void work_set_target_ratio( struct work* work, uint32_t* hash );
// Per-thread record. The submit_solution / submit_lane_solution prototypes
// in this header take a pointer to it.
struct thr_info {
int id;                 // thread identifier — presumably the thread's index; confirm where it is assigned
pthread_t pth;          // pthread handle
pthread_attr_t attr;    // pthread attributes object
struct thread_q *q;     // pointer to a thread_q (type defined elsewhere)
struct cpu_info cpu;    // embedded cpu_info record (type defined elsewhere)
};
//struct thr_info *thr_info;
bool submit_solution( struct work *work, void *hash,
struct thr_info *thr );
bool submit_lane_solution( struct work *work, void *hash,
struct thr_info *thr, int lane );
//bool submit_work( struct thr_info *thr, const struct work *work_in );
void get_currentalgo( char* buf, int sz );
bool has_sha();
@@ -355,7 +374,7 @@ struct work {
uint32_t target[8];
double targetdiff;
double shareratio;
// double shareratio;
double sharediff;
int height;
@@ -471,6 +490,7 @@ void print_hash_tests(void);
void scale_hash_for_display ( double* hashrate, char* units );
/*
struct thr_info {
int id;
pthread_t pth;
@@ -478,6 +498,7 @@ struct thr_info {
struct thread_q *q;
struct cpu_info cpu;
};
*/
struct work_restart {
volatile uint8_t restart;
@@ -578,6 +599,7 @@ enum algos {
ALGO_X14,
ALGO_X15,
ALGO_X16R,
ALGO_X16RV2,
ALGO_X16RT,
ALGO_X16RT_VEIL,
ALGO_X16S,
@@ -672,6 +694,7 @@ static const char* const algo_names[] = {
"x14",
"x15",
"x16r",
"x16rv2",
"x16rt",
"x16rt-veil",
"x16s",
@@ -733,6 +756,7 @@ extern int opt_param_n;
extern int opt_param_r;
extern char* opt_param_key;
extern double opt_diff_factor;
extern double opt_target_factor;
extern bool opt_randomize;
extern bool allow_mininginfo;
extern time_t g_work_time;
@@ -798,8 +822,8 @@ Options:\n\
neoscrypt NeoScrypt(128, 2, 1)\n\
nist5 Nist5\n\
pentablake 5 x blake512\n\
phi1612 phi, LUX coin (original algo)\n\
phi2 LUX (new algo)\n\
phi1612 phi\n\
phi2 Luxcoin (LUX)\n\
pluck Pluck:128 (Supcoin)\n\
polytimos\n\
quark Quark\n\
@@ -831,7 +855,8 @@ Options:\n\
x13sm3 hsr (Hshare)\n\
x14 X14\n\
x15 X15\n\
x16r Ravencoin (RVN)\n\
x16r\n\
x16rv2 Ravencoin (RVN)\n\
x16rt Gincoin (GIN)\n\
x16rt-veil Veil (VEIL)\n\
x16s Pigeoncoin (PGN)\n\

View File

@@ -483,7 +483,9 @@ static inline void mm256_bswap32_intrlv80_8x32( void *d, void *src )
const __m256i three = _mm256_add_epi32( two, one );
const __m256i four = _mm256_add_epi32( two, two );
casti_m256i( d, 0 ) = _mm256_permutevar8x32_epi32( s0, m256_zero );
casti_m256i( d, 0 ) = _mm256_broadcastd_epi32(
_mm256_castsi256_si128( s0 ) );
// casti_m256i( d, 0 ) = _mm256_permutevar8x32_epi32( s0, m256_zero );
casti_m256i( d, 1 ) = _mm256_permutevar8x32_epi32( s0, one );
casti_m256i( d, 2 ) = _mm256_permutevar8x32_epi32( s0, two );
casti_m256i( d, 3 ) = _mm256_permutevar8x32_epi32( s0, three );
@@ -494,7 +496,9 @@ static inline void mm256_bswap32_intrlv80_8x32( void *d, void *src )
_mm256_add_epi32( four, two ) );
casti_m256i( d, 7 ) = _mm256_permutevar8x32_epi32( s0,
_mm256_add_epi32( four, three ) );
casti_m256i( d, 8 ) = _mm256_permutevar8x32_epi32( s1, m256_zero );
casti_m256i( d, 8 ) = _mm256_broadcastd_epi32(
_mm256_castsi256_si128( s1 ) );
// casti_m256i( d, 8 ) = _mm256_permutevar8x32_epi32( s1, m256_zero );
casti_m256i( d, 9 ) = _mm256_permutevar8x32_epi32( s1, one );
casti_m256i( d,10 ) = _mm256_permutevar8x32_epi32( s1, two );
casti_m256i( d,11 ) = _mm256_permutevar8x32_epi32( s1, three );
@@ -505,8 +509,9 @@ static inline void mm256_bswap32_intrlv80_8x32( void *d, void *src )
_mm256_add_epi32( four, two ) );
casti_m256i( d,15 ) = _mm256_permutevar8x32_epi32( s1,
_mm256_add_epi32( four, three ) );
casti_m256i( d,16 ) = _mm256_permutevar8x32_epi32(
_mm256_castsi128_si256( s2 ), m256_zero );
casti_m256i( d,16 ) = _mm256_broadcastd_epi32( s2 );
// casti_m256i( d,16 ) = _mm256_permutevar8x32_epi32(
// _mm256_castsi128_si256( s2 ), m256_zero );
casti_m256i( d,17 ) = _mm256_permutevar8x32_epi32(
_mm256_castsi128_si256( s2 ), one );
casti_m256i( d,18 ) = _mm256_permutevar8x32_epi32(
@@ -682,7 +687,9 @@ static inline void mm512_bswap32_intrlv80_16x32( void *d, void *src )
const __m512i three = _mm512_add_epi32( two, one );
__m512i x = _mm512_add_epi32( three, three );
casti_m512i( d, 0 ) = _mm512_permutexvar_epi32( s0, m512_zero );
casti_m512i( d, 0 ) = _mm512_broadcastd_epi32(
_mm512_castsi512_si128( s0 ) );
// casti_m512i( d, 0 ) = _mm512_permutexvar_epi32( s0, m512_zero );
casti_m512i( d, 1 ) = _mm512_permutexvar_epi32( s0, one );
casti_m512i( d, 2 ) = _mm512_permutexvar_epi32( s0, two );
casti_m512i( d, 3 ) = _mm512_permutexvar_epi32( s0, three );
@@ -709,8 +716,9 @@ static inline void mm512_bswap32_intrlv80_16x32( void *d, void *src )
_mm512_add_epi32( x, two ) );
casti_m512i( d,15 ) = _mm512_permutexvar_epi32( s0,
_mm512_add_epi32( x, three ) );
casti_m512i( d,16 ) = _mm512_permutexvar_epi32(
_mm512_castsi128_si512( s1 ), m512_zero );
casti_m512i( d,16 ) = _mm512_broadcastd_epi32( s1 );
// casti_m512i( d,16 ) = _mm512_permutexvar_epi32(
// _mm512_castsi128_si512( s1 ), m512_zero );
casti_m512i( d,17 ) = _mm512_permutexvar_epi32(
_mm512_castsi128_si512( s1 ), one );
casti_m512i( d,18 ) = _mm512_permutexvar_epi32(
@@ -987,15 +995,17 @@ static inline void extr_lane_8x64( void *d, const void *s,
static inline void mm512_bswap32_intrlv80_8x64( void *dst, void *src )
{
__m512i *d = (__m512i*)dst;
__m512i s0 = mm512_bswap_32( casti_m512i(src, 0 ) );
__m128i s1 = mm128_bswap_32( casti_m128i(src, 4 ) );
__m512i s0 = mm512_bswap_32( casti_m512i( src, 0 ) );
__m128i s1 = mm128_bswap_32( casti_m128i( src, 4 ) );
// const __m512i zero = m512_zero;
const __m512i one = m512_one_64;
const __m512i two = _mm512_add_epi64( one, one );
const __m512i three = _mm512_add_epi64( two, one );
const __m512i four = _mm512_add_epi64( two, two );
d[0] = _mm512_permutexvar_epi64( s0, m512_zero );
d[0] = _mm512_broadcastq_epi64(
_mm512_castsi512_si128( s0 ) );
// d[0] = _mm512_permutexvar_epi64( s0, m512_zero );
d[1] = _mm512_permutexvar_epi64( s0, one );
d[2] = _mm512_permutexvar_epi64( s0, two );
d[3] = _mm512_permutexvar_epi64( s0, three );
@@ -1003,8 +1013,9 @@ static inline void mm512_bswap32_intrlv80_8x64( void *dst, void *src )
d[5] = _mm512_permutexvar_epi64( s0, _mm512_add_epi64( four, one ) );
d[6] = _mm512_permutexvar_epi64( s0, _mm512_add_epi64( four, two ) );
d[7] = _mm512_permutexvar_epi64( s0, _mm512_add_epi64( four, three ) );
d[8] = _mm512_permutexvar_epi64(
_mm512_castsi128_si512( s1 ), m512_zero );
d[8] = _mm512_broadcastq_epi64( s1 );
// d[8] = _mm512_permutexvar_epi64(
// _mm512_castsi128_si512( s1 ), m512_zero );
d[9] = _mm512_permutexvar_epi64(
_mm512_castsi128_si512( s1 ), one );
}

View File

@@ -42,9 +42,11 @@
static inline __m128i m128_one_128_fn()
{
register uint64_t one = 1;
register __m128i a;
asm( "movq $1, %0\n\t"
: "=x"(a) );
asm( "movq %1, %0\n\t"
: "=x"(a)
: "r" (one) );
return a;
}
#define m128_one_128 m128_one_128_fn()
@@ -54,9 +56,9 @@ static inline __m128i m128_one_64_fn()
register uint64_t one = 1;
register __m128i a;
asm( "movq %1, %0\n\t"
: "=x"(a)
: "r"(one) );
return _mm_shuffle_epi32( a, 0x04 );
: "=x" (a)
: "r" (one) );
return _mm_shuffle_epi32( a, 0x44 );
}
#define m128_one_64 m128_one_64_fn()
@@ -65,8 +67,8 @@ static inline __m128i m128_one_32_fn()
register uint32_t one = 1;
register __m128i a;
asm( "movd %1, %0\n\t"
: "=x"(a)
: "r"(one) );
: "=x" (a)
: "r" (one) );
return _mm_shuffle_epi32( a, 0x00 );
}
#define m128_one_32 m128_one_32_fn()
@@ -76,8 +78,8 @@ static inline __m128i m128_one_16_fn()
register uint32_t one = 0x00010001;
register __m128i a;
asm( "movd %1, %0\n\t"
: "=x"(a)
: "r"(one) );
: "=x" (a)
: "r" (one) );
return _mm_shuffle_epi32( a, 0x00 );
}
#define m128_one_16 m128_one_16_fn()
@@ -87,8 +89,8 @@ static inline __m128i m128_one_8_fn()
register uint32_t one = 0x01010101;
register __m128i a;
asm( "movd %1, %0\n\t"
: "=x"(a)
: "r"(one) );
: "=x" (a)
: "r" (one) );
return _mm_shuffle_epi32( a, 0x00 );
}
#define m128_one_8 m128_one_8_fn()
@@ -97,7 +99,7 @@ static inline __m128i m128_neg1_fn()
{
__m128i a;
asm( "pcmpeqd %0, %0\n\t"
: "=x"(a) );
: "=x" (a) );
return a;
}
#define m128_neg1 m128_neg1_fn()
@@ -108,7 +110,7 @@ static inline __m128i mm128_mov64_128( uint64_t n )
register __m128i a;
asm( "movq %1, %0\n\t"
: "=x" (a)
: "r" (n) );
: "r" (n) );
return a;
}
@@ -117,7 +119,7 @@ static inline __m128i mm128_mov32_128( uint32_t n )
register __m128i a;
asm( "movd %1, %0\n\t"
: "=x" (a)
: "r" (n) );
: "r" (n) );
return a;
}
@@ -126,7 +128,7 @@ static inline uint64_t mm128_mov128_64( __m128i a )
register uint64_t n;
asm( "movq %1, %0\n\t"
: "=x" (n)
: "r" (a) );
: "r" (a) );
return n;
}
@@ -135,10 +137,28 @@ static inline uint32_t mm128_mov128_32( __m128i a )
register uint32_t n;
asm( "movd %1, %0\n\t"
: "=x" (n)
: "r" (a) );
: "r" (a) );
return n;
}
// Return a 128 bit vector with both 64 bit elements equal to n.
// The movq moves n from a general purpose register ("r") into the low
// 64 bits of an SSE register ("=x"). Shuffle control 0x44 then selects
// 32 bit elements 1,0,1,0, which repeats the low 64 bits in the high half.
static inline __m128i m128_const1_64( const uint64_t n )
{
register __m128i a;
asm( "movq %1, %0\n\t"
: "=x" (a)
: "r" (n) );
return _mm_shuffle_epi32( a, 0x44 );
}
// Return a 128 bit vector with all four 32 bit elements equal to n.
// The movd moves n from a general purpose register ("r") into the low
// 32 bits of an SSE register ("=x"). Shuffle control 0x00 then selects
// element 0 for every position.
static inline __m128i m128_const1_32( const uint32_t n )
{
register __m128i a;
asm( "movd %1, %0\n\t"
: "=x" (a)
: "r" (n) );
return _mm_shuffle_epi32( a, 0x00 );
}
#if defined(__SSE41__)
// alternative to _mm_set_epi64x, doesn't use mem,
@@ -148,11 +168,12 @@ static inline __m128i m128_const_64( const uint64_t hi, const uint64_t lo )
register __m128i a;
asm( "movq %2, %0\n\t"
"pinsrq $1, %1, %0\n\t"
: "=x"(a)
: "r"(hi), "r"(lo) );
: "=x" (a)
: "r" (hi), "r" (lo) );
return a;
}
/*
static inline __m128i m128_const1_64( const uint64_t n )
{
register __m128i a;
@@ -162,13 +183,13 @@ static inline __m128i m128_const1_64( const uint64_t n )
: "r"(n) );
return a;
}
*/
#else
// #define m128_one_128 _mm_set_epi64x( 0ULL, 1ULL )
#define m128_const_64 _mm_set_epi64x
#define m128_const1_64 _mm_set1_epi64x
// #define m128_const1_64 _mm_set1_epi64x
#endif
@@ -263,46 +284,6 @@ do { \
#endif
// Gather and scatter data.
// Surprise, they don't use vector instructions. Several reasons why.
// Since scalar data elements are being manipulated scalar instructions
// are most appropriate and can bypass vector registers. They are faster
// and more efficient on a per instruction basis due to the higher clock
// speed and greater avaiability of execution resources. It's good for
// interleaving data buffers for parallel processing.
// May suffer overhead if data is already in a vector register. This can
// usually be easilly avoided by the coder. Sometimes _mm_set is simply better.
// These macros are likely to be used when transposing matrices rather than
// conversions of a single vector.
// Gather data elements into contiguous memory for vector use.
// Source args are appropriately sized value integers, destination arg is a
// type agnostic pointer.
// Vector alignment is not required, though likely. Appropriate integer
// alignment satisfies these macros.
// rewrite using insert
// Store two 64 bit values into consecutive uint64_t slots at d.
// d is a pointer of any type; s0 and s1 are integer values.
// Every argument is parenthesized so that an expression argument such as
// "buf + 16" is cast as a whole instead of only its first operand.
// Expands to two statements; do not use as the unbraced body of an if/else.
#define mm128_gather_64( d, s0, s1 ) \
    ((uint64_t*)(d))[0] = (uint64_t)(s0); \
    ((uint64_t*)(d))[1] = (uint64_t)(s1);
// Store four 32 bit values into consecutive uint32_t slots at d.
// d is a pointer of any type; s0..s3 are integer values.
// Every argument is parenthesized so that an expression argument such as
// "buf + 16" is cast as a whole instead of only its first operand.
// Expands to four statements; do not use as the unbraced body of an if/else.
#define mm128_gather_32( d, s0, s1, s2, s3 ) \
    ((uint32_t*)(d))[0] = (uint32_t)(s0); \
    ((uint32_t*)(d))[1] = (uint32_t)(s1); \
    ((uint32_t*)(d))[2] = (uint32_t)(s2); \
    ((uint32_t*)(d))[3] = (uint32_t)(s3);
// Scatter data from contiguous memory.
// Copy two consecutive 64 bit values from s to the separate locations
// d0 and d1. All arguments are pointers of any type.
// Every argument is parenthesized so that an expression argument such as
// "buf + 16" is cast as a whole instead of only its first operand.
// Expands to two statements; do not use as the unbraced body of an if/else.
#define mm128_scatter_64( d0, d1, s ) \
    *((uint64_t*)(d0)) = ((uint64_t*)(s))[0]; \
    *((uint64_t*)(d1)) = ((uint64_t*)(s))[1];
// Copy four consecutive 32 bit values from s to the separate locations
// d0..d3. All arguments are pointers of any type.
// Every argument is parenthesized so that an expression argument such as
// "buf + 16" is cast as a whole instead of only its first operand.
// Expands to four statements; do not use as the unbraced body of an if/else.
#define mm128_scatter_32( d0, d1, d2, d3, s ) \
    *((uint32_t*)(d0)) = ((uint32_t*)(s))[0]; \
    *((uint32_t*)(d1)) = ((uint32_t*)(s))[1]; \
    *((uint32_t*)(d2)) = ((uint32_t*)(s))[2]; \
    *((uint32_t*)(d3)) = ((uint32_t*)(s))[3];
// Memory functions
// Mostly for convenience, avoids calculating bytes.
// Assumes data is aligned and integral.

View File

@@ -39,11 +39,20 @@ static inline __m256i m256_const1_64( uint64_t i )
{
register __m128i a;
asm( "movq %1, %0\n\t"
: "=x"(a)
: "r"(i) );
: "=x" (a)
: "r" (i) );
return _mm256_broadcastq_epi64( a );
}
// Return a 256 bit vector with every 32 bit element equal to i.
// The movd moves i from a general purpose register ("r") into the low
// 32 bits of an SSE register ("=x"); the broadcast then replicates that
// element across the 256 bit result.
// NOTE(review): _mm256_broadcastd_epi32 is an AVX2 intrinsic — confirm this
// definition is only compiled when AVX2 is available.
static inline __m256i m256_const1_32( uint32_t i )
{
register __m128i a;
asm( "movd %1, %0\n\t"
: "=x" (a)
: "r" (i) );
return _mm256_broadcastd_epi32( a );
}
#if defined(__AVX2__)
// Don't call the function directly, use the macro to make it appear like
@@ -142,7 +151,7 @@ do { \
__m128i hi = _mm256_extracti128_si256( src, 1 ); \
a0 = mm256_mov256_64( src ); \
a1 = _mm_extract_epi64( _mm256_castsi256_si128( src ), 1 ); \
a2 = _mm_extract_epi64( hi, 0 ); \
a2 = mm128_mov128_64( hi ); \
a3 = _mm_extract_epi64( hi, 1 ); \
} while(0)
@@ -246,44 +255,6 @@ static inline __m256i mm256_mov32_256( uint32_t n )
#define casto_m256i(p,o) (((__m256i*)(p))+(o))
// Gather scatter
// Write four 64 bit values to consecutive uint64_t slots starting at d.
// d is a pointer of any type; s0..s3 are integer values.
#define mm256_gather_64( d, s0, s1, s2, s3 ) \
   *( (uint64_t*)(d)     ) = (uint64_t)(s0); \
   *( (uint64_t*)(d) + 1 ) = (uint64_t)(s1); \
   *( (uint64_t*)(d) + 2 ) = (uint64_t)(s2); \
   *( (uint64_t*)(d) + 3 ) = (uint64_t)(s3);
// Write eight 32 bit values to consecutive uint32_t slots starting at d.
// d is a pointer of any type; s0..s7 are integer values.
#define mm256_gather_32( d, s0, s1, s2, s3, s4, s5, s6, s7 ) \
   *( (uint32_t*)(d)     ) = (uint32_t)(s0); \
   *( (uint32_t*)(d) + 1 ) = (uint32_t)(s1); \
   *( (uint32_t*)(d) + 2 ) = (uint32_t)(s2); \
   *( (uint32_t*)(d) + 3 ) = (uint32_t)(s3); \
   *( (uint32_t*)(d) + 4 ) = (uint32_t)(s4); \
   *( (uint32_t*)(d) + 5 ) = (uint32_t)(s5); \
   *( (uint32_t*)(d) + 6 ) = (uint32_t)(s6); \
   *( (uint32_t*)(d) + 7 ) = (uint32_t)(s7);
// Scatter data from contiguous memory.
// All arguments are pointers
// Copy four consecutive 64 bit values from s to the separate locations
// d0..d3. All arguments are pointers.
#define mm256_scatter_64( d0, d1, d2, d3, s ) \
   ((uint64_t*)(d0))[0] = *( (uint64_t*)(s)     ); \
   ((uint64_t*)(d1))[0] = *( (uint64_t*)(s) + 1 ); \
   ((uint64_t*)(d2))[0] = *( (uint64_t*)(s) + 2 ); \
   ((uint64_t*)(d3))[0] = *( (uint64_t*)(s) + 3 );
// Copy eight consecutive 32 bit values from s to the separate locations
// d0..d7. All arguments are pointers.
#define mm256_scatter_32( d0, d1, d2, d3, d4, d5, d6, d7, s ) \
   ((uint32_t*)(d0))[0] = *( (uint32_t*)(s)     ); \
   ((uint32_t*)(d1))[0] = *( (uint32_t*)(s) + 1 ); \
   ((uint32_t*)(d2))[0] = *( (uint32_t*)(s) + 2 ); \
   ((uint32_t*)(d3))[0] = *( (uint32_t*)(s) + 3 ); \
   ((uint32_t*)(d4))[0] = *( (uint32_t*)(s) + 4 ); \
   ((uint32_t*)(d5))[0] = *( (uint32_t*)(s) + 5 ); \
   ((uint32_t*)(d6))[0] = *( (uint32_t*)(s) + 6 ); \
   ((uint32_t*)(d7))[0] = *( (uint32_t*)(s) + 7 );
//
// Memory functions
// n = number of 256 bit (32 byte) vectors

View File

@@ -47,7 +47,6 @@
// _mm512_setzero_si512 uses xor instruction. If needed frequently
// in a function is it better to define a register variable (const?)
// initialized to zero.
// It isn't clear to me yet how set or set1 actually work.
#define m512_zero _mm512_setzero_si512()
#define m512_one_512 _mm512_set_epi64( 0ULL, 0ULL, 0ULL, 0ULL, \
@@ -60,10 +59,18 @@
//#define m512_one_8 _mm512_set1_epi8( 1U )
//#define m512_neg1 _mm512_set1_epi64( 0xFFFFFFFFFFFFFFFFULL )
#define mi512_const_64( i7, i6, i5, i4, i3, i2, i1, i0 ) \
_mm512_inserti64x4( _mm512_castsi512_si256( m256_const_64( i3.i2,i1,i0 ) ), \
#define m512_const_64( i7, i6, i5, i4, i3, i2, i1, i0 ) \
_mm512_inserti64x4( _mm512_castsi256_si512( m256_const_64( i3,i2,i1,i0 ) ), \
m256_const_64( i7,i6,i5,i4 ), 1 )
#define m512_const1_64( i ) m256_const_64( i, i, i, i, i, i, i, i )
// Return a 512 bit vector with every 64 bit element equal to i.
// The movq moves i from a general purpose register ("r") into the low
// 64 bits of an SSE register ("=x"); the broadcast then replicates that
// element across the 512 bit result.
static inline __m512i m512_const1_64( uint64_t i )
{
register __m128i a;
asm( "movq %1, %0\n\t"
: "=x"(a)
: "r"(i) );
return _mm512_broadcastq_epi64( a );
}
static inline __m512i m512_one_64_fn()
{
@@ -135,15 +142,12 @@ static inline __m512i m512_neg1_fn()
#define mm512_negate_32( x ) _mm512_sub_epi32( m512_zero, x )
#define mm512_negate_16( x ) _mm512_sub_epi16( m512_zero, x )
#define mm256_extr_lo256_512( a ) _mm512_castsi512_si256( a )
#define mm256_extr_hi256_512( a ) _mm512_extracti64x4_epi64( a, 1 )
#define mm128_extr_lo128_512( a ) _mm512_castsi512_si256( a )
//
// Pointer casting
@@ -163,73 +167,9 @@ static inline __m512i m512_neg1_fn()
// returns p+o as pointer to vector
#define casto_m512i(p,o) (((__m512i*)(p))+(o))
// Gather scatter
// Write eight 64 bit values to consecutive uint64_t slots starting at d.
// d is a pointer of any type; s0..s7 are integer values.
#define mm512_gather_64( d, s0, s1, s2, s3, s4, s5, s6, s7 ) \
   *( (uint64_t*)(d)     ) = (uint64_t)(s0); \
   *( (uint64_t*)(d) + 1 ) = (uint64_t)(s1); \
   *( (uint64_t*)(d) + 2 ) = (uint64_t)(s2); \
   *( (uint64_t*)(d) + 3 ) = (uint64_t)(s3); \
   *( (uint64_t*)(d) + 4 ) = (uint64_t)(s4); \
   *( (uint64_t*)(d) + 5 ) = (uint64_t)(s5); \
   *( (uint64_t*)(d) + 6 ) = (uint64_t)(s6); \
   *( (uint64_t*)(d) + 7 ) = (uint64_t)(s7);
// Write sixteen 32 bit values to consecutive uint32_t slots starting at d.
// d is a pointer of any type; s00..s15 are integer values, s00 landing in
// slot 0 and s15 in slot 15.
// Expands to sixteen statements; do not use as the unbraced body of an
// if/else.
#define mm512_gather_32( d, s00, s01, s02, s03, s04, s05, s06, s07, \
                            s08, s09, s10, s11, s12, s13, s14, s15 ) \
    ((uint32_t*)(d))[ 0] = (uint32_t)(s00); \
    ((uint32_t*)(d))[ 1] = (uint32_t)(s01); \
    ((uint32_t*)(d))[ 2] = (uint32_t)(s02); \
    ((uint32_t*)(d))[ 3] = (uint32_t)(s03); \
    ((uint32_t*)(d))[ 4] = (uint32_t)(s04); \
    ((uint32_t*)(d))[ 5] = (uint32_t)(s05); \
    ((uint32_t*)(d))[ 6] = (uint32_t)(s06); \
    ((uint32_t*)(d))[ 7] = (uint32_t)(s07); \
    ((uint32_t*)(d))[ 8] = (uint32_t)(s08); \
    ((uint32_t*)(d))[ 9] = (uint32_t)(s09); \
    ((uint32_t*)(d))[10] = (uint32_t)(s10); \
    ((uint32_t*)(d))[11] = (uint32_t)(s11); \
    ((uint32_t*)(d))[12] = (uint32_t)(s12); \
    ((uint32_t*)(d))[13] = (uint32_t)(s13); \
    ((uint32_t*)(d))[14] = (uint32_t)(s14); \
    ((uint32_t*)(d))[15] = (uint32_t)(s15);
// Scatter data from contiguous memory.
// All arguments are pointers
// Copy eight consecutive 64 bit values from s to the separate locations
// d0..d7. All arguments are pointers.
#define mm512_scatter_64( d0, d1, d2, d3, d4, d5, d6, d7, s ) \
   ((uint64_t*)(d0))[0] = *( (uint64_t*)(s)     ); \
   ((uint64_t*)(d1))[0] = *( (uint64_t*)(s) + 1 ); \
   ((uint64_t*)(d2))[0] = *( (uint64_t*)(s) + 2 ); \
   ((uint64_t*)(d3))[0] = *( (uint64_t*)(s) + 3 ); \
   ((uint64_t*)(d4))[0] = *( (uint64_t*)(s) + 4 ); \
   ((uint64_t*)(d5))[0] = *( (uint64_t*)(s) + 5 ); \
   ((uint64_t*)(d6))[0] = *( (uint64_t*)(s) + 6 ); \
   ((uint64_t*)(d7))[0] = *( (uint64_t*)(s) + 7 );
// Copy sixteen consecutive 32 bit values from s to the separate locations
// d00..d15, element k of s going to the k-th destination.
// All arguments are pointers.
// Expands to sixteen statements; do not use as the unbraced body of an
// if/else.
#define mm512_scatter_32( d00, d01, d02, d03, d04, d05, d06, d07, \
                             d08, d09, d10, d11, d12, d13, d14, d15, s ) \
    *((uint32_t*)(d00)) = ((uint32_t*)(s))[ 0]; \
    *((uint32_t*)(d01)) = ((uint32_t*)(s))[ 1]; \
    *((uint32_t*)(d02)) = ((uint32_t*)(s))[ 2]; \
    *((uint32_t*)(d03)) = ((uint32_t*)(s))[ 3]; \
    *((uint32_t*)(d04)) = ((uint32_t*)(s))[ 4]; \
    *((uint32_t*)(d05)) = ((uint32_t*)(s))[ 5]; \
    *((uint32_t*)(d06)) = ((uint32_t*)(s))[ 6]; \
    *((uint32_t*)(d07)) = ((uint32_t*)(s))[ 7]; \
    *((uint32_t*)(d08)) = ((uint32_t*)(s))[ 8]; \
    *((uint32_t*)(d09)) = ((uint32_t*)(s))[ 9]; \
    *((uint32_t*)(d10)) = ((uint32_t*)(s))[10]; \
    *((uint32_t*)(d11)) = ((uint32_t*)(s))[11]; \
    *((uint32_t*)(d12)) = ((uint32_t*)(s))[12]; \
    *((uint32_t*)(d13)) = ((uint32_t*)(s))[13]; \
    *((uint32_t*)(d14)) = ((uint32_t*)(s))[14]; \
    *((uint32_t*)(d15)) = ((uint32_t*)(s))[15];
// Add 4 values, fewer dependencies than sequential addition.
#define mm512_add4_64( a, b, c, d ) \
_mm512_add_epi64( _mm512_add_epi64( a, b ), _mm512_add_epi64( c, d ) )
@@ -246,7 +186,6 @@ static inline __m512i m512_neg1_fn()
_mm512_xor_si512( _mm512_xor_si256( a, b ), _mm512_xor_si256( c, d ) )
//
// Bit rotations.
@@ -321,23 +260,26 @@ static inline __m512i m512_neg1_fn()
_mm512_permutex_epi64( v, m512_const_64( 0,1,2,3,4,5,6,7 ) )
#define mm512_invert_32( v ) \
_mm512_permutexvar_epi32( v, _mm512_set_epi32( \
0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15 ) )
_mm512_permutexvar_epi32( v, m512_const_64( \
0x0000000000000001,0x0000000200000003, \
0x0000000400000005,0x0000000600000007, \
0x0000000800000009,0x0000000a0000000b, \
0x0000000c0000000d,0x0000000e0000000f ) )
#define mm512_invert_16( v ) \
_mm512_permutexvar_epi16( v, _mm512_set_epi32( \
0x00000001, 0x00020003, 0x00040005, 0x00060007, \
0x00080009, 0x000A000B, 0x000C000D, 0x000E000F, \
0x00100011, 0x00120013, 0x00140015, 0x00160017, \
0x00180019, 0x001A001B, 0x001C001D, 0x001E001F ) )
_mm512_permutexvar_epi16( v, m512_const_64( \
0x0000000100020003, 0x0004000500060007, \
0x00080009000A000B, 0x000C000D000E000F, \
0x0010001100120013, 0x0014001500160017, \
0x00180019001A001B, 0x001C001D001E001F ) )
#define mm512_invert_8( v ) \
_mm512_permutexvar_epi8( v, _mm512_set_epi32( \
0x00010203, 0x04050607, 0x08090A0B, 0x0C0D0E0F, \
0x10111213, 0x14151617, 0x18191A1B, 0x1C1D1E1F, \
0x20212223, 0x24252627, 0x28292A2B, 0x2C2D2E2F, \
0x30313233, 0x34353637, 0x38393A3B, 0x3C3D3E3F ) )
_mm512_permutexvar_epi8( v, m512_const_64( \
0x0001020304050607, 0x08090A0B0C0D0E0F, \
0x1011121314151617, 0x18191A1B1C1D1E1F, \
0x2021222324252627, 0x28292A2B2C2D2E2F, \
0x3031323334353637, 0x38393A3B3C3D3E3F ) )
//
// Rotate elements within 256 bit lanes of 512 bit vector.
@@ -351,38 +293,46 @@ static inline __m512i m512_neg1_fn()
// Rotate 256 bit lanes by one 32 bit element
#define mm512_ror1x32_256( v ) \
_mm512_permutexvar_epi32( v, _mm512_set_epi32( \
8,15,14,13,12,11,10, 9, 0, 7, 6, 5, 4, 3, 2, 1 ) )
_mm512_permutexvar_epi32( v, m512_const_64( \
0x000000080000000f, 0x0000000e0000000d, \
0x0000000c0000000b, 0x0000000a00000009, \
0x0000000000000007, 0x0000000600000005, \
0x0000000400000003, 0x0000000200000001 ) )
#define mm512_rol1x32_256( v ) \
_mm512_permutexvar_epi32( v, _mm512_set_epi32( \
14,13,12,11,10, 9, 8,15, 6, 5, 4, 3, 2, 1, 0, 7 ) )
_mm512_permutexvar_epi32( v, m512_const_64( \
0x0000000e0000000d, 0x0000000c0000000b, \
0x0000000a00000009, 0x000000080000000f, \
0x0000000600000005, 0x0000000400000003, \
0x0000000200000001, 0x0000000000000007 ) )
#define mm512_ror1x16_256( v ) \
_mm512_permutexvar_epi16( v, _mm512_set_epi32( \
0x0010001F, 0x001E001D, 0x001C001B, 0x001A0019, \
0x00180017, 0x00160015, 0x00140013, 0x00120011, \
0x0000000F, 0x000E000D, 0x000C000B, 0x000A0009, \
0x00080007, 0x00060005, 0x00040003, 0x00020001 ) )
_mm512_permutexvar_epi16( v, m512_const_64( \
0x0010001F001E001D, 0x001C001B001A0019, \
0x0018001700160015, 0x0014001300120011, \
0x0000000F000E000D, 0x000C000B000A0009, \
0x0008000700060005, 0x0004000300020001 ) )
#define mm512_rol1x16_256( v ) \
_mm512_permutexvar_epi16( v, _mm512_set_epi32( \
0x001E001D, 0x001C001B, 0x001A0019, 0x00180017, \
0x00160015, 0x00140013, 0x00120011, 0x0000000F, \
0x000E000D, 0x000C000B, 0x000A0009, 0x00080007, \
0x00060005, 0x00040003, 0x00020001, 0x0000001F ) )
_mm512_permutexvar_epi16( v, m512_const_64( \
0x001E001D001C001B, 0x001A001900180017, \
0x0016001500140013, 0x001200110000000F, \
0x000E000D000C000B, 0x000A000900080007, \
0x0006000500040003, 0x000200010000001F ) )
#define mm512_ror1x8_256( v ) \
_mm512_permutexvar_epi8( v, _mm512_set_epi32( \
0x203F3E3D, 0x3C3B3A39, 0x38373635, 0x34333231, \
0x302F2E2D, 0x2C2B2A29, 0x28272625, 0x24232221, \
0x001F1E1D, 0x1C1B1A19, 0x18171615, 0x14131211, \
0x100F0E0D, 0x0C0B0A09, 0x08070605, 0x04030201 ) )
_mm512_permutexvar_epi8( v, m512_const_64( \
0x203F3E3D3C3B3A39, 0x3837363534333231, \
0x302F2E2D2C2B2A29, 0x2827262524232221, \
0x001F1E1D1C1B1A19, 0x1817161514131211, \
0x100F0E0D0C0B0A09, 0x0807060504030201 ) )
#define mm512_rol1x8_256( v ) \
_mm512_permutexvar_epi8( v, _mm512_set_epi32( \
0x3E3D3C3B, 0x3A393837, 0x36353433, 0x3231302F, \
0x2E2D2C2B, 0x2A292827, 0x26252423, 0x2221203F, \
0x1E1D1C1B, 0x1A191817, 0x16151413, 0x1211100F, \
0x0E0D0C0B, 0x0A090807, 0x06050403, 0x0201001F ) )
_mm512_permutexvar_epi8( v, m512_const_64( \
0x3E3D3C3B3A393837, 0x363534333231302F, \
0x2E2D2C2B2A292827, 0x262524232221203F, \
0x1E1D1C1B1A191817, 0x161514131211100F, \
0x0E0D0C0B0A090807, 0x060504030201001F ) )
//
// Rotate elements within 128 bit lanes of 512 bit vector.
@@ -441,80 +391,80 @@ static inline __m512i m512_neg1_fn()
// Rotate each 64 bit lane by one 16 bit element.
#define mm512_ror1x16_64( v ) \
_mm512_permutexvar_epi8( v, _mm512_set_epi32( \
0x39383F3E, 0x3D3C3B3A, 0x31303736, 0x35343332, \
0x29282F2E, 0x2D2C2B2A, 0x21202726, 0x25242322, \
0x19181F1E, 0x1D1C1B1A, 0x11101716, 0x15141312, \
0x09080F0E, 0x0D0C0B0A, 0x01000706, 0x05040302 ) )
_mm512_permutexvar_epi8( v, m512_const_64( \
0x39383F3E3D3C3B3A, 0x3130373635343332, \
0x29282F2E2D2C2B2A, 0x2120272625242322, \
0x19181F1E1D1C1B1A, 0x1110171615141312, \
0x09080F0E0D0C0B0A, 0x0100070605040302 ) )
// Byte-permute v with a constant index table; named as the rotate-left
// counterpart of mm512_ror1x16_64.
// NOTE(review): none of the constant rows below ends with a comma before
// the line continuation (mm512_ror1x16_64 has one on every row), so the
// last constant of each row runs into the first of the next — confirm and
// add the separators.
#define mm512_rol1x16_64( v ) \
_mm512_permutexvar_epi8( v, _mm512_set_epi32( \
0x3D3C3B3A, 0x39383F3E, 0x35343332, 0x31303736 \
0x2D2C2B2A, 0x29282F2E, 0x25242322, 0x21202726 \
0x1D1C1B1A, 0x19181F1E, 0x15141312, 0x11101716 \
0x0D0C0B0A, 0x09080F0E, 0x05040302, 0x01000706 ) )
_mm512_permutexvar_epi8( v, m512_const_64( \
0x3D3C3B3A39383F3E, 0x3534333231303736 \
0x2D2C2B2A29282F2E, 0x2524232221202726 \
0x1D1C1B1A19181F1E, 0x1514131211101716 \
0x0D0C0B0A09080F0E, 0x0504030201000706 ) )
// Rotate each 64 bit lane by one byte.
#define mm512_ror1x8_64( v ) \
_mm512_permutexvar_epi8( v, _mm512_set_epi32( \
0x383F3E3D, 0x3C3B3A39, 0x30373635, 0x34333231, \
0x282F2E2D, 0x2C2B2A29, 0x20272625, 0x24232221, \
0x181F1E1D, 0x1C1B1A19, 0x10171615, 0x14131211, \
0x080F0E0D, 0x0C0B0A09, 0x00070605, 0x0403020 )
_mm512_permutexvar_epi8( v, m512_const_64( \
0x383F3E3D3C3B3A39, 0x3037363534333231, \
0x282F2E2D2C2B2A29, 0x2027262524232221, \
0x181F1E1D1C1B1A19, 0x1017161514131211, \
0x080F0E0D0C0B0A09, 0x0007060504030201 ) )
#define mm512_rol1x8_64( v ) \
_mm512_permutexvar_epi8( v, _mm512_set_epi32( \
0x3E3D3C3B, 0x3A39383F, 0x36353433, 0x32313037, \
0x2E2D2C2B, 0x2A29282F, 0x26252423, 0x22212027, \
0x1E1D1C1B, 0x1A19181F, 0x16151413, 0x12111017, \
0x0E0D0C0B, 0x0A09080F, 0x06050403, 0x02010007 )
_mm512_permutexvar_epi8( v, m512_const_64( \
0x3E3D3C3B3A39383F, 0x3635343332313037, \
0x2E2D2C2B2A29282F, 0x2625242322212027, \
0x1E1D1C1B1A19181F, 0x1615141312111017, \
0x0E0D0C0B0A09080F, 0x0605040302010007 ) )
//
// Rotate elements within 32 bit lanes.
// Permute v with a constant index table.
// NOTE(review): the index values are written 16 bits wide (0x001D, 0x001C,
// ...) but are passed to the byte-granular _mm512_permutexvar_epi8 —
// confirm which element width is intended.
// NOTE(review): 0x0019 and 0x0011 appear in the two lowest rows where the
// descending sequence of the other entries suggests 0x0009 and 0x0001 —
// looks like a typo, confirm.
#define mm512_swap16_32( v ) \
_mm512_permutexvar_epi8( v, _mm512_set_epi32( \
0x001D001C, 0x001F001E, 0x00190018, 0x001B001A, \
0x00150014, 0x00170016, 0x00110010, 0x00130012, \
0x000D000C, 0x000F000E, 0x00190008, 0x000B000A, \
0x00050004, 0x00070006, 0x00110000, 0x00030002 )
_mm512_permutexvar_epi8( v, m512_const_64( \
0x001D001C001F001E, 0x00190018001B001A, \
0x0015001400170016, 0x0011001000130012, \
0x000D000C000F000E, 0x00190008000B000A, \
0x0005000400070006, 0x0011000000030002 ) )
#define mm512_ror1x8_32( v ) \
_mm512_permutexvar_epi8( v, _mm512_set_epi32( \
0x3C3F3E3D, 0x383B3A39, 0x34373635, 0x30333231, \
0x2C2F2E2D, 0x282B2A29, 0x24272625, 0x20232221, \
0x1C1F1E1D, 0x181B1A19, 0x14171615, 0x10131211, \
0x0C0F0E0D, 0x080B0A09, 0x04070605, 0x00030201 ) )
_mm512_permutexvar_epi8( v, m512_const_64( \
0x3C3F3E3D383B3A39, 0x3437363530333231, \
0x2C2F2E2D282B2A29, 0x2427262520232221, \
0x1C1F1E1D181B1A19, 0x1417161510131211, \
0x0C0F0E0D080B0A09, 0x0407060500030201 ) )
#define mm512_rol1x8_32( v ) \
_mm512_permutexvar_epi8( v, _mm512_set_epi32( \
0x3E3D3C3F, 0x3A39383B, 0x36353437, 0x32313033, \
0x2E2D2C2F, 0x2A29282B, 0x26252427, 0x22212023, \
0x1E1D1C1F, 0x1A19181B, 0x16151417, 0x12111013, \
0x0E0D0C0F, 0x0A09080B, 0x06050407, 0x02010003 ) )
_mm512_permutexvar_epi8( v, m512_const_64( \
0x3E3D3C3F3A39383B, 0x3635343732313033, \
0x2E2D2C2F2A29282B, 0x2625242722212023, \
0x1E1D1C1F1A19181B, 0x1615141712111013, \
0x0E0D0C0F0A09080B, 0x0605040702010003 ) )
//
// Swap bytes in vector elements, vectorized bswap.
// Byte-reverse each 64 bit element of v using a table of source byte
// indices.
// NOTE(review): two table entries break the pattern followed by the rest:
// 0x2031323334353637 (0x3031323334353637 expected) and 0x2021222334353637
// (0x2021222324252627 expected) — looks like a typo, confirm.
// NOTE(review): Intel documents _mm512_permutexvar_epi8 as ( idx, a ); here
// the data vector v is passed first and the index table second — confirm
// the operand order.
#define mm512_bswap_64( v ) \
_mm512_permutexvar_epi8( v, _mm512_set_epi32( \
0x38393A3B, 0x3C3D3E3F, 0x20313233, 0x34353637, \
0x28292A2B, 0x2C2D2E2F, 0x20212223, 0x34353637, \
0x18191A1B, 0x1C1D1E1F, 0x10111213, 0x14151617, \
0x08090A0B, 0x0C0D0E0F, 0x00010203, 0x04050607 ) )
_mm512_permutexvar_epi8( v, m512_const_64( \
0x38393A3B3C3D3E3F, 0x2031323334353637, \
0x28292A2B2C2D2E2F, 0x2021222334353637, \
0x18191A1B1C1D1E1F, 0x1011121314151617, \
0x08090A0B0C0D0E0F, 0x0001020304050607 ) )
// Byte-reverse each 32 bit element of v using a table of source byte
// indices.
// NOTE(review): every row of the table repeats the same indices
// (0x30..0x3F), whereas mm512_bswap_16 steps its rows down through
// 0x00..0x3F. If the permute indexes the whole 512 bit vector, all four
// 128 bit groups would read from the top 16 bytes — looks like a copy/paste
// error, confirm.
#define mm512_bswap_32( v ) \
_mm512_permutexvar_epi8( v, _mm512_set_epi32( \
0x3C3D3E3F, 0x38393A3B, 0x34353637, 0x30313233, \
0x3C3D3E3F, 0x38393A3B, 0x34353637, 0x30313233, \
0x3C3D3E3F, 0x38393A3B, 0x34353637, 0x30313233, \
0x3C3D3E3F, 0x38393A3B, 0x34353637, 0x30313233 ) )
_mm512_permutexvar_epi8( v, m512_const_64( \
0x3C3D3E3F38393A3B, 0x3435363730313233, \
0x3C3D3E3F38393A3B, 0x3435363730313233, \
0x3C3D3E3F38393A3B, 0x3435363730313233, \
0x3C3D3E3F38393A3B, 0x3435363730313233 ) )
#define mm512_bswap_16( v ) \
_mm512_permutexvar_epi8( v, _mm512_set_epi32( \
0x3E3F3C3D, 0x3A3B3839, 0x36373435, 0x32333031, \
0x2E2F2C2D, 0x2A2B2829, 0x26272425, 0x22232021, \
0x1E1F1C1D, 0x1A1B1819, 0x16171415, 0x12131011, \
0x0E0F0C0D, 0x0A0B0809, 0x06070405, 0x02030001 ) )
_mm512_permutexvar_epi8( v, m512_const_64( \
0x3E3F3C3D3A3B3839, 0x3637343532333031, \
0x2E2F2C2D2A2B2829, 0x2627242522232021, \
0x1E1F1C1D1A1B1819, 0x1617141512131011, \
0x0E0F0C0D0A0B0809, 0x0607040502030001 ) )
//
// Rotate elements from 2 512 bit vectors in place, source arguments

View File

@@ -1,40 +0,0 @@
#include "uint256.h"
#ifdef __cplusplus
extern "C"{
#endif
#include "miner.h"
// compute the diff ratio between a found hash and the target
double hash_target_ratio(uint32_t* hash, uint32_t* target)
{
uint256 h, t;
double dhash;
if (!opt_showdiff)
return 0.0;
memcpy(&t, (void*) target, 32);
memcpy(&h, (void*) hash, 32);
dhash = h.getdouble();
if (dhash > 0.)
return t.getdouble() / dhash;
else
return dhash;
}
// Store the hash/target ratio and the resulting share difficulty in `work`.
// Skipped entirely unless opt_showdiff is enabled (to reduce cpu usage).
void work_set_target_ratio( struct work* work, uint32_t* hash )
{
    if (!opt_showdiff)
        return;

    work->shareratio = hash_target_ratio(hash, work->target);
    // Read the stored field back so the product uses the value as stored.
    work->sharediff = work->targetdiff * work->shareratio;
}
#ifdef __cplusplus
}
#endif

784
uint256.h
View File

@@ -1,784 +0,0 @@
// Copyright (c) 2009-2010 Satoshi Nakamoto
// Copyright (c) 2009-2012 The Bitcoin developers
// Distributed under the MIT/X11 software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
#ifndef BITCOIN_UINT256_H
#define BITCOIN_UINT256_H
#include <limits.h>
#include <stdio.h>
#include <string.h>
#include <inttypes.h>
#include <string>
#include <vector>
typedef long long int64;
typedef unsigned long long uint64;

inline int Testuint256AdHoc(std::vector<std::string> vArg);

/** Base class without constructors for uint256 and uint160.
 * This makes the compiler let you use it in a union.
 *
 * The value is held in WIDTH 32 bit limbs; pn[0] is the least significant
 * limb. Addition, subtraction, increment and decrement wrap modulo 2^BITS
 * (the carry out of the top limb is discarded).
 */
template<unsigned int BITS>
class base_uint
{
protected:
    enum { WIDTH=BITS/32 };
    uint32_t pn[WIDTH];
public:

    /** True when every limb is zero. */
    bool operator!() const
    {
        for (int i = 0; i < WIDTH; i++)
            if (pn[i] != 0)
                return false;
        return true;
    }

    /** Bitwise complement. */
    const base_uint operator~() const
    {
        base_uint ret;
        for (int i = 0; i < WIDTH; i++)
            ret.pn[i] = ~pn[i];
        return ret;
    }

    /** Two's complement negation: complement, then add one. */
    const base_uint operator-() const
    {
        base_uint ret;
        for (int i = 0; i < WIDTH; i++)
            ret.pn[i] = ~pn[i];
        ++ret;
        return ret;
    }

    /** Approximate the value as a double: sum of pn[i] * 2^(32*i). */
    double getdouble() const
    {
        double ret = 0.0;
        double fact = 1.0;
        for (int i = 0; i < WIDTH; i++) {
            ret += fact * pn[i];
            fact *= 4294967296.0;
        }
        return ret;
    }

    /** Assign a 64 bit value; limbs above the second are cleared. */
    base_uint& operator=(uint64 b)
    {
        pn[0] = (unsigned int)b;
        pn[1] = (unsigned int)(b >> 32);
        for (int i = 2; i < WIDTH; i++)
            pn[i] = 0;
        return *this;
    }

    base_uint& operator^=(const base_uint& b)
    {
        for (int i = 0; i < WIDTH; i++)
            pn[i] ^= b.pn[i];
        return *this;
    }

    base_uint& operator&=(const base_uint& b)
    {
        for (int i = 0; i < WIDTH; i++)
            pn[i] &= b.pn[i];
        return *this;
    }

    base_uint& operator|=(const base_uint& b)
    {
        for (int i = 0; i < WIDTH; i++)
            pn[i] |= b.pn[i];
        return *this;
    }

    /** XOR a 64 bit value into the two lowest limbs. */
    base_uint& operator^=(uint64 b)
    {
        pn[0] ^= (unsigned int)b;
        pn[1] ^= (unsigned int)(b >> 32);
        return *this;
    }

    /** OR a 64 bit value into the two lowest limbs. */
    base_uint& operator|=(uint64 b)
    {
        pn[0] |= (unsigned int)b;
        pn[1] |= (unsigned int)(b >> 32);
        return *this;
    }

    /** Shift left by `shift` bits; bits moved past the top limb are lost. */
    base_uint& operator<<=(unsigned int shift)
    {
        base_uint a(*this);
        for (int i = 0; i < WIDTH; i++)
            pn[i] = 0;
        int k = shift / 32;      // whole limbs to move
        shift = shift % 32;      // remaining bits within a limb
        for (int i = 0; i < WIDTH; i++)
        {
            // The shift != 0 test avoids an undefined 32 bit shift by 32.
            if (i+k+1 < WIDTH && shift != 0)
                pn[i+k+1] |= (a.pn[i] >> (32-shift));
            if (i+k < WIDTH)
                pn[i+k] |= (a.pn[i] << shift);
        }
        return *this;
    }

    /** Shift right by `shift` bits; bits moved below limb 0 are lost. */
    base_uint& operator>>=(unsigned int shift)
    {
        base_uint a(*this);
        for (int i = 0; i < WIDTH; i++)
            pn[i] = 0;
        int k = shift / 32;      // whole limbs to move
        shift = shift % 32;      // remaining bits within a limb
        for (int i = 0; i < WIDTH; i++)
        {
            // The shift != 0 test avoids an undefined 32 bit shift by 32.
            if (i-k-1 >= 0 && shift != 0)
                pn[i-k-1] |= (a.pn[i] << (32-shift));
            if (i-k >= 0)
                pn[i-k] |= (a.pn[i] >> shift);
        }
        return *this;
    }

    /** Limb-wise addition with carry; the final carry is dropped. */
    base_uint& operator+=(const base_uint& b)
    {
        uint64 carry = 0;
        for (int i = 0; i < WIDTH; i++)
        {
            uint64 n = carry + pn[i] + b.pn[i];
            pn[i] = n & 0xffffffff;
            carry = n >> 32;
        }
        return *this;
    }

    /** Subtraction implemented as addition of the negated operand. */
    base_uint& operator-=(const base_uint& b)
    {
        *this += -b;
        return *this;
    }

    base_uint& operator+=(uint64 b64)
    {
        base_uint b;
        b = b64;
        *this += b;
        return *this;
    }

    base_uint& operator-=(uint64 b64)
    {
        base_uint b;
        b = b64;
        *this += -b;
        return *this;
    }

    base_uint& operator++()
    {
        // prefix operator: propagate the carry while limbs wrap to zero
        int i = 0;
        while (++pn[i] == 0 && i < WIDTH-1)
            i++;
        return *this;
    }

    const base_uint operator++(int)
    {
        // postfix operator
        const base_uint ret = *this;
        ++(*this);
        return ret;
    }

    base_uint& operator--()
    {
        // prefix operator: propagate the borrow while limbs wrap to all ones.
        // Compared against an unsigned constant rather than signed -1.
        int i = 0;
        while (--pn[i] == 0xffffffffu && i < WIDTH-1)
            i++;
        return *this;
    }

    const base_uint operator--(int)
    {
        // postfix operator
        const base_uint ret = *this;
        --(*this);
        return ret;
    }

    // Ordering comparisons scan from the most significant limb downwards.

    friend inline bool operator<(const base_uint& a, const base_uint& b)
    {
        for (int i = base_uint::WIDTH-1; i >= 0; i--)
        {
            if (a.pn[i] < b.pn[i])
                return true;
            else if (a.pn[i] > b.pn[i])
                return false;
        }
        return false;
    }

    friend inline bool operator<=(const base_uint& a, const base_uint& b)
    {
        for (int i = base_uint::WIDTH-1; i >= 0; i--)
        {
            if (a.pn[i] < b.pn[i])
                return true;
            else if (a.pn[i] > b.pn[i])
                return false;
        }
        return true;
    }

    friend inline bool operator>(const base_uint& a, const base_uint& b)
    {
        for (int i = base_uint::WIDTH-1; i >= 0; i--)
        {
            if (a.pn[i] > b.pn[i])
                return true;
            else if (a.pn[i] < b.pn[i])
                return false;
        }
        return false;
    }

    friend inline bool operator>=(const base_uint& a, const base_uint& b)
    {
        for (int i = base_uint::WIDTH-1; i >= 0; i--)
        {
            if (a.pn[i] > b.pn[i])
                return true;
            else if (a.pn[i] < b.pn[i])
                return false;
        }
        return true;
    }

    friend inline bool operator==(const base_uint& a, const base_uint& b)
    {
        for (int i = 0; i < base_uint::WIDTH; i++)
            if (a.pn[i] != b.pn[i])
                return false;
        return true;
    }

    /** Equal to a 64 bit value: low two limbs match, all others are zero. */
    friend inline bool operator==(const base_uint& a, uint64 b)
    {
        if (a.pn[0] != (unsigned int)b)
            return false;
        if (a.pn[1] != (unsigned int)(b >> 32))
            return false;
        for (int i = 2; i < base_uint::WIDTH; i++)
            if (a.pn[i] != 0)
                return false;
        return true;
    }

    friend inline bool operator!=(const base_uint& a, const base_uint& b)
    {
        return (!(a == b));
    }

    friend inline bool operator!=(const base_uint& a, uint64 b)
    {
        return (!(a == b));
    }

    /** Lower-case hex string, most significant byte first, BITS/4 digits. */
    std::string GetHex() const
    {
        char psz[sizeof(pn)*2 + 1];
        for (unsigned int i = 0; i < sizeof(pn); i++)
            sprintf(psz + i*2, "%02x", ((unsigned char*)pn)[sizeof(pn) - i - 1]);
        return std::string(psz, psz + sizeof(pn)*2);
    }

    /** Parse a hex string: optional leading whitespace, optional "0x",
     *  then hex digits up to the first non-hex character. Digits beyond
     *  the capacity of the value (the most significant ones) are ignored.
     */
    void SetHex(const char* psz)
    {
        for (int i = 0; i < WIDTH; i++)
            pn[i] = 0;

        // skip leading spaces
        // (cast: <ctype.h> functions are undefined for negative char values)
        while (isspace((unsigned char)*psz))
            psz++;

        // skip 0x
        if (psz[0] == '0' && tolower((unsigned char)psz[1]) == 'x')
            psz += 2;

        // hex string to uint
        static const unsigned char phexdigit[256] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,1,2,3,4,5,6,7,8,9,0,0,0,0,0,0, 0,0xa,0xb,0xc,0xd,0xe,0xf,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0xa,0xb,0xc,0xd,0xe,0xf,0,0,0,0,0,0,0,0,0 };
        const char* pbegin = psz;
        while (phexdigit[(unsigned char)*psz] || *psz == '0')
            psz++;

        // psz now points one past the last hex digit. Walk backwards with
        // pre-decrement so no pointer before pbegin is ever formed.
        unsigned char* p1 = (unsigned char*)pn;
        unsigned char* pend = p1 + WIDTH * 4;
        while (psz > pbegin && p1 < pend)
        {
            // low nibble of the current byte
            *p1 = phexdigit[(unsigned char)*--psz];
            if (psz > pbegin)
            {
                // high nibble, then advance to the next byte
                *p1 |= (unsigned char)(phexdigit[(unsigned char)*--psz] << 4);
                p1++;
            }
        }
    }

    void SetHex(const std::string& str)
    {
        SetHex(str.c_str());
    }

    std::string ToString() const
    {
        return (GetHex());
    }

    // Raw byte access to the limb array.

    unsigned char* begin()
    {
        return (unsigned char*)&pn[0];
    }

    unsigned char* end()
    {
        return (unsigned char*)&pn[WIDTH];
    }

    const unsigned char* begin() const
    {
        return (unsigned char*)&pn[0];
    }

    const unsigned char* end() const
    {
        return (unsigned char*)&pn[WIDTH];
    }

    /** Size of the value in bytes. */
    unsigned int size() const
    {
        return sizeof(pn);
    }

    /** 64 bit word n, built from limbs 2n (low) and 2n+1 (high).
     *  No bounds check is performed on n.
     */
    uint64 Get64(int n=0) const
    {
        return pn[2*n] | (uint64)pn[2*n+1] << 32;
    }

    // unsigned int GetSerializeSize(int nType=0, int nVersion=PROTOCOL_VERSION) const
    unsigned int GetSerializeSize(int nType, int nVersion) const
    {
        return sizeof(pn);
    }

    /** Write the raw limb bytes to s via s.write(). */
    template<typename Stream>
    // void Serialize(Stream& s, int nType=0, int nVersion=PROTOCOL_VERSION) const
    void Serialize(Stream& s, int nType, int nVersion) const
    {
        s.write((char*)pn, sizeof(pn));
    }

    /** Read the raw limb bytes from s via s.read(). */
    template<typename Stream>
    // void Unserialize(Stream& s, int nType=0, int nVersion=PROTOCOL_VERSION)
    void Unserialize(Stream& s, int nType, int nVersion)
    {
        s.read((char*)pn, sizeof(pn));
    }

    // The derived classes and the ad hoc test touch pn directly.
    friend class uint160;
    friend class uint256;
    friend inline int Testuint256AdHoc(std::vector<std::string> vArg);
};
typedef base_uint<160> base_uint160;
typedef base_uint<256> base_uint256;
//
// uint160 and uint256 could be implemented as templates, but to keep
// compile errors and debugging cleaner, they're copy and pasted.
//
//////////////////////////////////////////////////////////////////////////////
//
// uint160
//
/** 160-bit unsigned integer */
class uint160 : public base_uint160
{
public:
typedef base_uint160 basetype;
uint160()
{
for (int i = 0; i < WIDTH; i++)
pn[i] = 0;
}
uint160(const basetype& b)
{
for (int i = 0; i < WIDTH; i++)
pn[i] = b.pn[i];
}
uint160& operator=(const basetype& b)
{
for (int i = 0; i < WIDTH; i++)
pn[i] = b.pn[i];
return *this;
}
uint160(uint64 b)
{
pn[0] = (unsigned int)b;
pn[1] = (unsigned int)(b >> 32);
for (int i = 2; i < WIDTH; i++)
pn[i] = 0;
}
uint160& operator=(uint64 b)
{
pn[0] = (unsigned int)b;
pn[1] = (unsigned int)(b >> 32);
for (int i = 2; i < WIDTH; i++)
pn[i] = 0;
return *this;
}
explicit uint160(const std::string& str)
{
SetHex(str);
}
explicit uint160(const std::vector<unsigned char>& vch)
{
if (vch.size() == sizeof(pn))
memcpy(pn, &vch[0], sizeof(pn));
else
*this = 0;
}
};
// Free operators for uint160.
//
// Comparison with uint64 forwards to the base_uint160 friend operators.
// Shifts and the base/base binary operators copy the left operand into a
// uint160 and apply the matching compound assignment.
// Every remaining overload casts BOTH operands to base_uint160 and forwards
// to the base/base overload (or the base friend comparison).
// NOTE(review): the explicit casts look load-bearing - without them an
// overload would be its own best match and recurse; keep them when editing.
inline bool operator==(const uint160& a, uint64 b) { return (base_uint160)a == b; }
inline bool operator!=(const uint160& a, uint64 b) { return (base_uint160)a != b; }
// Shifts: result is a fresh uint160, the operand is not modified.
inline const uint160 operator<<(const base_uint160& a, unsigned int shift) { return uint160(a) <<= shift; }
inline const uint160 operator>>(const base_uint160& a, unsigned int shift) { return uint160(a) >>= shift; }
inline const uint160 operator<<(const uint160& a, unsigned int shift) { return uint160(a) <<= shift; }
inline const uint160 operator>>(const uint160& a, unsigned int shift) { return uint160(a) >>= shift; }
// base_uint160 op base_uint160: the overloads all the others forward to.
inline const uint160 operator^(const base_uint160& a, const base_uint160& b) { return uint160(a) ^= b; }
inline const uint160 operator&(const base_uint160& a, const base_uint160& b) { return uint160(a) &= b; }
inline const uint160 operator|(const base_uint160& a, const base_uint160& b) { return uint160(a) |= b; }
inline const uint160 operator+(const base_uint160& a, const base_uint160& b) { return uint160(a) += b; }
inline const uint160 operator-(const base_uint160& a, const base_uint160& b) { return uint160(a) -= b; }
// base_uint160 op uint160
inline bool operator<(const base_uint160& a, const uint160& b) { return (base_uint160)a < (base_uint160)b; }
inline bool operator<=(const base_uint160& a, const uint160& b) { return (base_uint160)a <= (base_uint160)b; }
inline bool operator>(const base_uint160& a, const uint160& b) { return (base_uint160)a > (base_uint160)b; }
inline bool operator>=(const base_uint160& a, const uint160& b) { return (base_uint160)a >= (base_uint160)b; }
inline bool operator==(const base_uint160& a, const uint160& b) { return (base_uint160)a == (base_uint160)b; }
inline bool operator!=(const base_uint160& a, const uint160& b) { return (base_uint160)a != (base_uint160)b; }
inline const uint160 operator^(const base_uint160& a, const uint160& b) { return (base_uint160)a ^ (base_uint160)b; }
inline const uint160 operator&(const base_uint160& a, const uint160& b) { return (base_uint160)a & (base_uint160)b; }
inline const uint160 operator|(const base_uint160& a, const uint160& b) { return (base_uint160)a | (base_uint160)b; }
inline const uint160 operator+(const base_uint160& a, const uint160& b) { return (base_uint160)a + (base_uint160)b; }
inline const uint160 operator-(const base_uint160& a, const uint160& b) { return (base_uint160)a - (base_uint160)b; }
// uint160 op base_uint160
inline bool operator<(const uint160& a, const base_uint160& b) { return (base_uint160)a < (base_uint160)b; }
inline bool operator<=(const uint160& a, const base_uint160& b) { return (base_uint160)a <= (base_uint160)b; }
inline bool operator>(const uint160& a, const base_uint160& b) { return (base_uint160)a > (base_uint160)b; }
inline bool operator>=(const uint160& a, const base_uint160& b) { return (base_uint160)a >= (base_uint160)b; }
inline bool operator==(const uint160& a, const base_uint160& b) { return (base_uint160)a == (base_uint160)b; }
inline bool operator!=(const uint160& a, const base_uint160& b) { return (base_uint160)a != (base_uint160)b; }
inline const uint160 operator^(const uint160& a, const base_uint160& b) { return (base_uint160)a ^ (base_uint160)b; }
inline const uint160 operator&(const uint160& a, const base_uint160& b) { return (base_uint160)a & (base_uint160)b; }
inline const uint160 operator|(const uint160& a, const base_uint160& b) { return (base_uint160)a | (base_uint160)b; }
inline const uint160 operator+(const uint160& a, const base_uint160& b) { return (base_uint160)a + (base_uint160)b; }
inline const uint160 operator-(const uint160& a, const base_uint160& b) { return (base_uint160)a - (base_uint160)b; }
// uint160 op uint160
inline bool operator<(const uint160& a, const uint160& b) { return (base_uint160)a < (base_uint160)b; }
inline bool operator<=(const uint160& a, const uint160& b) { return (base_uint160)a <= (base_uint160)b; }
inline bool operator>(const uint160& a, const uint160& b) { return (base_uint160)a > (base_uint160)b; }
inline bool operator>=(const uint160& a, const uint160& b) { return (base_uint160)a >= (base_uint160)b; }
inline bool operator==(const uint160& a, const uint160& b) { return (base_uint160)a == (base_uint160)b; }
inline bool operator!=(const uint160& a, const uint160& b) { return (base_uint160)a != (base_uint160)b; }
inline const uint160 operator^(const uint160& a, const uint160& b) { return (base_uint160)a ^ (base_uint160)b; }
inline const uint160 operator&(const uint160& a, const uint160& b) { return (base_uint160)a & (base_uint160)b; }
inline const uint160 operator|(const uint160& a, const uint160& b) { return (base_uint160)a | (base_uint160)b; }
inline const uint160 operator+(const uint160& a, const uint160& b) { return (base_uint160)a + (base_uint160)b; }
inline const uint160 operator-(const uint160& a, const uint160& b) { return (base_uint160)a - (base_uint160)b; }
//////////////////////////////////////////////////////////////////////////////
//
// uint256
//
/** 256-bit unsigned integer */
class uint256 : public base_uint256
{
public:
typedef base_uint256 basetype;
uint256()
{
for (int i = 0; i < WIDTH; i++)
pn[i] = 0;
}
uint256(const basetype& b)
{
for (int i = 0; i < WIDTH; i++)
pn[i] = b.pn[i];
}
uint256& operator=(const basetype& b)
{
for (int i = 0; i < WIDTH; i++)
pn[i] = b.pn[i];
return *this;
}
uint256(uint64 b)
{
pn[0] = (unsigned int)b;
pn[1] = (unsigned int)(b >> 32);
for (int i = 2; i < WIDTH; i++)
pn[i] = 0;
}
uint256& operator=(uint64 b)
{
pn[0] = (unsigned int)b;
pn[1] = (unsigned int)(b >> 32);
for (int i = 2; i < WIDTH; i++)
pn[i] = 0;
return *this;
}
explicit uint256(const std::string& str)
{
SetHex(str);
}
explicit uint256(const std::vector<unsigned char>& vch)
{
if (vch.size() == sizeof(pn))
memcpy(pn, &vch[0], sizeof(pn));
else
*this = 0;
}
};
// Free operators for uint256.
//
// Comparison with uint64 forwards to the base_uint256 friend operators.
// Shifts and the base/base binary operators copy the left operand into a
// uint256 and apply the matching compound assignment.
// Every remaining overload casts BOTH operands to base_uint256 and forwards
// to the base/base overload (or the base friend comparison).
// NOTE(review): the explicit casts look load-bearing - without them an
// overload would be its own best match and recurse; keep them when editing.
inline bool operator==(const uint256& a, uint64 b) { return (base_uint256)a == b; }
inline bool operator!=(const uint256& a, uint64 b) { return (base_uint256)a != b; }
// Shifts: result is a fresh uint256, the operand is not modified.
inline const uint256 operator<<(const base_uint256& a, unsigned int shift) { return uint256(a) <<= shift; }
inline const uint256 operator>>(const base_uint256& a, unsigned int shift) { return uint256(a) >>= shift; }
inline const uint256 operator<<(const uint256& a, unsigned int shift) { return uint256(a) <<= shift; }
inline const uint256 operator>>(const uint256& a, unsigned int shift) { return uint256(a) >>= shift; }
// base_uint256 op base_uint256: the overloads all the others forward to.
inline const uint256 operator^(const base_uint256& a, const base_uint256& b) { return uint256(a) ^= b; }
inline const uint256 operator&(const base_uint256& a, const base_uint256& b) { return uint256(a) &= b; }
inline const uint256 operator|(const base_uint256& a, const base_uint256& b) { return uint256(a) |= b; }
inline const uint256 operator+(const base_uint256& a, const base_uint256& b) { return uint256(a) += b; }
inline const uint256 operator-(const base_uint256& a, const base_uint256& b) { return uint256(a) -= b; }
// base_uint256 op uint256
inline bool operator<(const base_uint256& a, const uint256& b) { return (base_uint256)a < (base_uint256)b; }
inline bool operator<=(const base_uint256& a, const uint256& b) { return (base_uint256)a <= (base_uint256)b; }
inline bool operator>(const base_uint256& a, const uint256& b) { return (base_uint256)a > (base_uint256)b; }
inline bool operator>=(const base_uint256& a, const uint256& b) { return (base_uint256)a >= (base_uint256)b; }
inline bool operator==(const base_uint256& a, const uint256& b) { return (base_uint256)a == (base_uint256)b; }
inline bool operator!=(const base_uint256& a, const uint256& b) { return (base_uint256)a != (base_uint256)b; }
inline const uint256 operator^(const base_uint256& a, const uint256& b) { return (base_uint256)a ^ (base_uint256)b; }
inline const uint256 operator&(const base_uint256& a, const uint256& b) { return (base_uint256)a & (base_uint256)b; }
inline const uint256 operator|(const base_uint256& a, const uint256& b) { return (base_uint256)a | (base_uint256)b; }
inline const uint256 operator+(const base_uint256& a, const uint256& b) { return (base_uint256)a + (base_uint256)b; }
inline const uint256 operator-(const base_uint256& a, const uint256& b) { return (base_uint256)a - (base_uint256)b; }
// uint256 op base_uint256
inline bool operator<(const uint256& a, const base_uint256& b) { return (base_uint256)a < (base_uint256)b; }
inline bool operator<=(const uint256& a, const base_uint256& b) { return (base_uint256)a <= (base_uint256)b; }
inline bool operator>(const uint256& a, const base_uint256& b) { return (base_uint256)a > (base_uint256)b; }
inline bool operator>=(const uint256& a, const base_uint256& b) { return (base_uint256)a >= (base_uint256)b; }
inline bool operator==(const uint256& a, const base_uint256& b) { return (base_uint256)a == (base_uint256)b; }
inline bool operator!=(const uint256& a, const base_uint256& b) { return (base_uint256)a != (base_uint256)b; }
inline const uint256 operator^(const uint256& a, const base_uint256& b) { return (base_uint256)a ^ (base_uint256)b; }
inline const uint256 operator&(const uint256& a, const base_uint256& b) { return (base_uint256)a & (base_uint256)b; }
inline const uint256 operator|(const uint256& a, const base_uint256& b) { return (base_uint256)a | (base_uint256)b; }
inline const uint256 operator+(const uint256& a, const base_uint256& b) { return (base_uint256)a + (base_uint256)b; }
inline const uint256 operator-(const uint256& a, const base_uint256& b) { return (base_uint256)a - (base_uint256)b; }
// uint256 op uint256
inline bool operator<(const uint256& a, const uint256& b) { return (base_uint256)a < (base_uint256)b; }
inline bool operator<=(const uint256& a, const uint256& b) { return (base_uint256)a <= (base_uint256)b; }
inline bool operator>(const uint256& a, const uint256& b) { return (base_uint256)a > (base_uint256)b; }
inline bool operator>=(const uint256& a, const uint256& b) { return (base_uint256)a >= (base_uint256)b; }
inline bool operator==(const uint256& a, const uint256& b) { return (base_uint256)a == (base_uint256)b; }
inline bool operator!=(const uint256& a, const uint256& b) { return (base_uint256)a != (base_uint256)b; }
inline const uint256 operator^(const uint256& a, const uint256& b) { return (base_uint256)a ^ (base_uint256)b; }
inline const uint256 operator&(const uint256& a, const uint256& b) { return (base_uint256)a & (base_uint256)b; }
inline const uint256 operator|(const uint256& a, const uint256& b) { return (base_uint256)a | (base_uint256)b; }
inline const uint256 operator+(const uint256& a, const uint256& b) { return (base_uint256)a + (base_uint256)b; }
inline const uint256 operator-(const uint256& a, const uint256& b) { return (base_uint256)a - (base_uint256)b; }
#ifdef TEST_UINT256
// Ad hoc exercise of uint256, compiled only when TEST_UINT256 is defined.
// It prints intermediate values to stdout for manual inspection and makes no
// assertions; vArg is not used. Always returns 0.
// Direct access to the protected pn[] limbs relies on this function being
// declared a friend of base_uint.
inline int Testuint256AdHoc(std::vector<std::string> vArg)
{
// Decrement through zero (wraps to all ones) and increment back up.
uint256 g(0);
printf("%s\n", g.ToString().c_str());
g--; printf("g--\n");
printf("%s\n", g.ToString().c_str());
g--; printf("g--\n");
printf("%s\n", g.ToString().c_str());
g++; printf("g++\n");
printf("%s\n", g.ToString().c_str());
g++; printf("g++\n");
printf("%s\n", g.ToString().c_str());
g++; printf("g++\n");
printf("%s\n", g.ToString().c_str());
g++; printf("g++\n");
printf("%s\n", g.ToString().c_str());
// Construction and assignment from small integers.
uint256 a(7);
printf("a=7\n");
printf("%s\n", a.ToString().c_str());
// Despite the "b undefined" label, the default constructor zeroes b.
uint256 b;
printf("b undefined\n");
printf("%s\n", b.ToString().c_str());
int c = 3;
a = c;
a.pn[3] = 15;
printf("%s\n", a.ToString().c_str());
uint256 k(c);
a = 5;
a.pn[3] = 15;
printf("%s\n", a.ToString().c_str());
// Bitwise operators, including the uint64 overloads.
b = 1;
b <<= 52;
a |= b;
a ^= 0x500;
printf("a %s\n", a.ToString().c_str());
a = a | b | (uint256)0x1000;
printf("a %s\n", a.ToString().c_str());
printf("b %s\n", b.ToString().c_str());
// Carry/borrow across the limb 0 boundary: start just below 2^32.
a = 0xfffffffe;
a.pn[4] = 9;
printf("%s\n", a.ToString().c_str());
a++;
printf("%s\n", a.ToString().c_str());
a++;
printf("%s\n", a.ToString().c_str());
a++;
printf("%s\n", a.ToString().c_str());
a++;
printf("%s\n", a.ToString().c_str());
a--;
printf("%s\n", a.ToString().c_str());
a--;
printf("%s\n", a.ToString().c_str());
a--;
printf("%s\n", a.ToString().c_str());
// Postfix decrement: d receives the value before the decrement.
uint256 d = a--;
printf("%s\n", d.ToString().c_str());
printf("%s\n", a.ToString().c_str());
a--;
printf("%s\n", a.ToString().c_str());
a--;
printf("%s\n", a.ToString().c_str());
d = a;
printf("%s\n", d.ToString().c_str());
// Only the first printf is the loop body; the newline is printed once after.
for (int i = uint256::WIDTH-1; i >= 0; i--) printf("%08x", d.pn[i]); printf("\n");
uint256 neg = d;
neg = ~neg;
printf("%s\n", neg.ToString().c_str());
// Hex parsing with a "0x" prefix and mixed-case digits.
uint256 e = uint256("0xABCDEF123abcdef12345678909832180000011111111");
printf("\n");
printf("%s\n", e.ToString().c_str());
printf("\n");
uint256 x1 = uint256("0xABCDEF123abcdef12345678909832180000011111111");
uint256 x2;
printf("%s\n", x1.ToString().c_str());
// Left shifts in 4 bit steps, running past the 256 bit width.
for (int i = 0; i < 270; i += 4)
{
x2 = x1 << i;
printf("%s\n", x2.ToString().c_str());
}
printf("\n");
printf("%s\n", x1.ToString().c_str());
// Right shifts in 4 bit steps, running past the 256 bit width.
for (int i = 0; i < 270; i += 4)
{
x2 = x1;
x2 >>= i;
printf("%s\n", x2.ToString().c_str());
}
// Shift an all-ones value right, then left, by 0..99 bits.
for (int i = 0; i < 100; i++)
{
uint256 k = (~uint256(0) >> i);
printf("%s\n", k.ToString().c_str());
}
for (int i = 0; i < 100; i++)
{
uint256 k = (~uint256(0) << i);
printf("%s\n", k.ToString().c_str());
}
return (0);
}
#endif
#endif

80
util.c
View File

@@ -80,6 +80,86 @@ struct thread_q {
pthread_cond_t cond;
};
/*
 * Log a printf-style message without a timestamp prefix.
 *
 * prio  syslog-style priority, or LOG_BLUE which is mapped to LOG_NOTICE
 *       for syslog and to cyan on the console.
 * fmt   printf format string, followed by its arguments.
 *
 * With HAVE_SYSLOG_H and use_syslog set, the formatted message goes to
 * syslog. Otherwise it is written to stdout as " <color> <message><reset>"
 * plus a newline, under applog_lock so lines from different threads do not
 * interleave. Colors are emitted only when use_colors is set.
 */
void applog2( int prio, const char *fmt, ... )
{
   va_list ap;

   va_start( ap, fmt );

#ifdef HAVE_SYSLOG_H
   if ( use_syslog )
   {
      va_list ap2;
      int len;

      /* custom colors to syslog prio */
      if ( prio > LOG_DEBUG )
      {
         switch ( prio )
         {
            case LOG_BLUE: prio = LOG_NOTICE; break;
            default: break;
         }
      }

      /* First pass only measures the formatted length. */
      va_copy( ap2, ap );
      len = vsnprintf( NULL, 0, fmt, ap2 );
      va_end( ap2 );

      /* A negative length means a formatting error: nothing to log. */
      if ( len >= 0 )
      {
         char *buf = alloca( (size_t)len + 1 );
         if ( vsnprintf( buf, (size_t)len + 1, fmt, ap ) >= 0 )
            syslog( prio, "%s", buf );
      }
   }
#else
   if (0) {}
#endif
   else
   {
      const char *color = "";
      char *f;
      size_t len;

      switch ( prio )
      {
         case LOG_ERR:     color = CL_RED; break;
         case LOG_WARNING: color = CL_YLW; break;
         case LOG_NOTICE:  color = CL_WHT; break;
         case LOG_INFO:    color = "";     break;
         case LOG_DEBUG:   color = CL_GRY; break;
         case LOG_BLUE:    color = CL_CYN; break;
         default: break;
      }
      if ( !use_colors )
         color = "";

      /* 64 bytes of headroom cover the color codes, spaces and newline. */
      len = 64 + strlen( fmt ) + 2;
      f = malloc( len );
      if ( f )
         snprintf( f, len, " %s %s%s\n", color, fmt,
                   use_colors ? CL_N : "" );

      /* If the allocation failed, fall back to the caller's bare format
         (no color, no trailing newline) rather than dereferencing NULL. */
      pthread_mutex_lock( &applog_lock );
      vfprintf( stdout, f ? f : fmt, ap );  /* atomic write to stdout */
      fflush( stdout );
      pthread_mutex_unlock( &applog_lock );
      free( f );
   }
   va_end( ap );
}
void applog(int prio, const char *fmt, ...)
{
va_list ap;