mirror of
https://github.com/JayDDee/cpuminer-opt.git
synced 2025-09-17 23:44:27 +00:00
v3.7.5
This commit is contained in:
12
AUTHORS
12
AUTHORS
@@ -16,4 +16,16 @@ LucasJones
|
||||
|
||||
tpruvot@github
|
||||
|
||||
elmad
|
||||
|
||||
djm34
|
||||
|
||||
palmd
|
||||
|
||||
ig0tik3d
|
||||
|
||||
Wolf0
|
||||
|
||||
Optiminer
|
||||
|
||||
Jay D Dee
|
||||
|
@@ -104,7 +104,9 @@ cpuminer_SOURCES = \
|
||||
algo/lyra2/sponge.c \
|
||||
algo/lyra2/lyra2rev2.c \
|
||||
algo/lyra2/lyra2re.c \
|
||||
algo/lyra2/zcoin.c \
|
||||
algo/lyra2/lyra2z-gate.c \
|
||||
algo/lyra2/lyra2z.c \
|
||||
algo/lyra2/lyra2z-4way.c \
|
||||
algo/lyra2/lyra2z330.c \
|
||||
algo/m7m.c \
|
||||
algo/neoscrypt.c \
|
||||
|
@@ -35,8 +35,9 @@ Supported Algorithms
|
||||
heavy Heavy
|
||||
hmq1725 Espers
|
||||
hodl Hodlcoin
|
||||
jha jackpotcoin
|
||||
keccak Keccak
|
||||
jha Jackpotcoin
|
||||
keccak Maxcoin
|
||||
keccakc Creative coin
|
||||
lbry LBC, LBRY Credits
|
||||
luffa Luffa
|
||||
lyra2re lyra2
|
||||
@@ -50,7 +51,7 @@ Supported Algorithms
|
||||
pentablake Pentablake
|
||||
phi1612 phi, LUX coin
|
||||
pluck Pluck:128 (Supcoin)
|
||||
polytimos
|
||||
polytimos Ninja
|
||||
quark Quark
|
||||
qubit Qubit
|
||||
scrypt scrypt(1024, 1, 1) (default)
|
||||
|
@@ -164,6 +164,17 @@ Support for even older x86_64 without AES_NI or SSE2 is not availble.
|
||||
Change Log
|
||||
----------
|
||||
|
||||
v3.7.5
|
||||
|
||||
New algo keccakc for Creative coin with 4way optimizations
|
||||
|
||||
Rewrote some AVX/AVX2 code for more consistent implementation and some
|
||||
optimizing.
|
||||
|
||||
Enhanced capabilities check to support 4way, mor eprecise reporting of
|
||||
features (not all algos use SSE2), and better error messages when using
|
||||
an incompatible pre-built version (Windows users).
|
||||
|
||||
v3.7.4
|
||||
|
||||
Removed unnecessary build options.
|
||||
|
@@ -77,6 +77,12 @@ void algo_not_tested()
|
||||
applog(LOG_WARNING,"and bad things may happen. Use at your own risk.");
|
||||
}
|
||||
|
||||
void four_way_not_tested()
|
||||
{
|
||||
applog( LOG_WARNING,"Algo %s has not been tested using 4way. It may not", algo_names[opt_algo] );
|
||||
applog( LOG_WARNING,"work or may be slower. Please report your results.");
|
||||
}
|
||||
|
||||
void algo_not_implemented()
|
||||
{
|
||||
applog(LOG_ERR,"Algo %s has not been Implemented.",algo_names[opt_algo]);
|
||||
@@ -124,7 +130,7 @@ void init_algo_gate( algo_gate_t* gate )
|
||||
gate->do_this_thread = (void*)&return_true;
|
||||
gate->longpoll_rpc_call = (void*)&std_longpoll_rpc_call;
|
||||
gate->stratum_handle_response = (void*)&std_stratum_handle_response;
|
||||
gate->optimizations = SSE2_OPT;
|
||||
gate->optimizations = EMPTY_SET;
|
||||
gate->ntime_index = STD_NTIME_INDEX;
|
||||
gate->nbits_index = STD_NBITS_INDEX;
|
||||
gate->nonce_index = STD_NONCE_INDEX;
|
||||
@@ -171,11 +177,12 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
|
||||
case ALGO_HODL: register_hodl_algo ( gate ); break;
|
||||
case ALGO_JHA: register_jha_algo ( gate ); break;
|
||||
case ALGO_KECCAK: register_keccak_algo ( gate ); break;
|
||||
case ALGO_KECCAKC: register_keccakc_algo ( gate ); break;
|
||||
case ALGO_LBRY: register_lbry_algo ( gate ); break;
|
||||
case ALGO_LUFFA: register_luffa_algo ( gate ); break;
|
||||
case ALGO_LYRA2RE: register_lyra2re_algo ( gate ); break;
|
||||
case ALGO_LYRA2REV2: register_lyra2rev2_algo ( gate ); break;
|
||||
case ALGO_LYRA2Z: register_zcoin_algo ( gate ); break;
|
||||
case ALGO_LYRA2Z: register_lyra2z_algo ( gate ); break;
|
||||
case ALGO_LYRA2Z330: register_lyra2z330_algo ( gate ); break;
|
||||
case ALGO_M7M: register_m7m_algo ( gate ); break;
|
||||
case ALGO_MYR_GR: register_myriad_algo ( gate ); break;
|
||||
|
@@ -85,12 +85,13 @@
|
||||
|
||||
typedef uint32_t set_t;
|
||||
|
||||
#define EMPTY_SET 0
|
||||
#define SSE2_OPT 1
|
||||
#define AES_OPT 2
|
||||
#define AVX_OPT 4
|
||||
#define AVX2_OPT 8
|
||||
#define SHA_OPT 16
|
||||
#define EMPTY_SET 0
|
||||
#define SSE2_OPT 1
|
||||
#define AES_OPT 2
|
||||
#define AVX_OPT 4
|
||||
#define AVX2_OPT 8
|
||||
#define SHA_OPT 0x10
|
||||
#define FOUR_WAY_OPT 0x20
|
||||
|
||||
// return set containing all elements from sets a & b
|
||||
inline set_t set_union ( set_t a, set_t b ) { return a | b; }
|
||||
@@ -156,7 +157,7 @@ bool return_false();
|
||||
void *return_null();
|
||||
void algo_not_tested();
|
||||
void algo_not_implemented();
|
||||
|
||||
void four_way_not_tested();
|
||||
|
||||
// Warning: algo_gate.nonce_index should only be used in targetted code
|
||||
// due to different behaviours by different targets. The JR2 index uses an
|
||||
|
@@ -9,18 +9,18 @@
|
||||
|
||||
void blakehash_4way(void *state, const void *input)
|
||||
{
|
||||
uint32_t hash0[16] __attribute__ ((aligned (64)));
|
||||
uint32_t hash1[16] __attribute__ ((aligned (64)));
|
||||
uint32_t hash2[16] __attribute__ ((aligned (64)));
|
||||
uint32_t hash3[16] __attribute__ ((aligned (64)));
|
||||
uint32_t vhash[16*4] __attribute__ ((aligned (64)));
|
||||
uint32_t vhash[4*4] __attribute__ ((aligned (64)));
|
||||
uint32_t hash0[4] __attribute__ ((aligned (32)));
|
||||
uint32_t hash1[4] __attribute__ ((aligned (32)));
|
||||
uint32_t hash2[4] __attribute__ ((aligned (32)));
|
||||
uint32_t hash3[4] __attribute__ ((aligned (32)));
|
||||
blake256_4way_context ctx;
|
||||
|
||||
blake256_4way_init( &ctx );
|
||||
blake256_4way( &ctx, input, 16 );
|
||||
blake256_4way_close( &ctx, vhash );
|
||||
|
||||
m128_deinterleave_4x32( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
mm_deinterleave_4x32( hash0, hash1, hash2, hash3, vhash, 256 );
|
||||
|
||||
memcpy( state, hash0, 32 );
|
||||
memcpy( state+32, hash1, 32 );
|
||||
@@ -32,7 +32,7 @@ int scanhash_blake_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
{
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[4*8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[4*4] __attribute__ ((aligned (32)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
@@ -49,7 +49,7 @@ int scanhash_blake_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
// we need big endian data...
|
||||
swab32_array( endiandata, pdata, 20 );
|
||||
|
||||
m128_interleave_4x32( vdata, endiandata, endiandata, endiandata,
|
||||
mm_interleave_4x32( vdata, endiandata, endiandata, endiandata,
|
||||
endiandata, 640 );
|
||||
|
||||
uint32_t *noncep = vdata + 76; // 19*4
|
||||
|
@@ -13,11 +13,12 @@ bool register_blake_algo( algo_gate_t* gate )
|
||||
// gate->scanhash = (void*)&scanhash_blake_8way;
|
||||
// gate->hash = (void*)&blakehash_8way;
|
||||
#if defined(BLAKE_4WAY)
|
||||
gate->optimizations = SSE2_OPT | AVX_OPT;
|
||||
four_way_not_tested();
|
||||
gate->optimizations = FOUR_WAY_OPT;
|
||||
gate->scanhash = (void*)&scanhash_blake_4way;
|
||||
gate->hash = (void*)&blakehash_4way;
|
||||
four_way_not_tested();
|
||||
#else
|
||||
gate->optimizations = SSE2_OPT;
|
||||
gate->scanhash = (void*)&scanhash_blake;
|
||||
gate->hash = (void*)&blakehash;
|
||||
#endif
|
||||
|
@@ -536,22 +536,22 @@ do { \
|
||||
, _mmset_epi32( CS6, CS6, CS6, CS6 ) ); \
|
||||
VF = _mm_xor_si128( _mmset_epi32( T1, T1, T1, T1 ), \
|
||||
_mmset_epi32( CS7, CS7, CS7, CS7 ) ); \
|
||||
M[0x0] = mm_byteswap_epi32( *(buf + 0) ); \
|
||||
M[0x1] = mm_byteswap_epi32( *(buf + 1) ); \
|
||||
M[0x2] = mm_byteswap_epi32( *(buf + 2) ); \
|
||||
M[0x3] = mm_byteswap_epi32( *(buf + 3) ); \
|
||||
M[0x4] = mm_byteswap_epi32( *(buf + 4) ); \
|
||||
M[0x5] = mm_byteswap_epi32( *(buf + 5) ); \
|
||||
M[0x6] = mm_byteswap_epi32( *(buf + 6) ); \
|
||||
M[0x7] = mm_byteswap_epi32( *(buf + 7) ); \
|
||||
M[0x8] = mm_byteswap_epi32( *(buf + 8) ); \
|
||||
M[0x9] = mm_byteswap_epi32( *(buf + 9) ); \
|
||||
M[0xA] = mm_byteswap_epi32( *(buf + 10) ); \
|
||||
M[0xB] = mm_byteswap_epi32( *(buf + 11) ); \
|
||||
M[0xC] = mm_byteswap_epi32( *(buf + 12) ); \
|
||||
M[0xD] = mm_byteswap_epi32( *(buf + 13) ); \
|
||||
M[0xE] = mm_byteswap_epi32( *(buf + 14) ); \
|
||||
M[0xF] = mm_byteswap_epi32( *(buf + 15) ); \
|
||||
M[0x0] = mm_byteswap_32( *(buf + 0) ); \
|
||||
M[0x1] = mm_byteswap_32( *(buf + 1) ); \
|
||||
M[0x2] = mm_byteswap_32( *(buf + 2) ); \
|
||||
M[0x3] = mm_byteswap_32( *(buf + 3) ); \
|
||||
M[0x4] = mm_byteswap_32( *(buf + 4) ); \
|
||||
M[0x5] = mm_byteswap_32( *(buf + 5) ); \
|
||||
M[0x6] = mm_byteswap_32( *(buf + 6) ); \
|
||||
M[0x7] = mm_byteswap_32( *(buf + 7) ); \
|
||||
M[0x8] = mm_byteswap_32( *(buf + 8) ); \
|
||||
M[0x9] = mm_byteswap_32( *(buf + 9) ); \
|
||||
M[0xA] = mm_byteswap_32( *(buf + 10) ); \
|
||||
M[0xB] = mm_byteswap_32( *(buf + 11) ); \
|
||||
M[0xC] = mm_byteswap_32( *(buf + 12) ); \
|
||||
M[0xD] = mm_byteswap_32( *(buf + 13) ); \
|
||||
M[0xE] = mm_byteswap_32( *(buf + 14) ); \
|
||||
M[0xF] = mm_byteswap_32( *(buf + 15) ); \
|
||||
for (r = 0; r < BLAKE32_ROUNDS; r ++) \
|
||||
ROUND_S_4WAY(r); \
|
||||
H0 = _mm_xor_si128( _mm_xor_si128( \
|
||||
@@ -601,22 +601,22 @@ do { \
|
||||
_mm_set_epi32( CS6, CS6, CS6, CS6 ) ); \
|
||||
VF = _mm_xor_si128( _mm_set_epi32( T1, T1, T1, T1 ), \
|
||||
_mm_set_epi32( CS7, CS7, CS7, CS7 ) ); \
|
||||
M0 = mm_byteswap_epi32( * buf ); \
|
||||
M1 = mm_byteswap_epi32( *(buf+1) ); \
|
||||
M2 = mm_byteswap_epi32( *(buf+2) ); \
|
||||
M3 = mm_byteswap_epi32( *(buf+3) ); \
|
||||
M4 = mm_byteswap_epi32( *(buf+4) ); \
|
||||
M5 = mm_byteswap_epi32( *(buf+5) ); \
|
||||
M6 = mm_byteswap_epi32( *(buf+6) ); \
|
||||
M7 = mm_byteswap_epi32( *(buf+7) ); \
|
||||
M8 = mm_byteswap_epi32( *(buf+8) ); \
|
||||
M9 = mm_byteswap_epi32( *(buf+9) ); \
|
||||
MA = mm_byteswap_epi32( *(buf+10) ); \
|
||||
MB = mm_byteswap_epi32( *(buf+11) ); \
|
||||
MC = mm_byteswap_epi32( *(buf+12) ); \
|
||||
MD = mm_byteswap_epi32( *(buf+13) ); \
|
||||
ME = mm_byteswap_epi32( *(buf+14) ); \
|
||||
MF = mm_byteswap_epi32( *(buf+15) ); \
|
||||
M0 = mm_byteswap_32( * buf ); \
|
||||
M1 = mm_byteswap_32( *(buf+1) ); \
|
||||
M2 = mm_byteswap_32( *(buf+2) ); \
|
||||
M3 = mm_byteswap_32( *(buf+3) ); \
|
||||
M4 = mm_byteswap_32( *(buf+4) ); \
|
||||
M5 = mm_byteswap_32( *(buf+5) ); \
|
||||
M6 = mm_byteswap_32( *(buf+6) ); \
|
||||
M7 = mm_byteswap_32( *(buf+7) ); \
|
||||
M8 = mm_byteswap_32( *(buf+8) ); \
|
||||
M9 = mm_byteswap_32( *(buf+9) ); \
|
||||
MA = mm_byteswap_32( *(buf+10) ); \
|
||||
MB = mm_byteswap_32( *(buf+11) ); \
|
||||
MC = mm_byteswap_32( *(buf+12) ); \
|
||||
MD = mm_byteswap_32( *(buf+13) ); \
|
||||
ME = mm_byteswap_32( *(buf+14) ); \
|
||||
MF = mm_byteswap_32( *(buf+15) ); \
|
||||
ROUND_S_4WAY(0); \
|
||||
ROUND_S_4WAY(1); \
|
||||
ROUND_S_4WAY(2); \
|
||||
@@ -722,22 +722,22 @@ do { \
|
||||
_mm256_set256_epi64( CB6, CB6, CB6, CB6 ) ); \
|
||||
VF = _mm256_xor_si256( _mm256_set_epi64( T1, T1, T1, T1 ), \
|
||||
_mm256_set256_epi64( CB7, CB7, CB7, CB7 ) ); \
|
||||
M[0x0] = mm256_byteswap_epi64( *(buf+0) ); \
|
||||
M[0x1] = mm256_byteswap_epi64( *(buf+1) ); \
|
||||
M[0x2] = mm256_byteswap_epi64( *(buf+2) ); \
|
||||
M[0x3] = mm256_byteswap_epi64( *(buf+3) ); \
|
||||
M[0x4] = mm256_byteswap_epi64( *(buf+4) ); \
|
||||
M[0x5] = mm256_byteswap_epi64( *(buf+5) ); \
|
||||
M[0x6] = mm256_byteswap_epi64( *(buf+6) ); \
|
||||
M[0x7] = mm256_byteswap_epi64( *(buf+7) ); \
|
||||
M[0x8] = mm256_byteswap_epi64( *(buf+8) ); \
|
||||
M[0x9] = mm256_byteswap_epi64( *(buf+9) ); \
|
||||
M[0xA] = mm256_byteswap_epi64( *(buf+10) ); \
|
||||
M[0xB] = mm256_byteswap_epi64( *(buf+11) ); \
|
||||
M[0xC] = mm256_byteswap_epi64( *(buf+12) ); \
|
||||
M[0xD] = mm256_byteswap_epi64( *(buf+13) ); \
|
||||
M[0xE] = mm256_byteswap_epi64( *(buf+14) ); \
|
||||
M[0xF] = mm256_byteswap_epi64( *(buf+15) ); \
|
||||
M[0x0] = mm256_byteswap_64( *(buf+0) ); \
|
||||
M[0x1] = mm256_byteswap_64( *(buf+1) ); \
|
||||
M[0x2] = mm256_byteswap_64( *(buf+2) ); \
|
||||
M[0x3] = mm256_byteswap_64( *(buf+3) ); \
|
||||
M[0x4] = mm256_byteswap_64( *(buf+4) ); \
|
||||
M[0x5] = mm256_byteswap_64( *(buf+5) ); \
|
||||
M[0x6] = mm256_byteswap_64( *(buf+6) ); \
|
||||
M[0x7] = mm256_byteswap_64( *(buf+7) ); \
|
||||
M[0x8] = mm256_byteswap_64( *(buf+8) ); \
|
||||
M[0x9] = mm256_byteswap_64( *(buf+9) ); \
|
||||
M[0xA] = mm256_byteswap_64( *(buf+10) ); \
|
||||
M[0xB] = mm256_byteswap_64( *(buf+11) ); \
|
||||
M[0xC] = mm256_byteswap_64( *(buf+12) ); \
|
||||
M[0xD] = mm256_byteswap_64( *(buf+13) ); \
|
||||
M[0xE] = mm256_byteswap_64( *(buf+14) ); \
|
||||
M[0xF] = mm256_byteswap_64( *(buf+15) ); \
|
||||
for (r = 0; r < 16; r ++) \
|
||||
ROUND_B_4WAY(r); \
|
||||
H0 = _mm256_xor_si256( _mm256_xor_si256( \
|
||||
@@ -787,22 +787,22 @@ do { \
|
||||
_mm256_set_epi64x( CB6, CB6, CB6, CB6 ) ); \
|
||||
VF = _mm256_xor_si256( _mm256_set_epi64x( T1, T1, T1, T1 ), \
|
||||
_mm256_set_epi64x( CB7, CB7, CB7, CB7 ) ); \
|
||||
M0 = mm256_byteswap_epi64( *(buf + 0) ); \
|
||||
M1 = mm256_byteswap_epi64( *(buf + 1) ); \
|
||||
M2 = mm256_byteswap_epi64( *(buf + 2) ); \
|
||||
M3 = mm256_byteswap_epi64( *(buf + 3) ); \
|
||||
M4 = mm256_byteswap_epi64( *(buf + 4) ); \
|
||||
M5 = mm256_byteswap_epi64( *(buf + 5) ); \
|
||||
M6 = mm256_byteswap_epi64( *(buf + 6) ); \
|
||||
M7 = mm256_byteswap_epi64( *(buf + 7) ); \
|
||||
M8 = mm256_byteswap_epi64( *(buf + 8) ); \
|
||||
M9 = mm256_byteswap_epi64( *(buf + 9) ); \
|
||||
MA = mm256_byteswap_epi64( *(buf + 10) ); \
|
||||
MB = mm256_byteswap_epi64( *(buf + 11) ); \
|
||||
MC = mm256_byteswap_epi64( *(buf + 12) ); \
|
||||
MD = mm256_byteswap_epi64( *(buf + 13) ); \
|
||||
ME = mm256_byteswap_epi64( *(buf + 14) ); \
|
||||
MF = mm256_byteswap_epi64( *(buf + 15) ); \
|
||||
M0 = mm256_byteswap_64( *(buf + 0) ); \
|
||||
M1 = mm256_byteswap_64( *(buf + 1) ); \
|
||||
M2 = mm256_byteswap_64( *(buf + 2) ); \
|
||||
M3 = mm256_byteswap_64( *(buf + 3) ); \
|
||||
M4 = mm256_byteswap_64( *(buf + 4) ); \
|
||||
M5 = mm256_byteswap_64( *(buf + 5) ); \
|
||||
M6 = mm256_byteswap_64( *(buf + 6) ); \
|
||||
M7 = mm256_byteswap_64( *(buf + 7) ); \
|
||||
M8 = mm256_byteswap_64( *(buf + 8) ); \
|
||||
M9 = mm256_byteswap_64( *(buf + 9) ); \
|
||||
MA = mm256_byteswap_64( *(buf + 10) ); \
|
||||
MB = mm256_byteswap_64( *(buf + 11) ); \
|
||||
MC = mm256_byteswap_64( *(buf + 12) ); \
|
||||
MD = mm256_byteswap_64( *(buf + 13) ); \
|
||||
ME = mm256_byteswap_64( *(buf + 14) ); \
|
||||
MF = mm256_byteswap_64( *(buf + 15) ); \
|
||||
ROUND_B_4WAY(0); \
|
||||
ROUND_B_4WAY(1); \
|
||||
ROUND_B_4WAY(2); \
|
||||
@@ -870,7 +870,7 @@ blake32_4way( blake_4way_small_context *sc, const void *data, size_t len )
|
||||
|
||||
if ( len < buf_size - ptr )
|
||||
{
|
||||
memcpy_m128i( buf + (ptr>>2), vdata, len>>2 );
|
||||
memcpy_128( buf + (ptr>>2), vdata, len>>2 );
|
||||
ptr += len;
|
||||
sc->ptr = ptr;
|
||||
return;
|
||||
@@ -884,7 +884,7 @@ blake32_4way( blake_4way_small_context *sc, const void *data, size_t len )
|
||||
clen = buf_size - ptr;
|
||||
if (clen > len)
|
||||
clen = len;
|
||||
memcpy_m128i( buf + (ptr>>2), vdata, clen>>2 );
|
||||
memcpy_128( buf + (ptr>>2), vdata, clen>>2 );
|
||||
ptr += clen;
|
||||
vdata += (clen>>2);
|
||||
len -= clen;
|
||||
@@ -936,32 +936,32 @@ blake32_4way_close( blake_4way_small_context *sc, unsigned ub, unsigned n,
|
||||
|
||||
if ( ptr <= 48 )
|
||||
{
|
||||
memset_zero_m128i( u.buf + (ptr>>2) + 1, (48 - ptr) >> 2 );
|
||||
memset_zero_128( u.buf + (ptr>>2) + 1, (48 - ptr) >> 2 );
|
||||
if (out_size_w32 == 8)
|
||||
u.buf[52>>2] = _mm_or_si128( u.buf[52>>2],
|
||||
_mm_set_epi32( 0x010000000, 0x01000000,
|
||||
0x010000000, 0x01000000 ) );
|
||||
*(u.buf+(56>>2)) = mm_byteswap_epi32( _mm_set_epi32( th, th, th, th ) );
|
||||
*(u.buf+(60>>2)) = mm_byteswap_epi32( _mm_set_epi32( tl, tl, tl, tl ) );
|
||||
*(u.buf+(56>>2)) = mm_byteswap_32( _mm_set_epi32( th, th, th, th ) );
|
||||
*(u.buf+(60>>2)) = mm_byteswap_32( _mm_set_epi32( tl, tl, tl, tl ) );
|
||||
blake32_4way( sc, u.buf + (ptr>>2), 64 - ptr );
|
||||
}
|
||||
else
|
||||
{
|
||||
memset_zero_m128i( u.buf + (ptr>>2) + 1, (60-ptr) >> 2 );
|
||||
memset_zero_128( u.buf + (ptr>>2) + 1, (60-ptr) >> 2 );
|
||||
blake32_4way( sc, u.buf + (ptr>>2), 64 - ptr );
|
||||
sc->T0 = SPH_C32(0xFFFFFE00);
|
||||
sc->T1 = SPH_C32(0xFFFFFFFF);
|
||||
memset_zero_m128i( u.buf, 56>>2 );
|
||||
memset_zero_128( u.buf, 56>>2 );
|
||||
if (out_size_w32 == 8)
|
||||
u.buf[52>>2] = _mm_set_epi32( 0x010000000, 0x01000000,
|
||||
0x010000000, 0x01000000 );
|
||||
*(u.buf+(56>>2)) = mm_byteswap_epi32( _mm_set_epi32( th, th, th, th ) );
|
||||
*(u.buf+(60>>2)) = mm_byteswap_epi32( _mm_set_epi32( tl, tl, tl, tl ) );
|
||||
*(u.buf+(56>>2)) = mm_byteswap_32( _mm_set_epi32( th, th, th, th ) );
|
||||
*(u.buf+(60>>2)) = mm_byteswap_32( _mm_set_epi32( tl, tl, tl, tl ) );
|
||||
blake32_4way( sc, u.buf, 64 );
|
||||
}
|
||||
out = (__m128i*)dst;
|
||||
for ( k = 0; k < out_size_w32; k++ )
|
||||
out[k] = mm_byteswap_epi32( sc->H[k] );
|
||||
out[k] = mm_byteswap_32( sc->H[k] );
|
||||
}
|
||||
|
||||
#if defined (__AVX2__)
|
||||
@@ -995,7 +995,7 @@ blake64_4way( blake_4way_big_context *sc, const void *data, size_t len)
|
||||
ptr = sc->ptr;
|
||||
if ( len < (buf_size - ptr) )
|
||||
{
|
||||
memcpy_m256i( buf + (ptr>>3), vdata, len>>3 );
|
||||
memcpy_256( buf + (ptr>>3), vdata, len>>3 );
|
||||
ptr += len;
|
||||
sc->ptr = ptr;
|
||||
return;
|
||||
@@ -1009,7 +1009,7 @@ blake64_4way( blake_4way_big_context *sc, const void *data, size_t len)
|
||||
clen = buf_size - ptr;
|
||||
if ( clen > len )
|
||||
clen = len;
|
||||
memcpy_m256i( buf + (ptr>>3), vdata, clen>>3 );
|
||||
memcpy_256( buf + (ptr>>3), vdata, clen>>3 );
|
||||
ptr += clen;
|
||||
vdata = vdata + (clen>>3);
|
||||
len -= clen;
|
||||
@@ -1062,44 +1062,44 @@ blake64_4way_close( blake_4way_big_context *sc,
|
||||
}
|
||||
if ( ptr <= 104 )
|
||||
{
|
||||
memset_zero_m256i( u.buf + (ptr>>3) + 1, (104-ptr) >> 3 );
|
||||
memset_zero_256( u.buf + (ptr>>3) + 1, (104-ptr) >> 3 );
|
||||
if ( out_size_w64 == 8 )
|
||||
u.buf[(104>>3)] = _mm256_or_si256( u.buf[(104>>3)],
|
||||
_mm256_set_epi64x( 0x0100000000000000,
|
||||
0x0100000000000000,
|
||||
0x0100000000000000,
|
||||
0x0100000000000000 ) );
|
||||
*(u.buf+(112>>3)) = mm256_byteswap_epi64(
|
||||
*(u.buf+(112>>3)) = mm256_byteswap_64(
|
||||
_mm256_set_epi64x( th, th, th, th ) );
|
||||
*(u.buf+(120>>3)) = mm256_byteswap_epi64(
|
||||
*(u.buf+(120>>3)) = mm256_byteswap_64(
|
||||
_mm256_set_epi64x( tl, tl, tl, tl ) );
|
||||
|
||||
blake64_4way( sc, u.buf + (ptr>>3), 128 - ptr );
|
||||
}
|
||||
else
|
||||
{
|
||||
memset_zero_m256i( u.buf + (ptr>>3) + 1, (120 - ptr) >> 3 );
|
||||
memset_zero_256( u.buf + (ptr>>3) + 1, (120 - ptr) >> 3 );
|
||||
|
||||
blake64_4way( sc, u.buf + (ptr>>3), 128 - ptr );
|
||||
sc->T0 = SPH_C64(0xFFFFFFFFFFFFFC00);
|
||||
sc->T1 = SPH_C64(0xFFFFFFFFFFFFFFFF);
|
||||
memset_zero_m256i( u.buf, 112>>3 );
|
||||
memset_zero_256( u.buf, 112>>3 );
|
||||
if ( out_size_w64 == 8 )
|
||||
u.buf[104>>3] = _mm256_set_epi64x( 0x0100000000000000,
|
||||
0x0100000000000000,
|
||||
0x0100000000000000,
|
||||
0x0100000000000000 );
|
||||
|
||||
*(u.buf+(112>>3)) = mm256_byteswap_epi64(
|
||||
*(u.buf+(112>>3)) = mm256_byteswap_64(
|
||||
_mm256_set_epi64x( th, th, th, th ) );
|
||||
*(u.buf+(120>>3)) = mm256_byteswap_epi64(
|
||||
*(u.buf+(120>>3)) = mm256_byteswap_64(
|
||||
_mm256_set_epi64x( tl, tl, tl, tl ) );
|
||||
|
||||
blake64_4way( sc, u.buf, 128 );
|
||||
}
|
||||
out = (__m256i*)dst;
|
||||
for ( k = 0; k < out_size_w64; k++ )
|
||||
out[k] = mm256_byteswap_epi64( sc->H[k] );
|
||||
out[k] = mm256_byteswap_64( sc->H[k] );
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@@ -13,17 +13,17 @@ static __thread bool ctx_midstate_done = false;
|
||||
|
||||
void decred_hash_4way( void *state, const void *input )
|
||||
{
|
||||
uint32_t hash0[16] __attribute__ ((aligned (64)));
|
||||
uint32_t hash1[16] __attribute__ ((aligned (64)));
|
||||
uint32_t hash2[16] __attribute__ ((aligned (64)));
|
||||
uint32_t hash3[16] __attribute__ ((aligned (64)));
|
||||
uint32_t vhash[16*4] __attribute__ ((aligned (64)));
|
||||
uint32_t vhash[4*4] __attribute__ ((aligned (64)));
|
||||
uint32_t hash0[4] __attribute__ ((aligned (32)));
|
||||
uint32_t hash1[4] __attribute__ ((aligned (32)));
|
||||
uint32_t hash2[4] __attribute__ ((aligned (32)));
|
||||
uint32_t hash3[4] __attribute__ ((aligned (32)));
|
||||
blake256_4way_context ctx __attribute__ ((aligned (64)));
|
||||
|
||||
sph_blake256_context ctx2 __attribute__ ((aligned (64)));
|
||||
uint32_t hash[16] __attribute__ ((aligned (64)));
|
||||
uint32_t sin0[45], sin1[45], sin2[45], sin3[45];
|
||||
m128_deinterleave_4x32( sin0, sin1, sin2, sin3, (uint32_t*)input, 180*8 );
|
||||
mm_deinterleave_4x32x( sin0, sin1, sin2, sin3, input, 180*8 );
|
||||
|
||||
void *tail = input + DECRED_MIDSTATE_LEN;
|
||||
int tail_len = 180 - DECRED_MIDSTATE_LEN;
|
||||
@@ -53,7 +53,7 @@ void decred_hash_4way( void *state, const void *input )
|
||||
blake256_4way( &ctx, input, 180 );
|
||||
blake256_4way_close( &ctx, vhash );
|
||||
|
||||
m128_deinterleave_4x32( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
mm_deinterleave_4x32( hash0, hash1, hash2, hash3, vhash, 256 );
|
||||
/*
|
||||
for ( int i = 0; i < 8; i++ )
|
||||
if ( hash[i] != hash0[i] )
|
||||
@@ -79,7 +79,7 @@ int scanhash_decred_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done)
|
||||
{
|
||||
uint32_t vdata[45*4] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[4*8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[4*4] __attribute__ ((aligned (32)));
|
||||
uint32_t _ALIGN(64) endiandata[48];
|
||||
// uint32_t _ALIGN(64) hash32[8];
|
||||
uint32_t *pdata = work->data;
|
||||
@@ -97,7 +97,8 @@ int scanhash_decred_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
|
||||
// memcpy(endiandata, pdata, 180);
|
||||
|
||||
m128_interleave_4x32( vdata, pdata, pdata, pdata, pdata, 180*8 );
|
||||
// use the old way until new way updated for size.
|
||||
mm_interleave_4x32x( vdata, pdata, pdata, pdata, pdata, 180*8 );
|
||||
|
||||
uint32_t *noncep = vdata + DECRED_NONCE_INDEX * 4;
|
||||
do {
|
||||
|
@@ -144,7 +144,8 @@ bool decred_ready_to_mine( struct work* work, struct stratum_ctx* stratum,
|
||||
bool register_decred_algo( algo_gate_t* gate )
|
||||
{
|
||||
#if defined(DECRED_4WAY)
|
||||
gate->optimizations = SSE2_OPT | AVX_OPT;
|
||||
four_way_not_tested();
|
||||
gate->optimizations = FOUR_WAY_OPT;
|
||||
gate->scanhash = (void*)&scanhash_decred_4way;
|
||||
gate->hash = (void*)&decred_hash_4way;
|
||||
#else
|
||||
@@ -153,9 +154,6 @@ bool register_decred_algo( algo_gate_t* gate )
|
||||
gate->hash = (void*)&decred_hash;
|
||||
#endif
|
||||
|
||||
// gate->optimizations = SSE2_OPT;
|
||||
// gate->scanhash = (void*)&scanhash_decred;
|
||||
// gate->hash = (void*)&decred_hash;
|
||||
gate->get_nonceptr = (void*)&decred_get_nonceptr;
|
||||
gate->get_max64 = (void*)&get_max64_0x3fffffLL;
|
||||
gate->display_extra_data = (void*)&decred_decode_extradata;
|
||||
|
@@ -30,13 +30,13 @@ extern void pentablakehash_4way( void *output, const void *input )
|
||||
blake512_4way_close( &ctx, vhash );
|
||||
|
||||
uint64_t sin0[10], sin1[10], sin2[10], sin3[10];
|
||||
m256_deinterleave_4x64( sin0, sin1, sin2, sin3, input, 640 );
|
||||
mm256_deinterleave_4x64( sin0, sin1, sin2, sin3, input, 640 );
|
||||
sph_blake512_context ctx2_blake;
|
||||
sph_blake512_init(&ctx2_blake);
|
||||
sph_blake512(&ctx2_blake, sin0, 80);
|
||||
sph_blake512_close(&ctx2_blake, (void*) hash);
|
||||
|
||||
m256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
uint64_t* hash64 = (uint64_t*)hash;
|
||||
for( int i = 0; i < 8; i++ )
|
||||
{
|
||||
@@ -60,7 +60,7 @@ for( int i = 0; i < 8; i++ )
|
||||
blake512_4way( &ctx, vhash, 64 );
|
||||
blake512_4way_close( &ctx, vhash );
|
||||
|
||||
m256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
memcpy( output, hash0, 32 );
|
||||
memcpy( output+32, hash1, 32 );
|
||||
memcpy( output+64, hash2, 32 );
|
||||
@@ -141,7 +141,7 @@ int scanhash_pentablake_4way( int thr_id, struct work *work,
|
||||
swab32_array( endiandata, pdata, 20 );
|
||||
|
||||
uint64_t *edata = (uint64_t*)endiandata;
|
||||
m256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||
mm256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||
|
||||
for ( int m=0; m < 6; m++ )
|
||||
{
|
||||
|
@@ -3,13 +3,13 @@
|
||||
bool register_pentablake_algo( algo_gate_t* gate )
|
||||
{
|
||||
#if defined (PENTABLAKE_4WAY)
|
||||
gate->optimizations = SSE2_OPT | AVX2_OPT;
|
||||
gate->scanhash = (void*)&scanhash_pentablake_4way;
|
||||
gate->hash = (void*)&pentablakehash_4way;
|
||||
#else
|
||||
gate->scanhash = (void*)&scanhash_pentablake;
|
||||
gate->hash = (void*)&pentablakehash;
|
||||
#endif
|
||||
gate->optimizations = FOUR_WAY_OPT;
|
||||
gate->get_max64 = (void*)&get_max64_0x3ffff;
|
||||
return true;
|
||||
};
|
||||
|
@@ -95,13 +95,13 @@ extern "C"{
|
||||
#define Sb(x0, x1, x2, x3, c) \
|
||||
do { \
|
||||
__m256i cc = _mm256_set_epi64x( c, c, c, c ); \
|
||||
x3 = mm256_bitnot( x3 ); \
|
||||
x0 = _mm256_xor_si256( x0, _mm256_and_si256( cc, mm256_bitnot( x2 ) ) ); \
|
||||
x3 = mm256_not( x3 ); \
|
||||
x0 = _mm256_xor_si256( x0, _mm256_and_si256( cc, mm256_not( x2 ) ) ); \
|
||||
tmp = _mm256_xor_si256( cc, _mm256_and_si256( x0, x1 ) ); \
|
||||
x0 = _mm256_xor_si256( x0, _mm256_and_si256( x2, x3 ) ); \
|
||||
x3 = _mm256_xor_si256( x3, _mm256_and_si256( mm256_bitnot( x1 ), x2 ) ); \
|
||||
x3 = _mm256_xor_si256( x3, _mm256_and_si256( mm256_not( x1 ), x2 ) ); \
|
||||
x1 = _mm256_xor_si256( x1, _mm256_and_si256( x0, x2 ) ); \
|
||||
x2 = _mm256_xor_si256( x2, _mm256_and_si256( x0, mm256_bitnot( x3 ) ) ); \
|
||||
x2 = _mm256_xor_si256( x2, _mm256_and_si256( x0, mm256_not( x3 ) ) ); \
|
||||
x0 = _mm256_xor_si256( x0, _mm256_or_si256( x1, x3 ) ); \
|
||||
x3 = _mm256_xor_si256( x3, _mm256_and_si256( x1, x2 ) ); \
|
||||
x1 = _mm256_xor_si256( x1, _mm256_and_si256( tmp, x0 ) ); \
|
||||
@@ -532,7 +532,7 @@ jh_4way_core( jh_4way_context *sc, const void *data, size_t len )
|
||||
|
||||
if ( len < (buf_size - ptr) )
|
||||
{
|
||||
memcpy_m256i( buf + (ptr>>3), vdata, len>>3 );
|
||||
memcpy_256( buf + (ptr>>3), vdata, len>>3 );
|
||||
ptr += len;
|
||||
sc->ptr = ptr;
|
||||
return;
|
||||
@@ -546,7 +546,7 @@ jh_4way_core( jh_4way_context *sc, const void *data, size_t len )
|
||||
if ( clen > len )
|
||||
clen = len;
|
||||
|
||||
memcpy_m256i( buf + (ptr>>3), vdata, clen>>3 );
|
||||
memcpy_256( buf + (ptr>>3), vdata, clen>>3 );
|
||||
ptr += clen;
|
||||
vdata += (clen>>3);
|
||||
len -= clen;
|
||||
@@ -579,7 +579,7 @@ jh_4way_close( jh_4way_context *sc, unsigned ub, unsigned n, void *dst,
|
||||
else
|
||||
numz = 112 - sc->ptr;
|
||||
|
||||
memset_zero_m256i( buf+1, (numz>>3) - 1 );
|
||||
memset_zero_256( buf+1, (numz>>3) - 1 );
|
||||
|
||||
l0 = SPH_T64(sc->block_count << 9) + (sc->ptr << 3);
|
||||
l1 = SPH_T64(sc->block_count >> 55);
|
||||
@@ -593,7 +593,7 @@ jh_4way_close( jh_4way_context *sc, unsigned ub, unsigned n, void *dst,
|
||||
for ( u=0; u < 8; u++ )
|
||||
buf[u] = sc->H[u+8];
|
||||
|
||||
memcpy_m256i( dst256, buf, 8 );
|
||||
memcpy_256( dst256, buf, 8 );
|
||||
}
|
||||
|
||||
void
|
||||
|
@@ -1,11 +1,12 @@
|
||||
#if defined(JHA_4WAY)
|
||||
|
||||
#include "jha-gate.h"
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include "avxdefs.h"
|
||||
//#include "avxdefs.h"
|
||||
|
||||
#if defined(JHA_4WAY)
|
||||
|
||||
#include "algo/blake/blake-hash-4way.h"
|
||||
#include "algo/skein/skein-hash-4way.h"
|
||||
#include "algo/jh/jh-hash-4way.h"
|
||||
@@ -15,19 +16,19 @@
|
||||
//static __thread keccak512_4way_context jha_kec_mid
|
||||
// __attribute__ ((aligned (64)));
|
||||
|
||||
void jha_hash_4way( void *output, const void *input )
|
||||
void jha_hash_4way( void *out, const void *input )
|
||||
{
|
||||
uint64_t hash0[8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash1[8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash2[8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash3[8] __attribute__ ((aligned (64)));
|
||||
uint64_t vhash[8*4] __attribute__ ((aligned (64)));
|
||||
uint64_t vhasha[8*4] __attribute__ ((aligned (64)));
|
||||
uint64_t vhashb[8*4] __attribute__ ((aligned (64)));
|
||||
__m256i mask;
|
||||
__m256i* vh256 = (__m256i*)vhash;
|
||||
__m256i* vha256 = (__m256i*)vhasha;
|
||||
__m256i* vhb256 = (__m256i*)vhashb;
|
||||
uint64_t vhash0[8*4] __attribute__ ((aligned (64)));
|
||||
uint64_t vhash1[8*4] __attribute__ ((aligned (64)));
|
||||
__m256i mask0, mask1;
|
||||
__m256i* vh = (__m256i*)vhash;
|
||||
__m256i* vh0 = (__m256i*)vhash0;
|
||||
__m256i* vh1 = (__m256i*)vhash1;
|
||||
|
||||
blake512_4way_context ctx_blake;
|
||||
hashState_groestl ctx_groestl;
|
||||
@@ -40,21 +41,29 @@ void jha_hash_4way( void *output, const void *input )
|
||||
keccak512_4way_close( &ctx_keccak, vhash );
|
||||
|
||||
// memcpy( &ctx_keccak, &jha_kec_mid, sizeof jha_kec_mid );
|
||||
// keccak512_4way( &ctx_keccak, input+64, 16 );
|
||||
// keccak512_4way( &ctx_keccak, input + (64<<2), 16 );
|
||||
// keccak512_4way_close( &ctx_keccak, vhash );
|
||||
|
||||
// Heavy & Light Pair Loop
|
||||
for ( int round = 0; round < 3; round++ )
|
||||
{
|
||||
memset_zero_m256i( vha256, 20 );
|
||||
memset_zero_m256i( vhb256, 20 );
|
||||
// memset_zero_256( vh0, 20 );
|
||||
// memset_zero_256( vh1, 20 );
|
||||
|
||||
mask = _mm256_sub_epi64( _mm256_and_si256( vh256[0],
|
||||
mm256_vec_epi64( 0x1 ) ), mm256_vec_epi64( 0x1 ) );
|
||||
// positive logic, if maski select vhi
|
||||
// going from bit to mask reverses logic such that if the test bit is set
|
||||
// zero will be put in mask0, meaning don't take vh0. mask1 is
|
||||
// inverted so 1 will be put in mask1 meaning take it.
|
||||
mask0 = mm256_negate_64(
|
||||
_mm256_and_si256( vh[0], _mm256_set1_epi64x( 0x1 ) ) );
|
||||
mask1 = mm256_not( mask0 );
|
||||
|
||||
// mask = _mm256_sub_epi64( _mm256_and_si256( vh[0],
|
||||
// _mm256_set1_epi64x( 0x1 ) ), _mm256_set1_epi64x( 0x1 ) );
|
||||
|
||||
// groestl (serial) v skein
|
||||
|
||||
m256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
init_groestl( &ctx_groestl, 64 );
|
||||
update_and_final_groestl( &ctx_groestl, (char*)hash0,
|
||||
@@ -71,58 +80,66 @@ void jha_hash_4way( void *output, const void *input )
|
||||
update_and_final_groestl( &ctx_groestl, (char*)hash3,
|
||||
(char*)hash3, 512 );
|
||||
|
||||
m256_interleave_4x64( vhasha, hash0, hash1, hash2, hash3, 512 );
|
||||
mm256_interleave_4x64( vhash0, hash0, hash1, hash2, hash3, 512 );
|
||||
|
||||
// skein
|
||||
|
||||
skein512_4way_init( &ctx_skein );
|
||||
skein512_4way( &ctx_skein, vhash, 64 );
|
||||
skein512_4way_close( &ctx_skein, vhashb );
|
||||
skein512_4way_close( &ctx_skein, vhash1 );
|
||||
|
||||
// merge vectored hash
|
||||
for ( int i = 0; i < 8; i++ )
|
||||
{
|
||||
vh[i] = _mm256_or_si256( _mm256_and_si256( vh0[i], mask0 ),
|
||||
_mm256_and_si256( vh1[i], mask1 ) );
|
||||
/*
|
||||
vha256[i] = _mm256_maskload_epi64(
|
||||
vhasha + i*4, mm256_bitnot(mask ) );
|
||||
vhasha + i*4, mm256_not( mask ) );
|
||||
vhb256[i] = _mm256_maskload_epi64(
|
||||
vhashb + i*4, mask );
|
||||
vh256[i] = _mm256_or_si256( vha256[i], vhb256[i] );
|
||||
*/
|
||||
}
|
||||
|
||||
// blake v jh
|
||||
|
||||
blake512_4way_init( &ctx_blake );
|
||||
blake512_4way( &ctx_blake, vhash, 64 );
|
||||
blake512_4way_close( &ctx_blake, vhasha );
|
||||
blake512_4way_close( &ctx_blake, vhash0 );
|
||||
|
||||
jh512_4way_init( &ctx_jh );
|
||||
jh512_4way( &ctx_jh, vhash, 64 );
|
||||
jh512_4way_close( &ctx_jh, vhashb );
|
||||
jh512_4way_close( &ctx_jh, vhash1 );
|
||||
|
||||
// merge vectored hash
|
||||
// merge hash
|
||||
for ( int i = 0; i < 8; i++ )
|
||||
{
|
||||
vh[i] = _mm256_or_si256( _mm256_and_si256( vh0[i], mask0 ),
|
||||
_mm256_and_si256( vh1[i], mask1 ) );
|
||||
/*
|
||||
vha256[i] = _mm256_maskload_epi64(
|
||||
vhasha + i*4, mm256_bitnot(mask ) );
|
||||
vhasha + i*4, mm256_not( mask ) );
|
||||
vhb256[i] = _mm256_maskload_epi64(
|
||||
vhashb + i*4, mask );
|
||||
vh256[i] = _mm256_or_si256( vha256[i], vhb256[i] );
|
||||
*/
|
||||
}
|
||||
}
|
||||
|
||||
m256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
mm256_deinterleave_4x64( out, out+32, out+64, out+96, vhash, 256 );
|
||||
|
||||
memcpy( output, hash0, 32 );
|
||||
memcpy( output+32, hash1, 32 );
|
||||
memcpy( output+64, hash2, 32 );
|
||||
memcpy( output+96, hash3, 32 );
|
||||
// memcpy( output, hash0, 32 );
|
||||
// memcpy( output+32, hash1, 32 );
|
||||
// memcpy( output+64, hash2, 32 );
|
||||
// memcpy( output+96, hash3, 32 );
|
||||
|
||||
}
|
||||
|
||||
int scanhash_jha_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
{
|
||||
uint32_t hash[4*8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[8*4] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t endiandata[20] __attribute__((aligned(64)));
|
||||
uint32_t *pdata = work->data;
|
||||
@@ -160,7 +177,7 @@ int scanhash_jha_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
be32enc( &endiandata[i], pdata[i] );
|
||||
|
||||
uint64_t *edata = (uint64_t*)endiandata;
|
||||
m256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||
mm256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||
|
||||
// precalc midstate for keccak
|
||||
// keccak512_4way_init( &jha_kec_mid );
|
||||
|
@@ -3,15 +3,16 @@
|
||||
|
||||
bool register_jha_algo( algo_gate_t* gate )
|
||||
{
|
||||
//#if defined (JHA_4WAY)
|
||||
// gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
|
||||
// gate->scanhash = (void*)&scanhash_jha_4way;
|
||||
// gate->hash = (void*)&jha_hash_4way;
|
||||
//#else
|
||||
gate->optimizations = SSE2_OPT | AES_OPT;
|
||||
#if defined (JHA_4WAY)
|
||||
four_way_not_tested();
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | FOUR_WAY_OPT;
|
||||
gate->scanhash = (void*)&scanhash_jha_4way;
|
||||
gate->hash = (void*)&jha_hash_4way;
|
||||
#else
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | FOUR_WAY_OPT;
|
||||
gate->scanhash = (void*)&scanhash_jha;
|
||||
gate->hash = (void*)&jha_hash;
|
||||
//#endif
|
||||
#endif
|
||||
gate->set_target = (void*)&scrypt_set_target;
|
||||
return true;
|
||||
};
|
||||
|
@@ -9,19 +9,17 @@
|
||||
#define JHA_4WAY
|
||||
#endif
|
||||
|
||||
//#if defined JHA_4WAY
|
||||
//void jha_hash_4way( void *state, const void *input );
|
||||
#if defined JHA_4WAY
|
||||
void jha_hash_4way( void *state, const void *input );
|
||||
|
||||
//int scanhash_jha_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
// uint64_t *hashes_done );
|
||||
//#else
|
||||
int scanhash_jha_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
#endif
|
||||
|
||||
void jha_hash( void *state, const void *input );
|
||||
|
||||
int scanhash_jha( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
|
||||
//#endif
|
||||
|
||||
#endif
|
||||
|
||||
|
@@ -1,39 +1,30 @@
|
||||
#include "keccak-gate.h"
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "sph_keccak.h"
|
||||
#include "keccak-hash-4way.h"
|
||||
|
||||
#ifdef KECCAK_4WAY
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
#include "sph_keccak.h"
|
||||
#include "keccak-hash-4way.h"
|
||||
|
||||
void keccakhash_4way(void *state, const void *input)
|
||||
{
|
||||
uint64_t hash0[8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash1[8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash2[8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash3[8] __attribute__ ((aligned (64)));
|
||||
uint64_t vhash[8*4] __attribute__ ((aligned (64)));
|
||||
keccak256_4way_context ctx;
|
||||
uint64_t vhash[4*4] __attribute__ ((aligned (64)));
|
||||
keccak256_4way_context ctx;
|
||||
|
||||
keccak256_4way_init( &ctx );
|
||||
keccak256_4way( &ctx, input, 80 );
|
||||
keccak256_4way_close( &ctx, vhash );
|
||||
keccak256_4way_init( &ctx );
|
||||
keccak256_4way( &ctx, input, 80 );
|
||||
keccak256_4way_close( &ctx, vhash );
|
||||
|
||||
m256_deinterleave_4x64x( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
memcpy( state, hash0, 32 );
|
||||
memcpy( state+32, hash1, 32 );
|
||||
memcpy( state+64, hash2, 32 );
|
||||
memcpy( state+96, hash3, 32 );
|
||||
mm256_deinterleave_4x64( state, state+32, state+64, state+96, vhash, 256 );
|
||||
}
|
||||
|
||||
int scanhash_keccak_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done)
|
||||
{
|
||||
uint32_t hash[4*8] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[8*4] __attribute__ ((aligned (32)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[19];
|
||||
@@ -52,7 +43,7 @@ int scanhash_keccak_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
be32enc( &endiandata[i], pdata[i] );
|
||||
|
||||
uint64_t *edata = (uint64_t*)endiandata;
|
||||
m256_interleave_4x64x( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||
mm256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||
|
||||
do {
|
||||
found[0] = found[1] = found[2] = found[3] = false;
|
||||
|
@@ -9,19 +9,38 @@ int64_t keccak_get_max64() { return 0x7ffffLL; }
|
||||
|
||||
bool register_keccak_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->optimizations = FOUR_WAY_OPT;
|
||||
gate->gen_merkle_root = (void*)&SHA256_gen_merkle_root;
|
||||
gate->set_target = (void*)&keccak_set_target;
|
||||
gate->get_max64 = (void*)&keccak_get_max64;
|
||||
#if defined (KECCAK_4WAY)
|
||||
gate->optimizations = SSE2_OPT | AVX2_OPT;
|
||||
gate->scanhash = (void*)&scanhash_keccak_4way;
|
||||
gate->hash = (void*)&keccakhash_4way;
|
||||
#else
|
||||
gate->optimizations = SSE2_OPT;
|
||||
gate->scanhash = (void*)&scanhash_keccak;
|
||||
gate->hash = (void*)&keccakhash;
|
||||
#endif
|
||||
return true;
|
||||
};
|
||||
|
||||
void keccakc_set_target( struct work* work, double job_diff )
|
||||
{
|
||||
work_set_target( work, job_diff / (256.0 * opt_diff_factor) );
|
||||
}
|
||||
|
||||
bool register_keccakc_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->optimizations = FOUR_WAY_OPT;
|
||||
gate->gen_merkle_root = (void*)&sha256d_gen_merkle_root;
|
||||
gate->set_target = (void*)&keccakc_set_target;
|
||||
gate->get_max64 = (void*)&keccak_get_max64;
|
||||
#if defined (KECCAK_4WAY)
|
||||
gate->scanhash = (void*)&scanhash_keccak_4way;
|
||||
gate->hash = (void*)&keccakhash_4way;
|
||||
#else
|
||||
gate->scanhash = (void*)&scanhash_keccak;
|
||||
gate->hash = (void*)&keccakhash;
|
||||
#endif
|
||||
return true;
|
||||
};
|
||||
|
||||
|
@@ -1,5 +1,5 @@
|
||||
#ifndef __KECCAK_GATE_H__
|
||||
#define __KECCAK_GATE_H__
|
||||
#ifndef KECCAK_GATE_H__
|
||||
#define KECCAK_GATE_H__
|
||||
|
||||
#include "algo-gate-api.h"
|
||||
#include <stdint.h>
|
||||
|
@@ -54,10 +54,6 @@ static const sph_u64 RC[] = {
|
||||
kc->w[j ] = _mm256_xor_si256( kc->w[j], buf[j] ); \
|
||||
} while (0)
|
||||
|
||||
#define mm256_neg1 \
|
||||
(_mm256_set_epi64x( 0xffffffffffffffff, 0xffffffffffffffff, \
|
||||
0xffffffffffffffff, 0xffffffffffffffff ) )
|
||||
|
||||
#define DECL64(x) __m256i x
|
||||
#define MOV64(d, s) (d = s)
|
||||
#define XOR64(d, a, b) (d = _mm256_xor_si256(a,b))
|
||||
@@ -403,7 +399,7 @@ keccak64_core( keccak64_ctx_m256i *kc, const void *data, size_t len,
|
||||
|
||||
if ( len < (lim - ptr) )
|
||||
{
|
||||
memcpy_m256i( buf + (ptr>>3), vdata, len>>3 );
|
||||
memcpy_256( buf + (ptr>>3), vdata, len>>3 );
|
||||
kc->ptr = ptr + len;
|
||||
return;
|
||||
}
|
||||
@@ -416,7 +412,7 @@ keccak64_core( keccak64_ctx_m256i *kc, const void *data, size_t len,
|
||||
clen = (lim - ptr);
|
||||
if ( clen > len )
|
||||
clen = len;
|
||||
memcpy_m256i( buf + (ptr>>3), vdata, clen>>3 );
|
||||
memcpy_256( buf + (ptr>>3), vdata, clen>>3 );
|
||||
ptr += clen;
|
||||
vdata = vdata + (clen>>3);
|
||||
len -= clen;
|
||||
@@ -453,7 +449,7 @@ static void keccak64_close( keccak64_ctx_m256i *kc, void *dst, size_t byte_len,
|
||||
{
|
||||
j = lim - kc->ptr;
|
||||
u.tmp[0] = _mm256_set_epi64x( eb, eb, eb, eb );
|
||||
memset_zero_m256i( u.tmp + 1, (j>>3) - 2 );
|
||||
memset_zero_256( u.tmp + 1, (j>>3) - 2 );
|
||||
u.tmp[ (j>>3) - 1] = _mm256_set_epi64x( 0x8000000000000000,
|
||||
0x8000000000000000, 0x8000000000000000, 0x8000000000000000);
|
||||
}
|
||||
@@ -467,7 +463,7 @@ static void keccak64_close( keccak64_ctx_m256i *kc, void *dst, size_t byte_len,
|
||||
NOT64( kc->w[20], kc->w[20] );
|
||||
for ( j = 0; j < m256_len; j++ )
|
||||
u.tmp[j] = kc->w[j];
|
||||
memcpy_m256i( dst, u.tmp, m256_len );
|
||||
memcpy_256( dst, u.tmp, m256_len );
|
||||
}
|
||||
|
||||
void keccak256_4way_init( void *kc )
|
||||
|
@@ -272,8 +272,8 @@ HashReturn update_luffa( hashState_luffa *state, const BitSequence *data,
|
||||
// full blocks
|
||||
for ( i = 0; i < blocks; i++ )
|
||||
{
|
||||
rnd512( state, mm_byteswap_epi32( casti_m128i( data, 1 ) ),
|
||||
mm_byteswap_epi32( casti_m128i( data, 0 ) ) );
|
||||
rnd512( state, mm_byteswap_32( casti_m128i( data, 1 ) ),
|
||||
mm_byteswap_32( casti_m128i( data, 0 ) ) );
|
||||
data += MSG_BLOCK_BYTE_LEN;
|
||||
}
|
||||
|
||||
@@ -282,7 +282,7 @@ HashReturn update_luffa( hashState_luffa *state, const BitSequence *data,
|
||||
if ( state->rembytes )
|
||||
{
|
||||
// remaining data bytes
|
||||
casti_m128i( state->buffer, 0 ) = mm_byteswap_epi32( cast_m128i( data ) );
|
||||
casti_m128i( state->buffer, 0 ) = mm_byteswap_32( cast_m128i( data ) );
|
||||
// padding of partial block
|
||||
casti_m128i( state->buffer, 1 ) =
|
||||
_mm_set_epi8( 0,0,0,0, 0,0,0,0, 0,0,0,0, 0x80,0,0,0 );
|
||||
@@ -324,8 +324,8 @@ HashReturn update_and_final_luffa( hashState_luffa *state, BitSequence* output,
|
||||
// full blocks
|
||||
for ( i = 0; i < blocks; i++ )
|
||||
{
|
||||
rnd512( state, mm_byteswap_epi32( casti_m128i( data, 1 ) ),
|
||||
mm_byteswap_epi32( casti_m128i( data, 0 ) ) );
|
||||
rnd512( state, mm_byteswap_32( casti_m128i( data, 1 ) ),
|
||||
mm_byteswap_32( casti_m128i( data, 0 ) ) );
|
||||
data += MSG_BLOCK_BYTE_LEN;
|
||||
}
|
||||
|
||||
@@ -334,7 +334,7 @@ HashReturn update_and_final_luffa( hashState_luffa *state, BitSequence* output,
|
||||
{
|
||||
// padding of partial block
|
||||
rnd512( state, _mm_set_epi8( 0,0,0,0, 0,0,0,0, 0,0,0,0, 0x80,0,0,0 ),
|
||||
mm_byteswap_epi32( cast_m128i( data ) ) );
|
||||
mm_byteswap_32( cast_m128i( data ) ) );
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -542,7 +542,7 @@ static void finalization512( hashState_luffa *state, uint32 *b )
|
||||
|
||||
_mm256_store_si256( (__m256i*)hash, t );
|
||||
|
||||
casti_m256i( b, 0 ) = mm256_byteswap_epi32( casti_m256i( hash, 0 ) );
|
||||
casti_m256i( b, 0 ) = mm256_byteswap_32( casti_m256i( hash, 0 ) );
|
||||
|
||||
rnd512( state, zero, zero );
|
||||
|
||||
@@ -555,7 +555,7 @@ static void finalization512( hashState_luffa *state, uint32 *b )
|
||||
|
||||
_mm256_store_si256( (__m256i*)hash, t );
|
||||
|
||||
casti_m256i( b, 1 ) = mm256_byteswap_epi32( casti_m256i( hash, 0 ) );
|
||||
casti_m256i( b, 1 ) = mm256_byteswap_32( casti_m256i( hash, 0 ) );
|
||||
}
|
||||
|
||||
#else
|
||||
@@ -587,8 +587,8 @@ static void finalization512( hashState_luffa *state, uint32 *b )
|
||||
_mm_store_si128((__m128i*)&hash[0], t[0]);
|
||||
_mm_store_si128((__m128i*)&hash[4], t[1]);
|
||||
|
||||
casti_m128i( b, 0 ) = mm_byteswap_epi32( casti_m128i( hash, 0 ) );
|
||||
casti_m128i( b, 1 ) = mm_byteswap_epi32( casti_m128i( hash, 1 ) );
|
||||
casti_m128i( b, 0 ) = mm_byteswap_32( casti_m128i( hash, 0 ) );
|
||||
casti_m128i( b, 1 ) = mm_byteswap_32( casti_m128i( hash, 1 ) );
|
||||
|
||||
rnd512( state, zero, zero );
|
||||
|
||||
@@ -609,8 +609,8 @@ static void finalization512( hashState_luffa *state, uint32 *b )
|
||||
_mm_store_si128((__m128i*)&hash[0], t[0]);
|
||||
_mm_store_si128((__m128i*)&hash[4], t[1]);
|
||||
|
||||
casti_m128i( b, 2 ) = mm_byteswap_epi32( casti_m128i( hash, 0 ) );
|
||||
casti_m128i( b, 3 ) = mm_byteswap_epi32( casti_m128i( hash, 1 ) );
|
||||
casti_m128i( b, 2 ) = mm_byteswap_32( casti_m128i( hash, 0 ) );
|
||||
casti_m128i( b, 3 ) = mm_byteswap_32( casti_m128i( hash, 1 ) );
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@@ -377,7 +377,7 @@ int LYRA2RE( void *K, uint64_t kLen, const void *pwd,
|
||||
uint64_t *wholeMatrix = _mm_malloc( i, 64 );
|
||||
if (wholeMatrix == NULL)
|
||||
return -1;
|
||||
|
||||
/*
|
||||
#if defined (__AVX2__)
|
||||
memset_zero_m256i( (__m256i*)wholeMatrix, i/32 );
|
||||
#elif defined(__AVX__)
|
||||
@@ -385,7 +385,7 @@ int LYRA2RE( void *K, uint64_t kLen, const void *pwd,
|
||||
#else
|
||||
memset(wholeMatrix, 0, i);
|
||||
#endif
|
||||
|
||||
*/
|
||||
uint64_t *ptrWord = wholeMatrix;
|
||||
|
||||
//=== Getting the password + salt + basil padded with 10*1 ==========//
|
||||
|
@@ -128,34 +128,10 @@ void lyra2re_set_target ( struct work* work, double job_diff )
|
||||
work_set_target(work, job_diff / (128.0 * opt_diff_factor) );
|
||||
}
|
||||
|
||||
/*
|
||||
bool lyra2re_thread_init()
|
||||
{
|
||||
const int64_t ROW_LEN_INT64 = BLOCK_LEN_INT64 * 8; // nCols
|
||||
const int64_t ROW_LEN_BYTES = ROW_LEN_INT64 * 8;
|
||||
|
||||
int i = (int64_t)ROW_LEN_BYTES * 8; // nRows;
|
||||
lyra2re_wholeMatrix = _mm_malloc( i, 64 );
|
||||
|
||||
if ( lyra2re_wholeMatrix == NULL )
|
||||
return false;
|
||||
|
||||
#if defined (__AVX2__)
|
||||
memset_zero_m256i( (__m256i*)lyra2re_wholeMatrix, i/32 );
|
||||
#elif defined(__AVX__)
|
||||
memset_zero_m128i( (__m128i*)lyra2re_wholeMatrix, i/16 );
|
||||
#else
|
||||
memset( lyra2re_wholeMatrix, 0, i );
|
||||
#endif
|
||||
return true;
|
||||
}
|
||||
*/
|
||||
|
||||
bool register_lyra2re_algo( algo_gate_t* gate )
|
||||
{
|
||||
init_lyra2re_ctx();
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT;
|
||||
// gate->miner_thread_init = (void*)&lyra2re_thread_init;
|
||||
gate->scanhash = (void*)&scanhash_lyra2re;
|
||||
gate->hash = (void*)&lyra2re_hash;
|
||||
gate->get_max64 = (void*)&lyra2re_get_max64;
|
||||
|
@@ -132,23 +132,13 @@ bool lyra2rev2_thread_init()
|
||||
int i = (int64_t)ROW_LEN_BYTES * 4; // nRows;
|
||||
l2v2_wholeMatrix = _mm_malloc( i, 64 );
|
||||
|
||||
if ( l2v2_wholeMatrix == NULL )
|
||||
return false;
|
||||
|
||||
#if defined (__AVX2__)
|
||||
memset_zero_m256i( (__m256i*)l2v2_wholeMatrix, i/32 );
|
||||
#elif defined (__AVX__)
|
||||
memset_zero_m128i( (__m128i*)l2v2_wholeMatrix, i/16 );
|
||||
#else
|
||||
memset( l2v2_wholeMatrix, 0, i );
|
||||
#endif
|
||||
return true;
|
||||
return l2v2_wholeMatrix;
|
||||
}
|
||||
|
||||
bool register_lyra2rev2_algo( algo_gate_t* gate )
|
||||
{
|
||||
init_lyra2rev2_ctx();
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT;
|
||||
gate->optimizations = AVX_OPT | AVX2_OPT;
|
||||
gate->miner_thread_init = (void*)&lyra2rev2_thread_init;
|
||||
gate->scanhash = (void*)&scanhash_lyra2rev2;
|
||||
gate->hash = (void*)&lyra2rev2_hash;
|
||||
|
168
algo/lyra2/lyra2z-4way.c
Normal file
168
algo/lyra2/lyra2z-4way.c
Normal file
@@ -0,0 +1,168 @@
|
||||
#include "lyra2z-gate.h"
|
||||
|
||||
#ifdef LYRA2Z_4WAY
|
||||
|
||||
#include <memory.h>
|
||||
#include <mm_malloc.h>
|
||||
//#include "algo-gate-api.h"
|
||||
#include "lyra2.h"
|
||||
#include "algo/blake/sph_blake.h"
|
||||
#include "algo/blake/blake-hash-4way.h"
|
||||
//#include "avxdefs.h"
|
||||
|
||||
// same size, only difference is the name, lyra2 is done serially
|
||||
__thread uint64_t* lyra2z_4way_matrix;
|
||||
|
||||
bool lyra2z_4way_thread_init()
|
||||
{
|
||||
return ( lyra2z_4way_matrix = _mm_malloc( LYRA2Z_MATRIX_SIZE, 64 ) );
|
||||
}
|
||||
|
||||
static __thread blake256_4way_context l2z_4way_blake_mid;
|
||||
|
||||
void lyra2z_4way_midstate( const void* input )
|
||||
{
|
||||
blake256_4way_init( &l2z_4way_blake_mid );
|
||||
blake256_4way( &l2z_4way_blake_mid, input, 64 );
|
||||
}
|
||||
|
||||
// block 2050 new algo, blake plus new lyra parms. new input
|
||||
// is power of 2 so normal lyra can be used
|
||||
//void zcoin_hash(void *state, const void *input, uint32_t height)
|
||||
void lyra2z_4way_hash( void *state, const void *input )
|
||||
{
|
||||
// uint32_t _ALIGN(64) hash[16];
|
||||
uint32_t hash0[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash1[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash2[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash3[8] __attribute__ ((aligned (64)));
|
||||
uint32_t vhash[8*4] __attribute__ ((aligned (64)));
|
||||
blake256_4way_context ctx_blake __attribute__ ((aligned (64)));
|
||||
|
||||
// memcpy( &ctx_blake, &l2z_4way_blake_mid, sizeof l2z_4way_blake_mid );
|
||||
// blake256_4way( &ctx_blake, input + (64*4), 16 );
|
||||
// blake256_4way_close( &ctx_blake, vhash );
|
||||
|
||||
blake256_4way_init( &ctx_blake );
|
||||
blake256_4way( &ctx_blake, input, 80 );
|
||||
blake256_4way_close( &ctx_blake, vhash );
|
||||
|
||||
mm_deinterleave_4x32( hash0, hash1, hash2, hash3, vhash, 256 );
|
||||
|
||||
LYRA2Z( lyra2z_4way_matrix, hash0, 32, hash0, 32, hash0, 32, 8, 8, 8 );
|
||||
// LYRA2Z( lyra2z_4way_matrix, hash1, 32, hash1, 32, hash1, 32, 8, 8, 8 );
|
||||
LYRA2Z( lyra2z_4way_matrix, hash2, 32, hash2, 32, hash2, 32, 8, 8, 8 );
|
||||
// LYRA2Z( lyra2z_4way_matrix, hash3, 32, hash3, 32, hash3, 32, 8, 8, 8 );
|
||||
|
||||
memcpy( state, hash0, 32 );
|
||||
memcpy( state+32, hash1, 32 );
|
||||
memcpy( state+64, hash2, 32 );
|
||||
memcpy( state+96, hash3, 32 );
|
||||
|
||||
// memcpy(state, hash, 32);
|
||||
}
|
||||
|
||||
int scanhash_lyra2z_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
{
|
||||
uint32_t hash[8*4] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
// uint32_t _ALIGN(64) hash[8];
|
||||
uint32_t _ALIGN(64) edata[20];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
uint32_t *nonces = work->nonces;
|
||||
bool *found = work->nfound;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep0 = vdata + 76; // 19*4
|
||||
uint32_t *noncep1 = vdata + 77;
|
||||
uint32_t *noncep2 = vdata + 78;
|
||||
uint32_t *noncep3 = vdata + 79;
|
||||
|
||||
if ( opt_benchmark )
|
||||
ptarget[7] = 0x0000ff;
|
||||
|
||||
for ( int i=0; i < 19; i++ )
|
||||
be32enc( &edata[i], pdata[i] );
|
||||
|
||||
mm_interleave_4x32( vdata, edata, edata, edata, edata, 640 );
|
||||
|
||||
// lyra2z_4way_midstate( vdata );
|
||||
|
||||
do {
|
||||
found[0] = found[1] = found[2] = found[3] = false;
|
||||
be32enc( noncep0, n );
|
||||
be32enc( noncep1, n+1 );
|
||||
be32enc( noncep2, n+2 );
|
||||
be32enc( noncep3, n+3 );
|
||||
|
||||
be32enc( &edata[19], n );
|
||||
lyra2z_4way_hash( hash, vdata );
|
||||
|
||||
if ( hash[7] <= Htarg && fulltest( hash, ptarget ) )
|
||||
{
|
||||
printf("found 0\n");
|
||||
found[0] = true;
|
||||
num_found++;
|
||||
nonces[0] = pdata[19] = n;
|
||||
work_set_target_ratio( work, hash );
|
||||
}
|
||||
/* if ( (hash+8)[7] <= Htarg && fulltest( hash+8, ptarget ) )
|
||||
{
|
||||
printf("found 1\n");
|
||||
found[1] = true;
|
||||
num_found++;
|
||||
nonces[1] = n+1;
|
||||
work_set_target_ratio( work, hash+8 );
|
||||
}
|
||||
*/
|
||||
if ( (hash+16)[7] <= Htarg && fulltest( hash+16, ptarget ) )
|
||||
{
|
||||
printf("found 2\n");
|
||||
found[2] = true;
|
||||
num_found++;
|
||||
nonces[2] = n+2;
|
||||
work_set_target_ratio( work, hash+16 );
|
||||
}
|
||||
/*
|
||||
if ( (hash+24)[7] <= Htarg && fulltest( hash+24, ptarget ) )
|
||||
{
|
||||
printf("found 3\n");
|
||||
found[3] = true;
|
||||
num_found++;
|
||||
nonces[3] = n+3;
|
||||
work_set_target_ratio( work, hash+24 );
|
||||
}
|
||||
n += 4;
|
||||
*/
|
||||
n += 2;
|
||||
} while ( (num_found == 0) && (n < max_nonce-4)
|
||||
&& !work_restart[thr_id].restart);
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return num_found;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/*
|
||||
|
||||
if (hash[7] <= Htarg && fulltest(hash, ptarget)) {
|
||||
work_set_target_ratio(work, hash);
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce;
|
||||
return 1;
|
||||
}
|
||||
nonce++;
|
||||
|
||||
} while (nonce < max_nonce && !work_restart[thr_id].restart);
|
||||
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce + 1;
|
||||
return 0;
|
||||
}
|
||||
*/
|
||||
|
28
algo/lyra2/lyra2z-gate.c
Normal file
28
algo/lyra2/lyra2z-gate.c
Normal file
@@ -0,0 +1,28 @@
|
||||
#include "lyra2z-gate.h"
|
||||
#include "lyra2.h"
|
||||
|
||||
void lyra2z_set_target( struct work* work, double job_diff )
|
||||
{
|
||||
work_set_target( work, job_diff / (256.0 * opt_diff_factor) );
|
||||
}
|
||||
|
||||
bool register_lyra2z_algo( algo_gate_t* gate )
|
||||
{
|
||||
#ifdef LYRA2Z_4WAY
|
||||
four_way_not_tested();
|
||||
gate->optimizations = AVX_OPT | AVX2_OPT | FOUR_WAY_OPT;
|
||||
gate->miner_thread_init = (void*)&lyra2z_4way_thread_init;
|
||||
gate->scanhash = (void*)&scanhash_lyra2z_4way;
|
||||
gate->hash = (void*)&lyra2z_4way_hash;
|
||||
#else
|
||||
gate->optimizations = AVX_OPT | AVX2_OPT;
|
||||
gate->miner_thread_init = (void*)&lyra2z_thread_init;
|
||||
gate->scanhash = (void*)&scanhash_lyra2z;
|
||||
gate->hash = (void*)&lyra2z_hash;
|
||||
#endif
|
||||
|
||||
gate->get_max64 = (void*)&get_max64_0xffffLL;
|
||||
gate->set_target = (void*)&lyra2z_set_target;
|
||||
return true;
|
||||
};
|
||||
|
33
algo/lyra2/lyra2z-gate.h
Normal file
33
algo/lyra2/lyra2z-gate.h
Normal file
@@ -0,0 +1,33 @@
|
||||
#ifndef LYRA2Z_GATE_H__
|
||||
#define LYRA2Z_GATE_H__
|
||||
|
||||
#include "algo-gate-api.h"
|
||||
#include <stdint.h>
|
||||
|
||||
#if defined(HASH_4WAY)
|
||||
#define LYRA2Z_4WAY
|
||||
#endif
|
||||
|
||||
|
||||
#define LYRA2Z_MATRIX_SIZE BLOCK_LEN_INT64 * 8 * 8 * 8
|
||||
|
||||
#if defined(LYRA2Z_4WAY)
|
||||
|
||||
void lyra2z_4way_hash( void *state, const void *input );
|
||||
|
||||
int scanhash_lyra2z_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
|
||||
bool lyra2z_4way_thread_init();
|
||||
|
||||
#endif
|
||||
|
||||
void lyra2z_hash( void *state, const void *input );
|
||||
|
||||
int scanhash_lyra2z( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
|
||||
bool lyra2z_thread_init();
|
||||
|
||||
#endif
|
||||
|
@@ -1,40 +1,49 @@
|
||||
#include <memory.h>
|
||||
#include <mm_malloc.h>
|
||||
#include "algo-gate-api.h"
|
||||
#include "lyra2z-gate.h"
|
||||
#include "lyra2.h"
|
||||
#include "algo/blake/sph_blake.h"
|
||||
#include "avxdefs.h"
|
||||
|
||||
__thread uint64_t* zcoin_wholeMatrix;
|
||||
__thread uint64_t* lyra2z_matrix;
|
||||
|
||||
static __thread sph_blake256_context zcoin_blake_mid;
|
||||
|
||||
|
||||
void zcoin_midstate( const void* input )
|
||||
bool lyra2z_thread_init()
|
||||
{
|
||||
sph_blake256_init( &zcoin_blake_mid );
|
||||
sph_blake256( &zcoin_blake_mid, input, 64 );
|
||||
// const int64_t ROW_LEN_INT64 = BLOCK_LEN_INT64 * 8; // nCols
|
||||
// const int64_t ROW_LEN_BYTES = ROW_LEN_INT64 * 8;
|
||||
// int i = (int64_t)ROW_LEN_BYTES * 8; // nRows;
|
||||
const int i = BLOCK_LEN_INT64 * 8 * 8 * 8;
|
||||
lyra2z_matrix = _mm_malloc( i, 64 );
|
||||
return lyra2z_matrix;
|
||||
}
|
||||
|
||||
static __thread sph_blake256_context lyra2z_blake_mid;
|
||||
|
||||
void lyra2z_midstate( const void* input )
|
||||
{
|
||||
sph_blake256_init( &lyra2z_blake_mid );
|
||||
sph_blake256( &lyra2z_blake_mid, input, 64 );
|
||||
}
|
||||
|
||||
// block 2050 new algo, blake plus new lyra parms. new input
|
||||
// is power of 2 so normal lyra can be used
|
||||
//void zcoin_hash(void *state, const void *input, uint32_t height)
|
||||
void zcoin_hash(void *state, const void *input )
|
||||
void lyra2z_hash( void *state, const void *input )
|
||||
{
|
||||
uint32_t _ALIGN(64) hash[16];
|
||||
|
||||
sph_blake256_context ctx_blake __attribute__ ((aligned (64)));
|
||||
|
||||
memcpy( &ctx_blake, &zcoin_blake_mid, sizeof zcoin_blake_mid );
|
||||
memcpy( &ctx_blake, &lyra2z_blake_mid, sizeof lyra2z_blake_mid );
|
||||
sph_blake256( &ctx_blake, input + 64, 16 );
|
||||
sph_blake256_close( &ctx_blake, hash );
|
||||
|
||||
LYRA2Z( zcoin_wholeMatrix, hash, 32, hash, 32, hash, 32, 8, 8, 8);
|
||||
LYRA2Z( lyra2z_matrix, hash, 32, hash, 32, hash, 32, 8, 8, 8);
|
||||
|
||||
memcpy(state, hash, 32);
|
||||
}
|
||||
|
||||
int scanhash_zcoin( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
int scanhash_lyra2z( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
{
|
||||
uint32_t _ALIGN(64) hash[8];
|
||||
@@ -52,11 +61,11 @@ int scanhash_zcoin( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
be32enc(&endiandata[i], pdata[i]);
|
||||
}
|
||||
|
||||
zcoin_midstate( endiandata );
|
||||
lyra2z_midstate( endiandata );
|
||||
|
||||
do {
|
||||
be32enc(&endiandata[19], nonce);
|
||||
zcoin_hash( hash, endiandata );
|
||||
lyra2z_hash( hash, endiandata );
|
||||
|
||||
if (hash[7] <= Htarg && fulltest(hash, ptarget)) {
|
||||
work_set_target_ratio(work, hash);
|
||||
@@ -73,50 +82,41 @@ int scanhash_zcoin( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
//int64_t get_max64_0xffffLL() { return 0xffffLL; };
|
||||
|
||||
void zcoin_set_target( struct work* work, double job_diff )
|
||||
void lyra2z_set_target( struct work* work, double job_diff )
|
||||
{
|
||||
work_set_target( work, job_diff / (256.0 * opt_diff_factor) );
|
||||
}
|
||||
/*
|
||||
|
||||
bool zcoin_get_work_height( struct work* work, struct stratum_ctx* sctx )
|
||||
{
|
||||
work->height = sctx->bloc_height;
|
||||
return false;
|
||||
}
|
||||
*/
|
||||
|
||||
bool zcoin_thread_init()
|
||||
|
||||
bool lyra2z_thread_init()
|
||||
{
|
||||
const int64_t ROW_LEN_INT64 = BLOCK_LEN_INT64 * 8; // nCols
|
||||
const int64_t ROW_LEN_BYTES = ROW_LEN_INT64 * 8;
|
||||
|
||||
int i = (int64_t)ROW_LEN_BYTES * 8; // nRows;
|
||||
zcoin_wholeMatrix = _mm_malloc( i, 64 );
|
||||
lyra2z_wholeMatrix = _mm_malloc( i, 64 );
|
||||
|
||||
if ( zcoin_wholeMatrix == NULL )
|
||||
return false;
|
||||
|
||||
#if defined (__AVX2__)
|
||||
memset_zero_m256i( (__m256i*)zcoin_wholeMatrix, i/32 );
|
||||
#elif defined(__AVX__)
|
||||
memset_zero_m128i( (__m128i*)zcoin_wholeMatrix, i/16 );
|
||||
#else
|
||||
memset( zcoin_wholeMatrix, 0, i );
|
||||
#endif
|
||||
return true;
|
||||
return lyra2z_wholeMatrix;
|
||||
}
|
||||
|
||||
bool register_zcoin_algo( algo_gate_t* gate )
|
||||
bool register_lyra2z_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT;
|
||||
gate->miner_thread_init = (void*)&zcoin_thread_init;
|
||||
gate->scanhash = (void*)&scanhash_zcoin;
|
||||
gate->hash = (void*)&zcoin_hash;
|
||||
gate->miner_thread_init = (void*)&lyra2z_thread_init;
|
||||
gate->scanhash = (void*)&scanhash_lyra2z;
|
||||
gate->hash = (void*)&lyra2z_hash;
|
||||
gate->get_max64 = (void*)&get_max64_0xffffLL;
|
||||
gate->set_target = (void*)&zcoin_set_target;
|
||||
gate->set_target = (void*)&lyra2z_set_target;
|
||||
// gate->prevent_dupes = (void*)&zcoin_get_work_height;
|
||||
return true;
|
||||
};
|
||||
|
||||
*/
|
@@ -64,22 +64,12 @@ bool lyra2z330_thread_init()
|
||||
int i = (int64_t)ROW_LEN_BYTES * 330; // nRows;
|
||||
lyra2z330_wholeMatrix = _mm_malloc( i, 64 );
|
||||
|
||||
if ( lyra2z330_wholeMatrix == NULL )
|
||||
return false;
|
||||
|
||||
#if defined (__AVX2__)
|
||||
memset_zero_m256i( (__m256i*)lyra2z330_wholeMatrix, i/32 );
|
||||
#elif defined(__AVX__)
|
||||
memset_zero_m128i( (__m128i*)lyra2z330_wholeMatrix, i/16 );
|
||||
#else
|
||||
memset( lyra2z330_wholeMatrix, 0, i );
|
||||
#endif
|
||||
return true;
|
||||
return lyra2z330_wholeMatrix;
|
||||
}
|
||||
|
||||
bool register_lyra2z330_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT;
|
||||
gate->optimizations = AVX_OPT | AVX2_OPT;
|
||||
gate->miner_thread_init = (void*)&lyra2z330_thread_init;
|
||||
gate->scanhash = (void*)&scanhash_lyra2z330;
|
||||
gate->hash = (void*)&lyra2z330_hash;
|
||||
|
@@ -130,12 +130,12 @@ inline void squeeze( uint64_t *State, byte *Out, unsigned int len )
|
||||
//Squeezes full blocks
|
||||
for ( i = 0; i < fullBlocks; i++ )
|
||||
{
|
||||
memcpy_m256i( out, state, BLOCK_LEN_M256I );
|
||||
memcpy_256( out, state, BLOCK_LEN_M256I );
|
||||
LYRA_ROUND_AVX2( state[0], state[1], state[2], state[3] );
|
||||
out += BLOCK_LEN_M256I;
|
||||
}
|
||||
//Squeezes remaining bytes
|
||||
memcpy_m256i( out, state, ( len_m256i % BLOCK_LEN_M256I ) );
|
||||
memcpy_256( out, state, ( len_m256i % BLOCK_LEN_M256I ) );
|
||||
|
||||
#elif defined (__AVX__)
|
||||
|
||||
@@ -148,13 +148,13 @@ inline void squeeze( uint64_t *State, byte *Out, unsigned int len )
|
||||
//Squeezes full blocks
|
||||
for ( i = 0; i < fullBlocks; i++ )
|
||||
{
|
||||
memcpy_m128i( out, state, BLOCK_LEN_M128I );
|
||||
memcpy_128( out, state, BLOCK_LEN_M128I );
|
||||
LYRA_ROUND_AVX( state[0], state[1], state[2], state[3],
|
||||
state[4], state[5], state[6], state[7] );
|
||||
out += BLOCK_LEN_M128I;
|
||||
}
|
||||
//Squeezes remaining bytes
|
||||
memcpy_m128i( out, state, ( len_m128i % BLOCK_LEN_M128I ) );
|
||||
memcpy_128( out, state, ( len_m128i % BLOCK_LEN_M128I ) );
|
||||
|
||||
#else
|
||||
|
||||
|
@@ -66,11 +66,11 @@ static inline uint64_t rotr64( const uint64_t w, const unsigned c ){
|
||||
#define LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \
|
||||
G_4X64( s0, s1, s2, s3 ); \
|
||||
s1 = mm256_rotl256_1x64( s1); \
|
||||
s2 = mm256_swap128( s2 ); \
|
||||
s2 = mm256_swap_128( s2 ); \
|
||||
s3 = mm256_rotr256_1x64( s3 ); \
|
||||
G_4X64( s0, s1, s2, s3 ); \
|
||||
s1 = mm256_rotr256_1x64( s1 ); \
|
||||
s2 = mm256_swap128( s2 ); \
|
||||
s2 = mm256_swap_128( s2 ); \
|
||||
s3 = mm256_rotl256_1x64( s3 );
|
||||
|
||||
#define LYRA_12_ROUNDS_AVX2( s0, s1, s2, s3 ) \
|
||||
@@ -105,14 +105,14 @@ static inline uint64_t rotr64( const uint64_t w, const unsigned c ){
|
||||
#define LYRA_ROUND_AVX(s0,s1,s2,s3,s4,s5,s6,s7) \
|
||||
G_2X64( s0, s2, s4, s6 ); \
|
||||
G_2X64( s1, s3, s5, s7 ); \
|
||||
mm128_rotl256_1x64( s2, s3 ); \
|
||||
mm128_swap128( s4, s5 ); \
|
||||
mm128_rotr256_1x64( s6, s7 ); \
|
||||
mm_rotl256_1x64( s2, s3 ); \
|
||||
mm_swap_128( s4, s5 ); \
|
||||
mm_rotr256_1x64( s6, s7 ); \
|
||||
G_2X64( s0, s2, s4, s6 ); \
|
||||
G_2X64( s1, s3, s5, s7 ); \
|
||||
mm128_rotr256_1x64( s2, s3 ); \
|
||||
mm128_swap128( s4, s5 ); \
|
||||
mm128_rotl256_1x64( s6, s7 );
|
||||
mm_rotr256_1x64( s2, s3 ); \
|
||||
mm_swap_128( s4, s5 ); \
|
||||
mm_rotl256_1x64( s6, s7 );
|
||||
|
||||
#define LYRA_12_ROUNDS_AVX(s0,s1,s2,s3,s4,s5,s6,s7) \
|
||||
LYRA_ROUND_AVX(s0,s1,s2,s3,s4,s5,s6,s7) \
|
||||
|
@@ -15,7 +15,7 @@
|
||||
// no improvement with midstate
|
||||
//static __thread blake512_4way_context ctx_mid;
|
||||
|
||||
void nist5hash_4way( void *output, const void *input )
|
||||
void nist5hash_4way( void *out, const void *input )
|
||||
{
|
||||
uint64_t hash0[8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash1[8] __attribute__ ((aligned (64)));
|
||||
@@ -35,7 +35,7 @@ void nist5hash_4way( void *output, const void *input )
|
||||
blake512_4way( &ctx_blake, input, 80 );
|
||||
blake512_4way_close( &ctx_blake, vhash );
|
||||
|
||||
m256_deinterleave_4x64x( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
init_groestl( &ctx_groestl, 64 );
|
||||
update_and_final_groestl( &ctx_groestl, (char*)hash0,
|
||||
@@ -50,7 +50,7 @@ void nist5hash_4way( void *output, const void *input )
|
||||
update_and_final_groestl( &ctx_groestl, (char*)hash3,
|
||||
(const char*)hash3, 512 );
|
||||
|
||||
m256_interleave_4x64x( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
mm256_interleave_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
|
||||
jh512_4way_init( &ctx_jh );
|
||||
jh512_4way( &ctx_jh, vhash, 64 );
|
||||
@@ -64,12 +64,7 @@ void nist5hash_4way( void *output, const void *input )
|
||||
skein512_4way( &ctx_skein, vhash, 64 );
|
||||
skein512_4way_close( &ctx_skein, vhash );
|
||||
|
||||
m256_deinterleave_4x64x( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
memcpy( output, hash0, 32 );
|
||||
memcpy( output+32, hash1, 32 );
|
||||
memcpy( output+64, hash2, 32 );
|
||||
memcpy( output+96, hash3, 32 );
|
||||
mm256_deinterleave_4x64( out, out+32, out+64, out+96, vhash, 256 );
|
||||
}
|
||||
|
||||
int scanhash_nist5_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
@@ -109,7 +104,7 @@ int scanhash_nist5_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
swab32_array( endiandata, pdata, 20 );
|
||||
|
||||
uint64_t *edata = (uint64_t*)endiandata;
|
||||
m256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||
mm256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||
|
||||
// precalc midstate
|
||||
// blake512_4way_init( &ctx_mid );
|
||||
|
@@ -2,12 +2,11 @@
|
||||
|
||||
bool register_nist5_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | FOUR_WAY_OPT;
|
||||
#if defined (NIST5_4WAY)
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
|
||||
gate->scanhash = (void*)&scanhash_nist5_4way;
|
||||
gate->hash = (void*)&nist5hash_4way;
|
||||
#else
|
||||
gate->optimizations = SSE2_OPT | AES_OPT;
|
||||
init_nist5_ctx();
|
||||
gate->scanhash = (void*)&scanhash_nist5;
|
||||
gate->hash = (void*)&nist5hash;
|
||||
|
@@ -20,7 +20,7 @@ void skeinhash_4way( void *state, const void *input )
|
||||
skein512_4way( &ctx_skein, input, 80 );
|
||||
skein512_4way_close( &ctx_skein, vhash );
|
||||
|
||||
m256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
SHA256_Init( &ctx_sha256 );
|
||||
SHA256_Update( &ctx_sha256, (unsigned char*)hash0, 64 );
|
||||
@@ -38,21 +38,20 @@ void skeinhash_4way( void *state, const void *input )
|
||||
SHA256_Update( &ctx_sha256, (unsigned char*)hash3, 64 );
|
||||
SHA256_Final( (unsigned char*)hash3, &ctx_sha256 );
|
||||
|
||||
memcpy( (char*)state, (char*)hash0, 32 );
|
||||
memcpy( ((char*)state) + 32, (char*)hash1, 32 );
|
||||
memcpy( ((char*)state) + 64, (char*)hash2, 32 );
|
||||
memcpy( ((char*)state) + 96, (char*)hash3, 32 );
|
||||
memcpy( state, hash0, 32 );
|
||||
memcpy( state + 32, hash1, 32 );
|
||||
memcpy( state + 64, hash2, 32 );
|
||||
memcpy( state + 96, hash3, 32 );
|
||||
}
|
||||
|
||||
int scanhash_skein_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
{
|
||||
uint32_t hash[4*8] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t endiandata[20] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[8*4] __attribute__ ((aligned (64)));
|
||||
uint32_t edata[20] __attribute__ ((aligned (64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint64_t *edata = (uint64_t*)endiandata;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
@@ -63,9 +62,9 @@ int scanhash_skein_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
|
||||
// data is 80 bytes, 20 u32 or 4 u64.
|
||||
|
||||
swab32_array( endiandata, pdata, 20 );
|
||||
swab32_array( edata, pdata, 20 );
|
||||
|
||||
m256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||
mm256_interleave_4x64( vdata, edata, edata, edata, edata, 640 );
|
||||
|
||||
uint32_t *noncep0 = vdata + 73; // 9*8 + 1
|
||||
uint32_t *noncep1 = vdata + 75;
|
||||
|
@@ -6,8 +6,8 @@ int64_t skein_get_max64() { return 0x7ffffLL; }
|
||||
|
||||
bool register_skein_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->optimizations = SSE2_OPT | AVX_OPT| AVX2_OPT | SHA_OPT;
|
||||
#if defined (SKEIN_4WAY)
|
||||
gate->optimizations = SSE2_OPT | AVX2_OPT | SHA_OPT;
|
||||
gate->scanhash = (void*)&scanhash_skein_4way;
|
||||
gate->hash = (void*)&skeinhash_4way;
|
||||
#else
|
||||
|
@@ -463,7 +463,7 @@ skein_big_core_4way( skein512_4way_context *sc, const void *data,
|
||||
|
||||
if ( len <= buf_size - ptr )
|
||||
{
|
||||
memcpy_m256i( buf + (ptr>>3), vdata, len>>3 );
|
||||
memcpy_256( buf + (ptr>>3), vdata, len>>3 );
|
||||
sc->ptr = ptr + len;
|
||||
return;
|
||||
}
|
||||
@@ -483,7 +483,7 @@ skein_big_core_4way( skein512_4way_context *sc, const void *data,
|
||||
clen = buf_size - ptr;
|
||||
if ( clen > len )
|
||||
clen = len;
|
||||
memcpy_m256i( buf + (ptr>>3), vdata, clen>>3 );
|
||||
memcpy_256( buf + (ptr>>3), vdata, clen>>3 );
|
||||
ptr += clen;
|
||||
vdata += (clen>>3);
|
||||
len -= clen;
|
||||
@@ -520,11 +520,11 @@ skein_big_close_4way( skein512_4way_context *sc, unsigned ub, unsigned n,
|
||||
|
||||
READ_STATE_BIG(sc);
|
||||
|
||||
memset_zero_m256i( buf + (ptr>>3), (buf_size - ptr) >> 3 );
|
||||
memset_zero_256( buf + (ptr>>3), (buf_size - ptr) >> 3 );
|
||||
et = 352 + ((bcount == 0) << 7);
|
||||
UBI_BIG_4WAY( et, ptr );
|
||||
|
||||
memset_zero_m256i( buf, buf_size >> 3 );
|
||||
memset_zero_256( buf, buf_size >> 3 );
|
||||
bcount = 0;
|
||||
UBI_BIG_4WAY( 510, 8 );
|
||||
|
||||
@@ -537,7 +537,7 @@ skein_big_close_4way( skein512_4way_context *sc, unsigned ub, unsigned n,
|
||||
buf[6] = h6;
|
||||
buf[7] = h7;
|
||||
|
||||
memcpy_m256i( dst, buf, out_len >> 3 );
|
||||
memcpy_256( dst, buf, out_len >> 3 );
|
||||
}
|
||||
|
||||
static const sph_u64 IV256[] = {
|
||||
|
@@ -19,13 +19,13 @@ void skein2hash_4way( void *output, const void *input )
|
||||
skein512_4way( &ctx, hash, 64 );
|
||||
skein512_4way_close( &ctx, hash );
|
||||
|
||||
m256_deinterleave_4x64( out64, out64+4, out64+8, out64+12, hash, 256 );
|
||||
mm256_deinterleave_4x64( out64, out64+4, out64+8, out64+12, hash, 256 );
|
||||
}
|
||||
|
||||
int scanhash_skein2_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
{
|
||||
uint32_t hash[4*8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[8*4] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t endiandata[20] __attribute__ ((aligned (64)));
|
||||
uint64_t *edata = (uint64_t*)endiandata;
|
||||
@@ -41,7 +41,7 @@ int scanhash_skein2_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
|
||||
swab32_array( endiandata, pdata, 20 );
|
||||
|
||||
m256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||
mm256_interleave_4x64( vdata, edata, edata, edata, edata, 640 );
|
||||
|
||||
uint32_t *noncep0 = vdata + 73; // 9*8 + 1
|
||||
uint32_t *noncep1 = vdata + 75;
|
||||
|
@@ -9,12 +9,12 @@ int64_t skein2_get_max64 ()
|
||||
|
||||
bool register_skein2_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->optimizations = FOUR_WAY_OPT;
|
||||
#if defined (FOUR_WAY) && defined (__AVX2__)
|
||||
gate->optimizations = SSE2_OPT | AVX2_OPT;
|
||||
gate->scanhash = (void*)&scanhash_skein2_4way;
|
||||
gate->hash = (void*)&skein2hash_4way;
|
||||
four_way_not_tested();
|
||||
#else
|
||||
gate->optimizations = SSE2_OPT;
|
||||
gate->scanhash = (void*)&scanhash_skein2;
|
||||
gate->hash = (void*)&skein2hash;
|
||||
#endif
|
||||
|
@@ -31,7 +31,7 @@ void tribus_hash_4way(void *state, const void *input)
|
||||
keccak512_4way( &ctx_keccak, vhash, 64 );
|
||||
keccak512_4way_close( &ctx_keccak, vhash );
|
||||
|
||||
m256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
// hash echo serially
|
||||
init_echo( &ctx_echo, 512 );
|
||||
@@ -92,7 +92,7 @@ int scanhash_tribus_4way(int thr_id, struct work *work, uint32_t max_nonce, uint
|
||||
}
|
||||
|
||||
uint64_t *edata = (uint64_t*)endiandata;
|
||||
m256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||
mm256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||
|
||||
// precalc midstate
|
||||
// doing it one way then then interleaving would be faster but too
|
||||
|
@@ -14,15 +14,13 @@ bool tribus_thread_init()
|
||||
*/
|
||||
bool register_tribus_algo( algo_gate_t* gate )
|
||||
{
|
||||
// gate->miner_thread_init = (void*)&tribus_thread_init;
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | FOUR_WAY_OPT;
|
||||
gate->get_max64 = (void*)&get_max64_0x1ffff;
|
||||
#if defined (TRIBUS_4WAY)
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
|
||||
gate->scanhash = (void*)&scanhash_tribus_4way;
|
||||
gate->hash = (void*)&tribus_hash_4way;
|
||||
#else
|
||||
gate->miner_thread_init = (void*)&tribus_thread_init;
|
||||
gate->optimizations = SSE2_OPT | AES_OPT;
|
||||
gate->scanhash = (void*)&scanhash_tribus;
|
||||
gate->hash = (void*)&tribus_hash;
|
||||
#endif
|
||||
|
@@ -140,7 +140,7 @@ HASH ( void *cc, const void *data, size_t len )
|
||||
clen = SPH_BLEN - ptr;
|
||||
if ( clen > len )
|
||||
clen = len;
|
||||
memcpy_m256i( sc->buf + (ptr>>3), vdata, clen>>3 );
|
||||
memcpy_256( sc->buf + (ptr>>3), vdata, clen>>3 );
|
||||
vdata = vdata + (clen>>3);
|
||||
ptr += clen;
|
||||
len -= clen;
|
||||
@@ -195,19 +195,19 @@ SPH_XCAT( HASH, _addbits_and_close )(void *cc, unsigned ub, unsigned n,
|
||||
sc = cc;
|
||||
ptr = (unsigned)sc->count & (SPH_BLEN - 1U);
|
||||
|
||||
uint64_t *b= (uint64_t*)sc->buf;
|
||||
uint64_t *s= (uint64_t*)sc->state;
|
||||
//uint64_t *b= (uint64_t*)sc->buf;
|
||||
//uint64_t *s= (uint64_t*)sc->state;
|
||||
//printf("Vptr 1= %u\n", ptr);
|
||||
//printf("VBuf %016llx %016llx %016llx %016llx\n", b[0], b[4], b[8], b[12] );
|
||||
//printf("VBuf %016llx %016llx %016llx %016llx\n", b[16], b[20], b[24], b[28] );
|
||||
|
||||
#ifdef PW01
|
||||
sc->buf[ptr>>3] = mm256_vec_epi64( 0x100 >> 8 );
|
||||
sc->buf[ptr>>3] = _mm256_set1_epi64x( 0x100 >> 8 );
|
||||
// sc->buf[ptr++] = 0x100 >> 8;
|
||||
#else
|
||||
// need to overwrite exactly one byte
|
||||
// sc->buf[ptr>>3] = _mm256_set_epi64x( 0, 0, 0, 0x80 );
|
||||
sc->buf[ptr>>3] = mm256_vec_epi64( 0x80 );
|
||||
sc->buf[ptr>>3] = _mm256_set1_epi64x( 0x80 );
|
||||
// ptr++;
|
||||
#endif
|
||||
ptr += 8;
|
||||
@@ -218,43 +218,43 @@ uint64_t *s= (uint64_t*)sc->state;
|
||||
|
||||
if ( ptr > SPH_MAXPAD )
|
||||
{
|
||||
memset_zero_m256i( sc->buf + (ptr>>3), (SPH_BLEN - ptr) >> 3 );
|
||||
memset_zero_256( sc->buf + (ptr>>3), (SPH_BLEN - ptr) >> 3 );
|
||||
RFUN( sc->buf, SPH_VAL );
|
||||
memset_zero_m256i( sc->buf, SPH_MAXPAD >> 3 );
|
||||
memset_zero_256( sc->buf, SPH_MAXPAD >> 3 );
|
||||
}
|
||||
else
|
||||
{
|
||||
memset_zero_m256i( sc->buf + (ptr>>3), (SPH_MAXPAD - ptr) >> 3 );
|
||||
memset_zero_256( sc->buf + (ptr>>3), (SPH_MAXPAD - ptr) >> 3 );
|
||||
}
|
||||
#if defined BE64
|
||||
#if defined PLW1
|
||||
sc->buf[ SPH_MAXPAD>>3 ] =
|
||||
mm256_byteswap_epi64( mm256_vec_epi64( sc->count << 3 ) );
|
||||
mm256_byteswap_64( _mm256_set1_epi64x( sc->count << 3 ) );
|
||||
#elif defined PLW4
|
||||
memset_zero_m256i( sc->buf + (SPH_MAXPAD>>3), ( 2 * SPH_WLEN ) >> 3 );
|
||||
memset_zero_256( sc->buf + (SPH_MAXPAD>>3), ( 2 * SPH_WLEN ) >> 3 );
|
||||
sc->buf[ (SPH_MAXPAD + 2 * SPH_WLEN ) >> 3 ] =
|
||||
mm256_byteswap_epi64( mm256_vec_epi64( sc->count >> 61 ) );
|
||||
mm256_byteswap_64( _mm256_set1_epi64x( sc->count >> 61 ) );
|
||||
sc->buf[ (SPH_MAXPAD + 3 * SPH_WLEN ) >> 3 ] =
|
||||
mm256_byteswap_epi64( mm256_vec_epi64( sc->count << 3 ) );
|
||||
mm256_byteswap_64( _mm256_set1_epi64x( sc->count << 3 ) );
|
||||
#else
|
||||
sc->buf[ ( SPH_MAXPAD + 2 * SPH_WLEN ) >> 3 ] =
|
||||
mm256_byteswap_epi64( mm256_vec_epi64( sc->count >> 61 ) );
|
||||
mm256_byteswap_64( _mm256_set1_epi64x( sc->count >> 61 ) );
|
||||
sc->buf[ ( SPH_MAXPAD + 3 * SPH_WLEN ) >> 3 ] =
|
||||
mm256_byteswap_epi64( mm256_vec_epi64( sc->count << 3 ) );
|
||||
mm256_byteswap_64( _mm256_set1_epi64x( sc->count << 3 ) );
|
||||
#endif // PLW
|
||||
#else // LE64
|
||||
#if defined PLW1
|
||||
sc->buf[ SPH_MAXPAD >> 3 ] = mm256_vec_epi64( sc->count << 3 );
|
||||
sc->buf[ SPH_MAXPAD >> 3 ] = _mm256_set1_epi64x( sc->count << 3 );
|
||||
#elif defined PLW4
|
||||
sc->buf[ SPH_MAXPAD >> 3 ] = _mm256_vec_epi64( sc->count << 3 );
|
||||
sc->buf[ SPH_MAXPAD >> 3 ] = _mm256_set1_epi64x( sc->count << 3 );
|
||||
sc->buf[ ( SPH_MAXPAD + SPH_WLEN ) >> 3 ] =
|
||||
mm256_vec_epi64( c->count >> 61 );
|
||||
memset_zero_m256i( sc->buf + ( ( SPH_MAXPAD + 2 * SPH_WLEN ) >> 3 ),
|
||||
_mm256_set1_epi64x( c->count >> 61 );
|
||||
memset_zero_256( sc->buf + ( ( SPH_MAXPAD + 2 * SPH_WLEN ) >> 3 ),
|
||||
2 * SPH_WLEN );
|
||||
#else
|
||||
sc->buf[ SPH_MAXPAD >> 3 ] = mm256_vec_epi64( sc->count << 3 );
|
||||
sc->buf[ SPH_MAXPAD >> 3 ] = _mm256_set1_epi64x( sc->count << 3 );
|
||||
sc->buf[ ( SPH_MAXPAD + SPH_WLEN ) >> 3 ] =
|
||||
mm256_vec_epi64( sc->count >> 61 );
|
||||
_mm256_set1_epi64x( sc->count >> 61 );
|
||||
#endif // PLW
|
||||
|
||||
#endif // LE64
|
||||
@@ -276,7 +276,7 @@ uint64_t *s= (uint64_t*)sc->state;
|
||||
for ( u = 0; u < rnum; u ++ )
|
||||
{
|
||||
#if defined BE64
|
||||
((__m256i*)dst)[u] = mm256_byteswap_epi64( sc->val[u] );
|
||||
((__m256i*)dst)[u] = mm256_byteswap_64( sc->val[u] );
|
||||
#else // LE64
|
||||
((__m256i*)dst)[u] = sc->val[u];
|
||||
#endif
|
||||
|
File diff suppressed because it is too large
Load Diff
@@ -1,209 +0,0 @@
|
||||
/* $Id: sph_whirlpool.h 216 2010-06-08 09:46:57Z tp $ */
|
||||
/**
|
||||
* WHIRLPOOL interface.
|
||||
*
|
||||
* WHIRLPOOL knows three variants, dubbed "WHIRLPOOL-0" (original
|
||||
* version, published in 2000, studied by NESSIE), "WHIRLPOOL-1"
|
||||
* (first revision, 2001, with a new S-box) and "WHIRLPOOL" (current
|
||||
* version, 2003, with a new diffusion matrix, also described as "plain
|
||||
* WHIRLPOOL"). All three variants are implemented here.
|
||||
*
|
||||
* The original WHIRLPOOL (i.e. WHIRLPOOL-0) was published in: P. S. L.
|
||||
* M. Barreto, V. Rijmen, "The Whirlpool Hashing Function", First open
|
||||
* NESSIE Workshop, Leuven, Belgium, November 13--14, 2000.
|
||||
*
|
||||
* The current WHIRLPOOL specification and a reference implementation
|
||||
* can be found on the WHIRLPOOL web page:
|
||||
* http://paginas.terra.com.br/informatica/paulobarreto/WhirlpoolPage.html
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @file sph_whirlpool.h
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#ifndef SPH_WHIRLPOOL_H__
|
||||
#define SPH_WHIRLPOOL_H__
|
||||
|
||||
#include <stddef.h>
|
||||
#include "algo/sha/sph_types.h"
|
||||
|
||||
#if SPH_64
|
||||
|
||||
/**
|
||||
* Output size (in bits) for WHIRLPOOL.
|
||||
*/
|
||||
#define SPH_SIZE_whirlpool 512
|
||||
|
||||
/**
|
||||
* Output size (in bits) for WHIRLPOOL-0.
|
||||
*/
|
||||
#define SPH_SIZE_whirlpool0 512
|
||||
|
||||
/**
|
||||
* Output size (in bits) for WHIRLPOOL-1.
|
||||
*/
|
||||
#define SPH_SIZE_whirlpool1 512
|
||||
|
||||
/**
|
||||
* This structure is a context for WHIRLPOOL computations: it contains the
|
||||
* intermediate values and some data from the last entered block. Once
|
||||
* a WHIRLPOOL computation has been performed, the context can be reused for
|
||||
* another computation.
|
||||
*
|
||||
* The contents of this structure are private. A running WHIRLPOOL computation
|
||||
* can be cloned by copying the context (e.g. with a simple
|
||||
* <code>memcpy()</code>).
|
||||
*/
|
||||
typedef struct {
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
unsigned char buf[64]; /* first field, for alignment */
|
||||
sph_u64 state[8];
|
||||
#if SPH_64
|
||||
sph_u64 count;
|
||||
#else
|
||||
sph_u32 count_high, count_low;
|
||||
#endif
|
||||
#endif
|
||||
} sph_whirlpool_context;
|
||||
|
||||
/**
|
||||
* Initialize a WHIRLPOOL context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the WHIRLPOOL context (pointer to a
|
||||
* <code>sph_whirlpool_context</code>)
|
||||
*/
|
||||
void sph_whirlpool_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing). This function applies the
|
||||
* plain WHIRLPOOL algorithm.
|
||||
*
|
||||
* @param cc the WHIRLPOOL context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_whirlpool(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current WHIRLPOOL computation and output the result into the
|
||||
* provided buffer. The destination buffer must be wide enough to
|
||||
* accomodate the result (64 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the WHIRLPOOL context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_whirlpool_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* WHIRLPOOL-0 uses the same structure than plain WHIRLPOOL.
|
||||
*/
|
||||
typedef sph_whirlpool_context sph_whirlpool0_context;
|
||||
|
||||
#ifdef DOXYGEN_IGNORE
|
||||
/**
|
||||
* Initialize a WHIRLPOOL-0 context. This function is identical to
|
||||
* <code>sph_whirlpool_init()</code>.
|
||||
*
|
||||
* @param cc the WHIRLPOOL context (pointer to a
|
||||
* <code>sph_whirlpool0_context</code>)
|
||||
*/
|
||||
void sph_whirlpool0_init(void *cc);
|
||||
#endif
|
||||
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
#define sph_whirlpool0_init sph_whirlpool_init
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing). This function applies the
|
||||
* WHIRLPOOL-0 algorithm.
|
||||
*
|
||||
* @param cc the WHIRLPOOL context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_whirlpool0(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current WHIRLPOOL-0 computation and output the result into the
|
||||
* provided buffer. The destination buffer must be wide enough to
|
||||
* accomodate the result (64 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the WHIRLPOOL-0 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_whirlpool0_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* WHIRLPOOL-1 uses the same structure than plain WHIRLPOOL.
|
||||
*/
|
||||
typedef sph_whirlpool_context sph_whirlpool1_context;
|
||||
|
||||
#ifdef DOXYGEN_IGNORE
|
||||
/**
|
||||
* Initialize a WHIRLPOOL-1 context. This function is identical to
|
||||
* <code>sph_whirlpool_init()</code>.
|
||||
*
|
||||
* @param cc the WHIRLPOOL context (pointer to a
|
||||
* <code>sph_whirlpool1_context</code>)
|
||||
*/
|
||||
void sph_whirlpool1_init(void *cc);
|
||||
#endif
|
||||
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
#define sph_whirlpool1_init sph_whirlpool_init
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing). This function applies the
|
||||
* WHIRLPOOL-1 algorithm.
|
||||
*
|
||||
* @param cc the WHIRLPOOL context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_whirlpool1(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current WHIRLPOOL-1 computation and output the result into the
|
||||
* provided buffer. The destination buffer must be wide enough to
|
||||
* accomodate the result (64 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the WHIRLPOOL-1 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_whirlpool1_close(void *cc, void *dst);
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
@@ -41,7 +41,7 @@ void whirlpool_hash_4way( void *state, const void *input )
|
||||
whirlpool1_4way( &ctx, vhash, 64 );
|
||||
whirlpool1_4way_close( &ctx, vhash);
|
||||
|
||||
m256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
memcpy( state , hash0, 32 );
|
||||
memcpy( state+32, hash1, 32 );
|
||||
@@ -74,7 +74,7 @@ int scanhash_whirlpool_4way( int thr_id, struct work* work, uint32_t max_nonce,
|
||||
be32enc(&endiandata[i], pdata[i]);
|
||||
|
||||
uint64_t *edata = (uint64_t*)endiandata;
|
||||
m256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||
mm256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||
|
||||
// midstate
|
||||
whirlpool1_4way_init( &whirl_mid );
|
||||
|
@@ -3346,7 +3346,7 @@ do { \
|
||||
#define ROUND0 MUL8(ROUND0_W)
|
||||
#define UPDATE_STATE MUL8(UPDATE_STATE_W)
|
||||
#define BYTE(x, n) \
|
||||
_mm256_and_si256( _mm256_srli_epi64( x, n<<3 ), mm256_vec_epi64( 0xFF ) )
|
||||
_mm256_and_si256( _mm256_srli_epi64( x, n<<3 ), _mm256_set1_epi64x( 0xFF ) )
|
||||
|
||||
// A very complex, but structured, expression with a mix of scalar
|
||||
// and vector operations to retrieve specific 64 bit constants from
|
||||
@@ -3359,7 +3359,7 @@ do { \
|
||||
// Pack the data in a vector and return it.
|
||||
#define t_row( inv, row ) \
|
||||
_mm256_and_si256( \
|
||||
_mm256_srli_epi64( inv, row << 3 ), mm256_vec_epi64( 0xFF ) )
|
||||
_mm256_srli_epi64( inv, row << 3 ), _mm256_set1_epi64x( 0xFF ) )
|
||||
|
||||
// Extract vector element from "lane" of vector "in[row]" and use it to index
|
||||
// scalar array of constants "table" and return referenced 64 bit entry.
|
||||
@@ -3454,7 +3454,7 @@ void
|
||||
whirlpool_4way_init(void *cc)
|
||||
{
|
||||
whirlpool_4way_context *sc = cc;;
|
||||
memset_zero_m256i( sc->state, 8 );
|
||||
memset_zero_256( sc->state, 8 );
|
||||
sc->count = 0;
|
||||
}
|
||||
|
||||
@@ -3470,7 +3470,7 @@ name ## _round( const void *src, __m256i *state ) \
|
||||
ROUND0; \
|
||||
for (r = 0; r < 10; r ++) { \
|
||||
DECL8(tmp); \
|
||||
ROUND_KSCHED( type ## _T, h, tmp, mm256_vec_epi64( type ## _RC[r] ) ); \
|
||||
ROUND_KSCHED( type ## _T, h, tmp, _mm256_set1_epi64x( type ## _RC[r] ) ); \
|
||||
TRANSFER( h, tmp ); \
|
||||
ROUND_WENC( type ## _T, n, h, tmp ); \
|
||||
TRANSFER( n, tmp ); \
|
||||
|
@@ -7,6 +7,8 @@ CFLAGS="-O3 -march=core-avx2 -Wall -DUSE_SPH_SHA -DFOUR_WAY" ./configure --with-
|
||||
make -j 4
|
||||
strip -s cpuminer.exe
|
||||
mv cpuminer.exe cpuminer-4way.exe
|
||||
strip -s cpuminer
|
||||
mv cpuminer cpuminer-4way
|
||||
|
||||
make clean
|
||||
rm -f config.status
|
||||
@@ -15,6 +17,8 @@ CFLAGS="-O3 -march=core-avx2 -Wall -DUSE_SPH_SHA" ./configure --with-curl
|
||||
make -j 4
|
||||
strip -s cpuminer.exe
|
||||
mv cpuminer.exe cpuminer-aes-avx2.exe
|
||||
strip -s cpuminer
|
||||
mv cpuminer cpuminer-aes-avx2
|
||||
|
||||
make clean || echo clean
|
||||
rm -f config.status
|
||||
@@ -23,6 +27,8 @@ CFLAGS="-O3 -march=corei7-avx -Wall -DUSE_SPH_SHA" ./configure --with-curl
|
||||
make -j 4
|
||||
strip -s cpuminer.exe
|
||||
mv cpuminer.exe cpuminer-aes-avx.exe
|
||||
strip -s cpuminer
|
||||
mv cpuminer cpuminer-aes-avx
|
||||
|
||||
make clean || echo clean
|
||||
rm -f config.status
|
||||
@@ -31,6 +37,8 @@ CFLAGS="-O3 -maes -msse4.2 -Wall -DUSE_SPH_SHA" ./configure --with-curl
|
||||
make -j 4
|
||||
strip -s cpuminer.exe
|
||||
mv cpuminer.exe cpuminer-aes-sse42.exe
|
||||
strip -s cpuminer
|
||||
mv cpuminer cpuminer-aes-sse42
|
||||
|
||||
make clean || echo clean
|
||||
rm -f config.status
|
||||
@@ -39,6 +47,8 @@ CFLAGS="-O3 -march=corei7 -Wall -DUSE_SPH_SHA" ./configure --with-curl
|
||||
make -j 4
|
||||
strip -s cpuminer.exe
|
||||
mv cpuminer.exe cpuminer-sse42.exe
|
||||
strip -s cpuminer
|
||||
mv cpuminer cpuminer-sse42
|
||||
|
||||
make clean || echo clean
|
||||
rm -f config.status
|
||||
@@ -47,6 +57,8 @@ CFLAGS="-O3 -march=core2 -Wall -DUSE_SPH_SHA" ./configure --with-curl
|
||||
make -j 4
|
||||
strip -s cpuminer.exe
|
||||
mv cpuminer.exe cpuminer-sse2.exe
|
||||
strip -s cpuminer
|
||||
mv cpuminer cpuminer-sse2
|
||||
|
||||
make clean || echo done
|
||||
|
20
configure
vendored
20
configure
vendored
@@ -1,6 +1,6 @@
|
||||
#! /bin/sh
|
||||
# Guess values for system-dependent variables and create Makefiles.
|
||||
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.7.4.
|
||||
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.7.5.
|
||||
#
|
||||
#
|
||||
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
|
||||
@@ -577,8 +577,8 @@ MAKEFLAGS=
|
||||
# Identity of this package.
|
||||
PACKAGE_NAME='cpuminer-opt'
|
||||
PACKAGE_TARNAME='cpuminer-opt'
|
||||
PACKAGE_VERSION='3.7.4'
|
||||
PACKAGE_STRING='cpuminer-opt 3.7.4'
|
||||
PACKAGE_VERSION='3.7.5'
|
||||
PACKAGE_STRING='cpuminer-opt 3.7.5'
|
||||
PACKAGE_BUGREPORT=''
|
||||
PACKAGE_URL=''
|
||||
|
||||
@@ -1321,7 +1321,7 @@ if test "$ac_init_help" = "long"; then
|
||||
# Omit some internal or obsolete options to make the list less imposing.
|
||||
# This message is too long to be a string in the A/UX 3.1 sh.
|
||||
cat <<_ACEOF
|
||||
\`configure' configures cpuminer-opt 3.7.4 to adapt to many kinds of systems.
|
||||
\`configure' configures cpuminer-opt 3.7.5 to adapt to many kinds of systems.
|
||||
|
||||
Usage: $0 [OPTION]... [VAR=VALUE]...
|
||||
|
||||
@@ -1392,7 +1392,7 @@ fi
|
||||
|
||||
if test -n "$ac_init_help"; then
|
||||
case $ac_init_help in
|
||||
short | recursive ) echo "Configuration of cpuminer-opt 3.7.4:";;
|
||||
short | recursive ) echo "Configuration of cpuminer-opt 3.7.5:";;
|
||||
esac
|
||||
cat <<\_ACEOF
|
||||
|
||||
@@ -1497,7 +1497,7 @@ fi
|
||||
test -n "$ac_init_help" && exit $ac_status
|
||||
if $ac_init_version; then
|
||||
cat <<\_ACEOF
|
||||
cpuminer-opt configure 3.7.4
|
||||
cpuminer-opt configure 3.7.5
|
||||
generated by GNU Autoconf 2.69
|
||||
|
||||
Copyright (C) 2012 Free Software Foundation, Inc.
|
||||
@@ -2000,7 +2000,7 @@ cat >config.log <<_ACEOF
|
||||
This file contains any messages produced by compilers while
|
||||
running configure, to aid debugging if configure makes a mistake.
|
||||
|
||||
It was created by cpuminer-opt $as_me 3.7.4, which was
|
||||
It was created by cpuminer-opt $as_me 3.7.5, which was
|
||||
generated by GNU Autoconf 2.69. Invocation command line was
|
||||
|
||||
$ $0 $@
|
||||
@@ -2981,7 +2981,7 @@ fi
|
||||
|
||||
# Define the identity of the package.
|
||||
PACKAGE='cpuminer-opt'
|
||||
VERSION='3.7.4'
|
||||
VERSION='3.7.5'
|
||||
|
||||
|
||||
cat >>confdefs.h <<_ACEOF
|
||||
@@ -6677,7 +6677,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
|
||||
# report actual input values of CONFIG_FILES etc. instead of their
|
||||
# values after options handling.
|
||||
ac_log="
|
||||
This file was extended by cpuminer-opt $as_me 3.7.4, which was
|
||||
This file was extended by cpuminer-opt $as_me 3.7.5, which was
|
||||
generated by GNU Autoconf 2.69. Invocation command line was
|
||||
|
||||
CONFIG_FILES = $CONFIG_FILES
|
||||
@@ -6743,7 +6743,7 @@ _ACEOF
|
||||
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
|
||||
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
|
||||
ac_cs_version="\\
|
||||
cpuminer-opt config.status 3.7.4
|
||||
cpuminer-opt config.status 3.7.5
|
||||
configured by $0, generated by GNU Autoconf 2.69,
|
||||
with options \\"\$ac_cs_config\\"
|
||||
|
||||
|
@@ -1,4 +1,4 @@
|
||||
AC_INIT([cpuminer-opt], [3.7.4])
|
||||
AC_INIT([cpuminer-opt], [3.7.5])
|
||||
|
||||
AC_PREREQ([2.59c])
|
||||
AC_CANONICAL_SYSTEM
|
||||
|
151
cpu-miner.c
151
cpu-miner.c
@@ -2858,47 +2858,52 @@ static int thread_create(struct thr_info *thr, void* func)
|
||||
|
||||
static void show_credits()
|
||||
{
|
||||
printf("\n ********** "PACKAGE_NAME" "PACKAGE_VERSION" *********** \n");
|
||||
printf(" A CPU miner with multi algo support and optimized for CPUs\n");
|
||||
printf(" with AES_NI and AVX extensions.\n");
|
||||
printf(" BTC donation address: 12tdvfF7KmAsihBXQXynT6E6th2c2pByTT\n");
|
||||
printf(" Forked from TPruvot's cpuminer-multi with credits\n");
|
||||
printf(" to Lucas Jones, elmad, palmd, djm34, pooler, ig0tik3d,\n");
|
||||
printf(" Wolf0, Jeff Garzik and Optiminer.\n\n");
|
||||
printf("\n ********** "PACKAGE_NAME" "PACKAGE_VERSION" *********** \n");
|
||||
printf(" A CPU miner with multi algo support and optimized for CPUs\n");
|
||||
printf(" with AES_NI and AVX2 and SHA extensions.\n");
|
||||
printf(" BTC donation address: 12tdvfF7KmAsihBXQXynT6E6th2c2pByTT\n\n");
|
||||
}
|
||||
|
||||
bool check_cpu_capability ()
|
||||
{
|
||||
char cpu_brand[0x40];
|
||||
// there is no CPU related feature specific to 4way, just AVX2 and AES
|
||||
bool cpu_has_sse2 = has_sse2();
|
||||
bool cpu_has_aes = has_aes_ni();
|
||||
bool cpu_has_avx = has_avx1();
|
||||
bool cpu_has_avx2 = has_avx2();
|
||||
bool cpu_has_sha = has_sha();
|
||||
bool sw_has_sse2 = false;
|
||||
// no need to check if sw has sse2,
|
||||
// the code won't compile without it.
|
||||
// bool sw_has_sse2 = false;
|
||||
bool sw_has_aes = false;
|
||||
bool sw_has_avx = false;
|
||||
bool sw_has_avx2 = false;
|
||||
bool sw_has_sha = false;
|
||||
bool sw_has_4way = false;
|
||||
set_t algo_features = algo_gate.optimizations;
|
||||
bool algo_has_aes = set_incl( AES_OPT, algo_features );
|
||||
bool algo_has_avx = set_incl( AVX_OPT, algo_features );
|
||||
bool algo_has_avx2 = set_incl( AVX2_OPT, algo_features );
|
||||
bool algo_has_sha = set_incl( SHA_OPT, algo_features );
|
||||
bool algo_has_sse2 = set_incl( SSE2_OPT, algo_features );
|
||||
bool algo_has_aes = set_incl( AES_OPT, algo_features );
|
||||
bool algo_has_avx = set_incl( AVX_OPT, algo_features );
|
||||
bool algo_has_avx2 = set_incl( AVX2_OPT, algo_features );
|
||||
bool algo_has_sha = set_incl( SHA_OPT, algo_features );
|
||||
bool algo_has_4way = set_incl( FOUR_WAY_OPT, algo_features );
|
||||
bool use_aes;
|
||||
bool use_sse2;
|
||||
bool use_avx;
|
||||
bool use_avx2;
|
||||
bool use_sha;
|
||||
bool use_4way;
|
||||
bool use_none;
|
||||
|
||||
#ifdef __AES__
|
||||
sw_has_aes = true;
|
||||
#endif
|
||||
#ifdef __SSE2__
|
||||
sw_has_sse2 = true;
|
||||
#endif
|
||||
// #ifdef __SSE2__
|
||||
// sw_has_sse2 = true;
|
||||
// #endif
|
||||
#ifdef __AVX__
|
||||
sw_has_avx = true;;
|
||||
sw_has_avx = true;
|
||||
#endif
|
||||
#ifdef __AVX2__
|
||||
sw_has_avx2 = true;
|
||||
@@ -2906,64 +2911,104 @@ bool check_cpu_capability ()
|
||||
#ifdef __SHA__
|
||||
sw_has_sha = true;
|
||||
#endif
|
||||
|
||||
#ifdef HASH_4WAY
|
||||
sw_has_4way = true;
|
||||
#endif
|
||||
|
||||
#if !((__AES__) || (__SSE2__))
|
||||
printf("Neither __AES__ nor __SSE2__ defined.\n");
|
||||
#endif
|
||||
|
||||
cpu_brand_string( cpu_brand );
|
||||
printf( "CPU: %s\n", cpu_brand );
|
||||
printf( "CPU: %s.\n", cpu_brand );
|
||||
|
||||
printf("CPU features:");
|
||||
if ( cpu_has_sse2 ) printf( " SSE2" );
|
||||
if ( cpu_has_aes ) printf( " AES" );
|
||||
if ( cpu_has_avx ) printf( " AVX" );
|
||||
if ( cpu_has_avx2 ) printf( " AVX2" );
|
||||
if ( cpu_has_sha ) printf( " SHA" );
|
||||
|
||||
printf("\nSW built on " __DATE__
|
||||
printf("SW built on " __DATE__
|
||||
#ifdef _MSC_VER
|
||||
" with VC++ 2013\n");
|
||||
#elif defined(__GNUC__)
|
||||
" with GCC");
|
||||
printf(" %d.%d.%d\n", __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__);
|
||||
printf(" %d.%d.%d.\n", __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__);
|
||||
#else
|
||||
printf("\n");
|
||||
printf(".\n");
|
||||
#endif
|
||||
|
||||
printf("SW features:");
|
||||
if ( sw_has_sse2 ) printf( " SSE2" );
|
||||
if ( sw_has_aes ) printf( " AES" );
|
||||
if ( sw_has_avx ) printf( " AVX" );
|
||||
if ( sw_has_avx2 ) printf( " AVX2" );
|
||||
if ( sw_has_sha ) printf( " SHA" );
|
||||
printf("CPU features:");
|
||||
if ( cpu_has_sse2 ) printf( " SSE2" );
|
||||
if ( cpu_has_aes ) printf( " AES" );
|
||||
if ( cpu_has_avx ) printf( " AVX" );
|
||||
if ( cpu_has_avx2 ) printf( " AVX2" );
|
||||
if ( cpu_has_sha ) printf( " SHA" );
|
||||
|
||||
// SSE2 defaults to yes regardless
|
||||
printf("\nAlgo features: SSE2");
|
||||
if ( algo_has_aes ) printf( " AES" );
|
||||
if ( algo_has_avx ) printf( " AVX" );
|
||||
if ( algo_has_avx2 ) printf( " AVX2" );
|
||||
if ( algo_has_sha ) printf( " SHA" );
|
||||
printf("\n");
|
||||
printf(".\nSW features: SSE2");
|
||||
if ( sw_has_aes ) printf( " AES" );
|
||||
if ( sw_has_avx ) printf( " AVX" );
|
||||
if ( sw_has_avx2 ) printf( " AVX2" );
|
||||
if ( sw_has_4way ) printf( " 4WAY" );
|
||||
if ( sw_has_sha ) printf( " SHA" );
|
||||
|
||||
|
||||
use_sse2 = cpu_has_sse2 && sw_has_sse2;
|
||||
printf(".\nAlgo features:");
|
||||
if ( algo_has_sse2 ) printf( " SSE2" );
|
||||
if ( algo_has_aes ) printf( " AES" );
|
||||
if ( algo_has_avx ) printf( " AVX" );
|
||||
if ( algo_has_avx2 ) printf( " AVX2" );
|
||||
if ( algo_has_4way ) printf( " 4WAY" );
|
||||
if ( algo_has_sha ) printf( " SHA" );
|
||||
printf(".\n");
|
||||
|
||||
// Check for CPU and build incompatibilities
|
||||
if ( !cpu_has_sse2 )
|
||||
{
|
||||
printf( "A CPU with SSE2 is required to use cpuminer-opt\n" );
|
||||
return false;
|
||||
}
|
||||
if ( sw_has_avx2 && !( cpu_has_avx2 && cpu_has_aes ) )
|
||||
{
|
||||
if ( sw_has_4way && algo_has_4way )
|
||||
printf( "A CPU with AES and AVX2 is required to use 4way!\n" );
|
||||
else if ( algo_has_avx2 )
|
||||
printf( "A CPU with AES and AVX2 is required!\n" );
|
||||
return false;
|
||||
}
|
||||
if ( sw_has_avx && !( cpu_has_avx && cpu_has_aes ) )
|
||||
{
|
||||
printf( "A CPU with AES and AVX2 is required!\n" );
|
||||
return false;
|
||||
}
|
||||
if ( sw_has_aes && algo_has_aes && !cpu_has_aes )
|
||||
{
|
||||
printf( "A CPU with AES is required!\n" );
|
||||
return false;
|
||||
}
|
||||
if ( sw_has_sha && algo_has_sha && !cpu_has_sha )
|
||||
{
|
||||
printf( "A CPU with SHA is required!\n" );
|
||||
return false;
|
||||
}
|
||||
|
||||
// Determine mining options
|
||||
use_sse2 = cpu_has_sse2 && algo_has_sse2;
|
||||
use_aes = cpu_has_aes && sw_has_aes && algo_has_aes;
|
||||
use_avx = cpu_has_avx && sw_has_avx && algo_has_avx;
|
||||
use_avx2 = cpu_has_avx2 && sw_has_avx2 && algo_has_avx2;
|
||||
use_sha = cpu_has_sha && sw_has_sha && algo_has_sha;
|
||||
|
||||
if ( use_sse2 )
|
||||
{
|
||||
printf( "Start mining with SSE2" );
|
||||
if ( use_aes ) printf( " AES" );
|
||||
if ( use_avx2 ) printf( " AVX2" );
|
||||
else if ( use_avx ) printf( " AVX" );
|
||||
if ( use_sha ) printf( " SHA" );
|
||||
printf( "\n\n" );
|
||||
}
|
||||
use_4way = cpu_has_avx2 && sw_has_4way && algo_has_4way;
|
||||
use_none = !( use_sse2 || use_aes || use_avx || use_avx2 || use_sha
|
||||
|| use_4way );
|
||||
|
||||
// Display best options
|
||||
printf( "Start mining with" );
|
||||
if ( use_none ) printf( " no optimizations" );
|
||||
else
|
||||
printf( CL_RED "Unsupported CPU, miner will likely crash!\n\n" CL_N );
|
||||
{
|
||||
if ( use_aes ) printf( " AES" );
|
||||
if ( use_avx2 ) printf( " AVX2" );
|
||||
else if ( use_avx ) printf( " AVX" );
|
||||
else if ( use_sse2 ) printf( " SSE2" );
|
||||
if ( use_4way ) printf( " 4WAY" );
|
||||
if ( use_sha ) printf( " SHA" );
|
||||
}
|
||||
printf( ".\n\n" );
|
||||
|
||||
return true;
|
||||
}
|
||||
|
9
miner.h
9
miner.h
@@ -12,6 +12,10 @@
|
||||
#endif
|
||||
//#endif
|
||||
|
||||
#if defined(FOUR_WAY) && defined(__AVX2__)
|
||||
#define HASH_4WAY
|
||||
#endif
|
||||
|
||||
#ifdef _MSC_VER
|
||||
|
||||
#undef USE_ASM /* to fix */
|
||||
@@ -499,6 +503,7 @@ enum algos {
|
||||
ALGO_HODL,
|
||||
ALGO_JHA,
|
||||
ALGO_KECCAK,
|
||||
ALGO_KECCAKC,
|
||||
ALGO_LBRY,
|
||||
ALGO_LUFFA,
|
||||
ALGO_LYRA2RE,
|
||||
@@ -568,6 +573,7 @@ static const char* const algo_names[] = {
|
||||
"hodl",
|
||||
"jha",
|
||||
"keccak",
|
||||
"keccakc",
|
||||
"lbry",
|
||||
"luffa",
|
||||
"lyra2re",
|
||||
@@ -690,7 +696,8 @@ Options:\n\
|
||||
hmq1725 Espers\n\
|
||||
hodl Hodlcoin\n\
|
||||
jha jackppot (Jackpotcoin)\n\
|
||||
keccak Keccak\n\
|
||||
keccak Maxcoin\n\
|
||||
keccakc Creative Coin\n\
|
||||
lbry LBC, LBRY Credits\n\
|
||||
luffa Luffa\n\
|
||||
lyra2re lyra2\n\
|
||||
|
26
winbuild.sh
26
winbuild.sh
@@ -1,26 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
#if [ "$OS" = "Windows_NT" ]; then
|
||||
# ./mingw64.sh
|
||||
# exit 0
|
||||
#fi
|
||||
|
||||
# Linux build
|
||||
|
||||
make distclean || echo clean
|
||||
|
||||
rm -f config.status
|
||||
./autogen.sh || echo done
|
||||
|
||||
# Ubuntu 10.04 (gcc 4.4)
|
||||
# extracflags="-O3 -march=native -Wall -D_REENTRANT -funroll-loops -fvariable-expansion-in-unroller -fmerge-all-constants -fbranch-target-load-optimize2 -fsched2-use-superblocks -falign-loops=16 -falign-functions=16 -falign-jumps=16 -falign-labels=16"
|
||||
|
||||
# Debian 7.7 / Ubuntu 14.04 (gcc 4.7+)
|
||||
#extracflags="$extracflags -Ofast -flto -fuse-linker-plugin -ftree-loop-if-convert-stores"
|
||||
|
||||
CFLAGS="-O3 -march=native -Wall" ./configure --with-curl
|
||||
#CFLAGS="-O3 -march=native -Wall" CXXFLAGS="$CFLAGS -std=gnu++11 -fpermissive" ./configure --with-curl
|
||||
|
||||
make -j 4
|
||||
|
||||
strip -s cpuminer
|
Reference in New Issue
Block a user