This commit is contained in:
Jay D Dee
2018-03-31 12:50:52 -04:00
parent f449c6725f
commit dd5e552357
51 changed files with 241 additions and 265 deletions

View File

@@ -107,9 +107,10 @@ Supported Algorithms
x13sm3 hsr (Hshare) x13sm3 hsr (Hshare)
x14 X14 x14 X14
x15 X15 x15 X15
x16r Ravencoin x16r Ravencoin (RVN)
x16s Pigeoncoin (PGN)
x17 x17
xevan Bitsend xevan Bitsend (BSD)
yescrypt Globalboost-Y (BSTY) yescrypt Globalboost-Y (BSTY)
yescryptr8 BitZeny (ZNY) yescryptr8 BitZeny (ZNY)
yescryptr16 Yenten (YTN) yescryptr16 Yenten (YTN)
@@ -119,6 +120,8 @@ Supported Algorithms
Errata Errata
------ ------
Neoscrypt crashes on Windows; use the legacy version.
AMD CPUs older than Piledriver, including Athlon x2 and Phenom II x4, are not AMD CPUs older than Piledriver, including Athlon x2 and Phenom II x4, are not
supported by cpuminer-opt due to an incompatible implementation of SSE2 on supported by cpuminer-opt due to an incompatible implementation of SSE2 on
these CPUs. Some algos may crash the miner with an invalid instruction. these CPUs. Some algos may crash the miner with an invalid instruction.

View File

@@ -160,6 +160,12 @@ Support for even older x86_64 without AES_NI or SSE2 is not available.
Change Log Change Log
---------- ----------
v3.8.6
Fixed argon2 regression in v3.8.5.
Added x16s algo for Pigeoncoin.
Some code cleanup.
v3.8.5 v3.8.5
Added argon2d-crds and argon2d-dyn algos. Added argon2d-crds and argon2d-dyn algos.

View File

@@ -224,6 +224,7 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
case ALGO_X14: register_x14_algo ( gate ); break; case ALGO_X14: register_x14_algo ( gate ); break;
case ALGO_X15: register_x15_algo ( gate ); break; case ALGO_X15: register_x15_algo ( gate ); break;
case ALGO_X16R: register_x16r_algo ( gate ); break; case ALGO_X16R: register_x16r_algo ( gate ); break;
case ALGO_X16S: register_x16s_algo ( gate ); break;
case ALGO_X17: register_x17_algo ( gate ); break; case ALGO_X17: register_x17_algo ( gate ); break;
case ALGO_XEVAN: register_xevan_algo ( gate ); break; case ALGO_XEVAN: register_xevan_algo ( gate ); break;
case ALGO_YESCRYPT: register_yescrypt_algo ( gate ); break; case ALGO_YESCRYPT: register_yescrypt_algo ( gate ); break;

View File

@@ -295,7 +295,7 @@ void ar2_initial_hash(uint8_t *blockhash, argon2_context *context,
store32(&value, ADLEN); store32(&value, ADLEN);
my_blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); my_blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
blake2b_final(&BlakeHash, blockhash, ARGON2_PREHASH_DIGEST_LENGTH); ar2_blake2b_final(&BlakeHash, blockhash, ARGON2_PREHASH_DIGEST_LENGTH);
} }
int ar2_initialize(argon2_instance_t *instance, argon2_context *context) { int ar2_initialize(argon2_instance_t *instance, argon2_context *context) {

View File

@@ -70,7 +70,7 @@ bool register_argon2d_crds_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_argon2d_crds; gate->scanhash = (void*)&scanhash_argon2d_crds;
gate->hash = (void*)&argon2d_crds_hash; gate->hash = (void*)&argon2d_crds_hash;
gate->set_target = (void*)&scrypt_set_target; gate->set_target = (void*)&scrypt_set_target;
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT; gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
} }
// Dynamic // Dynamic
@@ -138,6 +138,6 @@ bool register_argon2d_dyn_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_argon2d_dyn; gate->scanhash = (void*)&scanhash_argon2d_dyn;
gate->hash = (void*)&argon2d_dyn_hash; gate->hash = (void*)&argon2d_dyn_hash;
gate->set_target = (void*)&scrypt_set_target; gate->set_target = (void*)&scrypt_set_target;
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT; gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
} }

View File

@@ -10,7 +10,7 @@ bool register_blake_algo( algo_gate_t* gate )
gate->optimizations = AVX2_OPT; gate->optimizations = AVX2_OPT;
gate->get_max64 = (void*)&blake_get_max64; gate->get_max64 = (void*)&blake_get_max64;
//#if defined (__AVX2__) && defined (FOUR_WAY) //#if defined (__AVX2__) && defined (FOUR_WAY)
// gate->optimizations = SSE2_OPT | AVX_OPT | AVX2_OPT; // gate->optimizations = SSE2_OPT | AVX2_OPT;
// gate->scanhash = (void*)&scanhash_blake_8way; // gate->scanhash = (void*)&scanhash_blake_8way;
// gate->hash = (void*)&blakehash_8way; // gate->hash = (void*)&blakehash_8way;
#if defined(BLAKE_4WAY) #if defined(BLAKE_4WAY)

View File

@@ -20,7 +20,7 @@ bool register_blake2s_algo( algo_gate_t* gate )
gate->hash = (void*)&blake2s_hash; gate->hash = (void*)&blake2s_hash;
#endif #endif
gate->get_max64 = (void*)&blake2s_get_max64; gate->get_max64 = (void*)&blake2s_get_max64;
gate->optimizations = SSE42_OPT | AVX_OPT | AVX2_OPT; gate->optimizations = SSE42_OPT | AVX2_OPT;
return true; return true;
}; };

View File

@@ -22,7 +22,7 @@ bool register_vanilla_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_blakecoin; gate->scanhash = (void*)&scanhash_blakecoin;
gate->hash = (void*)&blakecoinhash; gate->hash = (void*)&blakecoinhash;
#endif #endif
gate->optimizations = SSE42_OPT | AVX_OPT | AVX2_OPT; gate->optimizations = SSE42_OPT | AVX2_OPT;
gate->get_max64 = (void*)&blakecoin_get_max64; gate->get_max64 = (void*)&blakecoin_get_max64;
return true; return true;
} }

View File

@@ -83,7 +83,8 @@ void ExpandAESKey256(__m128i *keys, const __m128i *KeyBuf)
keys[14] = tmp1; keys[14] = tmp1;
} }
#ifdef __AVX__ #ifdef __SSE4_2__
//#ifdef __AVX__
#define AESENC(i,j) \ #define AESENC(i,j) \
State[j] = _mm_aesenc_si128(State[j], ExpandedKey[j][i]); State[j] = _mm_aesenc_si128(State[j], ExpandedKey[j][i]);

View File

@@ -199,7 +199,7 @@ bool register_hodl_algo( algo_gate_t* gate )
// return false; // return false;
// } // }
pthread_barrier_init( &hodl_barrier, NULL, opt_n_threads ); pthread_barrier_init( &hodl_barrier, NULL, opt_n_threads );
gate->optimizations = AES_OPT | AVX_OPT | AVX2_OPT; gate->optimizations = AES_OPT | SSE42_OPT | AVX2_OPT;
gate->scanhash = (void*)&hodl_scanhash; gate->scanhash = (void*)&hodl_scanhash;
gate->get_new_work = (void*)&hodl_get_new_work; gate->get_new_work = (void*)&hodl_get_new_work;
gate->longpoll_rpc_call = (void*)&hodl_longpoll_rpc_call; gate->longpoll_rpc_call = (void*)&hodl_longpoll_rpc_call;

View File

@@ -17,7 +17,8 @@ void GenerateGarbageCore( CacheEntry *Garbage, int ThreadID, int ThreadCount,
const uint32_t StartChunk = ThreadID * Chunk; const uint32_t StartChunk = ThreadID * Chunk;
const uint32_t EndChunk = StartChunk + Chunk; const uint32_t EndChunk = StartChunk + Chunk;
#ifdef __AVX__ #ifdef __SSE4_2__
//#ifdef __AVX__
uint64_t* TempBufs[ SHA512_PARALLEL_N ] ; uint64_t* TempBufs[ SHA512_PARALLEL_N ] ;
uint64_t* desination[ SHA512_PARALLEL_N ]; uint64_t* desination[ SHA512_PARALLEL_N ];
@@ -63,7 +64,8 @@ void Rev256(uint32_t *Dest, const uint32_t *Src)
int scanhash_hodl_wolf( int threadNumber, struct work* work, uint32_t max_nonce, int scanhash_hodl_wolf( int threadNumber, struct work* work, uint32_t max_nonce,
uint64_t *hashes_done ) uint64_t *hashes_done )
{ {
#ifdef __AVX__ #ifdef __SSE4_2__
//#ifdef __AVX__
uint32_t *pdata = work->data; uint32_t *pdata = work->data;
uint32_t *ptarget = work->target; uint32_t *ptarget = work->target;
CacheEntry *Garbage = (CacheEntry*)hodl_scratchbuf; CacheEntry *Garbage = (CacheEntry*)hodl_scratchbuf;

View File

@@ -1,5 +1,6 @@
#ifndef __AVX2__ #ifndef __AVX2__
#ifdef __AVX__ #ifdef __SSE4_2__
//#ifdef __AVX__
//Dependencies //Dependencies
#include <string.h> #include <string.h>

View File

@@ -6,7 +6,8 @@
void ExpandAESKey256(__m128i *keys, const __m128i *KeyBuf); void ExpandAESKey256(__m128i *keys, const __m128i *KeyBuf);
#ifdef __AVX__ #ifdef __SSE4_2__
//#ifdef __AVX__
#define AES_PARALLEL_N 8 #define AES_PARALLEL_N 8
#define BLOCK_COUNT 256 #define BLOCK_COUNT 256

View File

@@ -13,7 +13,7 @@ bool register_allium_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_allium; gate->scanhash = (void*)&scanhash_allium;
gate->hash = (void*)&allium_hash; gate->hash = (void*)&allium_hash;
#endif #endif
gate->optimizations = SSE2_OPT | AES_OPT | SSE42_OPT | AVX_OPT | AVX2_OPT; gate->optimizations = SSE2_OPT | AES_OPT | SSE42_OPT | AVX2_OPT;
gate->set_target = (void*)&alt_set_target; gate->set_target = (void*)&alt_set_target;
gate->get_max64 = (void*)&get_max64_0xFFFFLL; gate->get_max64 = (void*)&get_max64_0xFFFFLL;
return true; return true;

View File

@@ -17,7 +17,7 @@ bool register_lyra2h_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_lyra2h; gate->scanhash = (void*)&scanhash_lyra2h;
gate->hash = (void*)&lyra2h_hash; gate->hash = (void*)&lyra2h_hash;
#endif #endif
gate->optimizations = SSE42_OPT | AVX_OPT | AVX2_OPT; gate->optimizations = SSE42_OPT | AVX2_OPT;
gate->get_max64 = (void*)&get_max64_0xffffLL; gate->get_max64 = (void*)&get_max64_0xffffLL;
gate->set_target = (void*)&lyra2h_set_target; gate->set_target = (void*)&lyra2h_set_target;
return true; return true;

View File

@@ -132,7 +132,7 @@ void lyra2re_set_target ( struct work* work, double job_diff )
bool register_lyra2re_algo( algo_gate_t* gate ) bool register_lyra2re_algo( algo_gate_t* gate )
{ {
init_lyra2re_ctx(); init_lyra2re_ctx();
gate->optimizations = SSE2_OPT | AES_OPT | SSE42_OPT | AVX_OPT | AVX2_OPT; gate->optimizations = SSE2_OPT | AES_OPT | SSE42_OPT | AVX2_OPT;
gate->scanhash = (void*)&scanhash_lyra2re; gate->scanhash = (void*)&scanhash_lyra2re;
gate->hash = (void*)&lyra2re_hash; gate->hash = (void*)&lyra2re_hash;
gate->get_max64 = (void*)&lyra2re_get_max64; gate->get_max64 = (void*)&lyra2re_get_max64;

View File

@@ -31,7 +31,7 @@ bool register_lyra2rev2_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_lyra2rev2; gate->scanhash = (void*)&scanhash_lyra2rev2;
gate->hash = (void*)&lyra2rev2_hash; gate->hash = (void*)&lyra2rev2_hash;
#endif #endif
gate->optimizations = SSE2_OPT | AES_OPT | SSE42_OPT | AVX_OPT | AVX2_OPT; gate->optimizations = SSE2_OPT | AES_OPT | SSE42_OPT | AVX2_OPT;
gate->miner_thread_init = (void*)&lyra2rev2_thread_init; gate->miner_thread_init = (void*)&lyra2rev2_thread_init;
gate->set_target = (void*)&lyra2rev2_set_target; gate->set_target = (void*)&lyra2rev2_set_target;
return true; return true;

View File

@@ -21,7 +21,7 @@ bool register_lyra2z_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_lyra2z; gate->scanhash = (void*)&scanhash_lyra2z;
gate->hash = (void*)&lyra2z_hash; gate->hash = (void*)&lyra2z_hash;
#endif #endif
gate->optimizations = SSE42_OPT | AVX_OPT | AVX2_OPT; gate->optimizations = SSE42_OPT | AVX2_OPT;
gate->get_max64 = (void*)&get_max64_0xffffLL; gate->get_max64 = (void*)&get_max64_0xffffLL;
gate->set_target = (void*)&lyra2z_set_target; gate->set_target = (void*)&lyra2z_set_target;
return true; return true;

View File

@@ -69,7 +69,7 @@ bool lyra2z330_thread_init()
bool register_lyra2z330_algo( algo_gate_t* gate ) bool register_lyra2z330_algo( algo_gate_t* gate )
{ {
gate->optimizations = SSE42_OPT | AVX_OPT | AVX2_OPT; gate->optimizations = SSE42_OPT | AVX2_OPT;
gate->miner_thread_init = (void*)&lyra2z330_thread_init; gate->miner_thread_init = (void*)&lyra2z330_thread_init;
gate->scanhash = (void*)&scanhash_lyra2z330; gate->scanhash = (void*)&scanhash_lyra2z330;
gate->hash = (void*)&lyra2z330_hash; gate->hash = (void*)&lyra2z330_hash;

View File

@@ -375,7 +375,7 @@ out:
bool register_m7m_algo( algo_gate_t *gate ) bool register_m7m_algo( algo_gate_t *gate )
{ {
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT | SHA_OPT; gate->optimizations = SHA_OPT;
init_m7m_ctx(); init_m7m_ctx();
gate->scanhash = (void*)scanhash_m7m_hash; gate->scanhash = (void*)scanhash_m7m_hash;
gate->build_stratum_request = (void*)&std_be_build_stratum_request; gate->build_stratum_request = (void*)&std_be_build_stratum_request;

View File

@@ -11,7 +11,7 @@ bool register_deep_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_deep; gate->scanhash = (void*)&scanhash_deep;
gate->hash = (void*)&deep_hash; gate->hash = (void*)&deep_hash;
#endif #endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT; gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
return true; return true;
}; };

View File

@@ -11,7 +11,7 @@ bool register_qubit_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_qubit; gate->scanhash = (void*)&scanhash_qubit;
gate->hash = (void*)&qubit_hash; gate->hash = (void*)&qubit_hash;
#endif #endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT; gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
return true; return true;
}; };

View File

@@ -110,7 +110,7 @@ int64_t lbry_get_max64() { return 0x1ffffLL; }
bool register_lbry_algo( algo_gate_t* gate ) bool register_lbry_algo( algo_gate_t* gate )
{ {
gate->optimizations = SSE2_OPT | AVX_OPT | AVX2_OPT | SHA_OPT; gate->optimizations = AVX2_OPT | SHA_OPT;
#if defined (LBRY_8WAY) #if defined (LBRY_8WAY)
gate->scanhash = (void*)&scanhash_lbry_8way; gate->scanhash = (void*)&scanhash_lbry_8way;
gate->hash = (void*)&lbry_8way_hash; gate->hash = (void*)&lbry_8way_hash;

View File

@@ -778,7 +778,7 @@ bool scrypt_miner_thread_init( int thr_id )
bool register_scrypt_algo( algo_gate_t* gate ) bool register_scrypt_algo( algo_gate_t* gate )
{ {
gate->optimizations = SSE2_OPT | AVX_OPT | AVX2_OPT; gate->optimizations = SSE2_OPT | AVX2_OPT;
gate->miner_thread_init =(void*)&scrypt_miner_thread_init; gate->miner_thread_init =(void*)&scrypt_miner_thread_init;
gate->scanhash = (void*)&scanhash_scrypt; gate->scanhash = (void*)&scanhash_scrypt;
// gate->hash = (void*)&scrypt_1024_1_1_256_24way; // gate->hash = (void*)&scrypt_1024_1_1_256_24way;

View File

@@ -373,9 +373,6 @@ sha256_8way_round( __m256i *in, __m256i r[8] )
H = r[7]; H = r[7];
SHA2s_8WAY_STEP( A, B, C, D, E, F, G, H, 0, 0 ); SHA2s_8WAY_STEP( A, B, C, D, E, F, G, H, 0, 0 );
//printf("sha256 8 step: D= %08lx H= %08lx\n",*(uint32_t*)&D,*(uint32_t*)&H);
SHA2s_8WAY_STEP( H, A, B, C, D, E, F, G, 1, 0 ); SHA2s_8WAY_STEP( H, A, B, C, D, E, F, G, 1, 0 );
SHA2s_8WAY_STEP( G, H, A, B, C, D, E, F, 2, 0 ); SHA2s_8WAY_STEP( G, H, A, B, C, D, E, F, 2, 0 );
SHA2s_8WAY_STEP( F, G, H, A, B, C, D, E, 3, 0 ); SHA2s_8WAY_STEP( F, G, H, A, B, C, D, E, 3, 0 );
@@ -392,8 +389,6 @@ sha256_8way_round( __m256i *in, __m256i r[8] )
SHA2s_8WAY_STEP( C, D, E, F, G, H, A, B, 14, 0 ); SHA2s_8WAY_STEP( C, D, E, F, G, H, A, B, 14, 0 );
SHA2s_8WAY_STEP( B, C, D, E, F, G, H, A, 15, 0 ); SHA2s_8WAY_STEP( B, C, D, E, F, G, H, A, 15, 0 );
//printf("sha256 8 step: A= %08lx B= %08lx\n",*(uint32_t*)&A,*(uint32_t*)&B);
for ( int j = 16; j < 64; j += 16 ) for ( int j = 16; j < 64; j += 16 )
{ {
W[ 0] = SHA2x_MEXP( 14, 9, 1, 0 ); W[ 0] = SHA2x_MEXP( 14, 9, 1, 0 );
@@ -460,17 +455,7 @@ void sha256_8way( sha256_8way_context *sc, const void *data, size_t len )
__m256i *vdata = (__m256i*)data; __m256i *vdata = (__m256i*)data;
size_t ptr; size_t ptr;
const int buf_size = 64; const int buf_size = 64;
/*
printf("sha256 8 update1: len= %d\n", len);
uint32_t* d = (uint32_t*)data;
printf("sha256 8 in: %08lx %08lx %08lx %08lx\n",d[0],d[8],d[16],d[24]);
printf("sha256 8 in: %08lx %08lx %08lx %08lx\n",d[32],d[40],d[48],d[56]);
printf("sha256 8 in: %08lx %08lx %08lx %08lx\n",d[64],d[72],d[80],d[88]);
printf("sha256 8 in: %08lx %08lx %08lx %08lx\n",d[96],d[104],d[112],d[120]);
printf("sha256 8 in: %08lx %08lx %08lx %08lx\n",d[128],d[136],d[144],d[152]);
printf("sha256 8 in: %08lx %08lx %08lx %08lx\n",d[160],d[168],d[176],d[184]);
printf("sha256 8 in: %08lx %08lx %08lx %08lx\n",d[192],d[200],d[208],d[216]);
*/
ptr = (unsigned)sc->count_low & (buf_size - 1U); ptr = (unsigned)sc->count_low & (buf_size - 1U);
while ( len > 0 ) while ( len > 0 )
{ {
@@ -486,24 +471,7 @@ printf("sha256 8 in: %08lx %08lx %08lx %08lx\n",d[192],d[200],d[208],d[216]);
len -= clen; len -= clen;
if ( ptr == buf_size ) if ( ptr == buf_size )
{ {
/*
printf("sha256 8 update2: compress\n");
d = (uint32_t*)sc->buf;
printf("sha256 8 buf: %08lx %08lx %08lx %08lx\n",d[0],d[8],d[16],d[24]);
printf("sha256 8 buf: %08lx %08lx %08lx %08lx\n",d[32],d[40],d[48],d[56]);
printf("sha256 8 buf: %08lx %08lx %08lx %08lx\n",d[64],d[72],d[80],d[88]);
printf("sha256 8 buf: %08lx %08lx %08lx %08lx\n",d[96],d[104],d[112],d[120]);
d= (uint32_t*)sc->val;
printf("sha256 8 val: %08lx %08lx %08lx %08lx\n",d[0],d[8],d[16],d[24]);
printf("sha256 8 val: %08lx %08lx %08lx %08lx\n",d[32],d[40],d[48],d[56]);
*/
sha256_8way_round( sc->buf, sc->val ); sha256_8way_round( sc->buf, sc->val );
/*
printf("sha256 8 update3\n");
d= (uint32_t*)sc->val;
printf("sha256 8 val: %08lx %08lx %08lx %08lx\n",d[0],d[8],d[16],d[24]);
printf("sha256 8 val: %08lx %08lx %08lx %08lx\n",d[32],d[40],d[48],d[56]);
*/
ptr = 0; ptr = 0;
} }
clow = sc->count_low; clow = sc->count_low;
@@ -522,32 +490,13 @@ void sha256_8way_close( sha256_8way_context *sc, void *dst )
const int pad = buf_size - 8; const int pad = buf_size - 8;
ptr = (unsigned)sc->count_low & (buf_size - 1U); ptr = (unsigned)sc->count_low & (buf_size - 1U);
/*
printf("sha256 8 close1: ptr= %d\n", ptr);
uint32_t* d = (uint32_t*)sc->buf;
printf("sha256 8 buf: %08lx %08lx %08lx %08lx\n",d[0],d[8],d[16],d[24]);
printf("sha256 8 buf: %08lx %08lx %08lx %08lx\n",d[32],d[40],d[48],d[56]);
printf("sha256 8 buf: %08lx %08lx %08lx %08lx\n",d[64],d[72],d[80],d[88]);
printf("sha256 8 buf: %08lx %08lx %08lx %08lx\n",d[96],d[104],d[112],d[120]);
*/
sc->buf[ ptr>>2 ] = _mm256_set1_epi32( 0x80 ); sc->buf[ ptr>>2 ] = _mm256_set1_epi32( 0x80 );
ptr += 4; ptr += 4;
if ( ptr > pad ) if ( ptr > pad )
{ {
memset_zero_256( sc->buf + (ptr>>2), (buf_size - ptr) >> 2 ); memset_zero_256( sc->buf + (ptr>>2), (buf_size - ptr) >> 2 );
//printf("sha256 8 close2: compress\n");
//uint32_t* d = (uint32_t*)sc->buf;
//printf("sha256 8 buf: %08lx %08lx %08lx %08lx\n",d[0],d[8],d[16],d[24]);
sha256_8way_round( sc->buf, sc->val ); sha256_8way_round( sc->buf, sc->val );
//d= (uint32_t*)sc->val;
//printf("sha256 8 val: %08lx %08lx %08lx %08lx\n",d[0],d[8],d[16],d[24]);
memset_zero_256( sc->buf, pad >> 2 ); memset_zero_256( sc->buf, pad >> 2 );
} }
else else
@@ -561,23 +510,9 @@ printf("sha256 8 buf: %08lx %08lx %08lx %08lx\n",d[96],d[104],d[112],d[120]);
mm256_bswap_32( _mm256_set1_epi32( high ) ); mm256_bswap_32( _mm256_set1_epi32( high ) );
sc->buf[ ( pad+4 ) >> 2 ] = sc->buf[ ( pad+4 ) >> 2 ] =
mm256_bswap_32( _mm256_set1_epi32( low ) ); mm256_bswap_32( _mm256_set1_epi32( low ) );
/*
d = (uint32_t*)sc->buf;
printf("sha256 8 close3: compress\n");
printf("sha256 8 buf: %08lx %08lx %08lx %08lx\n",d[0],d[8],d[16],d[24]);
printf("sha256 8 buf: %08lx %08lx %08lx %08lx\n",d[32],d[40],d[48],d[56]);
printf("sha256 8 buf: %08lx %08lx %08lx %08lx\n",d[64],d[72],d[80],d[88]);
printf("sha256 8 buf: %08lx %08lx %08lx %08lx\n",d[96],d[104],d[112],d[120]);
d= (uint32_t*)sc->val;
printf("sha256 8 val: %08lx %08lx %08lx %08lx\n",d[0],d[8],d[16],d[24]);
printf("sha256 8 val: %08lx %08lx %08lx %08lx\n",d[32],d[40],d[48],d[56]);
*/
sha256_8way_round( sc->buf, sc->val ); sha256_8way_round( sc->buf, sc->val );
/*
printf("sha256 8 val: %08lx %08lx %08lx %08lx\n",d[0],d[8],d[16],d[24]);
printf("sha256 8 val: %08lx %08lx %08lx %08lx\n",d[32],d[40],d[48],d[56]);
*/
for ( u = 0; u < 8; u ++ ) for ( u = 0; u < 8; u ++ )
((__m256i*)dst)[u] = mm256_bswap_32( sc->val[u] ); ((__m256i*)dst)[u] = mm256_bswap_32( sc->val[u] );
} }

View File

@@ -11,13 +11,6 @@ bool register_sha256t_algo( algo_gate_t* gate )
#else #else
gate->scanhash = (void*)&scanhash_sha256t; gate->scanhash = (void*)&scanhash_sha256t;
gate->hash = (void*)&sha256t_hash; gate->hash = (void*)&sha256t_hash;
/*
#ifndef USE_SPH_SHA
SHA256_Init( &sha256t_ctx );
#else
sph_sha256_init( &sha256t_ctx );
#endif
*/
#endif #endif
gate->optimizations = SSE42_OPT | AVX2_OPT | SHA_OPT; gate->optimizations = SSE42_OPT | AVX2_OPT | SHA_OPT;
gate->get_max64 = (void*)&get_max64_0x3ffff; gate->get_max64 = (void*)&get_max64_0x3ffff;

View File

@@ -3,82 +3,57 @@
#include <stdint.h> #include <stdint.h>
#include <string.h> #include <string.h>
#include <stdio.h> #include <stdio.h>
#include "sph_sha2.h"
#include <openssl/sha.h> #include <openssl/sha.h>
#if !defined(SHA256T_4WAY) #if !defined(SHA256T_4WAY)
#ifndef USE_SPH_SHA static __thread SHA256_CTX sha256t_ctx __attribute__ ((aligned (64)));
static __thread SHA256_CTX sha256t_ctx __attribute__ ((aligned (64)));
#else
static __thread sph_sha256_context sha256t_ctx __attribute__ ((aligned (64)));
#endif
void sha256t_midstate( const void* input ) void sha256t_midstate( const void* input )
{ {
#ifndef USE_SPH_SHA
SHA256_Init( &sha256t_ctx ); SHA256_Init( &sha256t_ctx );
SHA256_Update( &sha256t_ctx, input, 64 ); SHA256_Update( &sha256t_ctx, input, 64 );
#else
sph_sha256_init( &sha256t_ctx );
sph_sha256( &sha256t_ctx, input, 64 );
#endif
} }
void sha256t_hash( void* output, const void* input ) void sha256t_hash( void* output, const void* input )
{ {
uint32_t _ALIGN(64) hashA[16]; uint32_t _ALIGN(64) hash[16];
const int midlen = 64; // bytes const int midlen = 64; // bytes
const int tail = 80 - midlen; // 16 const int tail = 80 - midlen; // 16
#ifndef USE_SPH_SHA SHA256_CTX ctx __attribute__ ((aligned (64)));
SHA256_CTX ctx_sha256 __attribute__ ((aligned (64))); memcpy( &ctx, &sha256t_ctx, sizeof sha256t_ctx );
memcpy( &ctx_sha256, &sha256t_ctx, sizeof sha256t_ctx );
SHA256_Update( &ctx_sha256, input + midlen, tail ); SHA256_Update( &ctx, input + midlen, tail );
SHA256_Final( (unsigned char*)hashA, &ctx_sha256 ); SHA256_Final( (unsigned char*)hash, &ctx );
SHA256_Init( &ctx_sha256 ); SHA256_Init( &ctx );
SHA256_Update( &ctx_sha256, hashA, 32 ); SHA256_Update( &ctx, hash, 32 );
SHA256_Final( (unsigned char*)hashA, &ctx_sha256 ); SHA256_Final( (unsigned char*)hash, &ctx );
SHA256_Init( &ctx_sha256 ); SHA256_Init( &ctx );
SHA256_Update( &ctx_sha256, hashA, 32 ); SHA256_Update( &ctx, hash, 32 );
SHA256_Final( (unsigned char*)hashA, &ctx_sha256 ); SHA256_Final( (unsigned char*)hash, &ctx );
#else
sph_sha256_context ctx_sha256 __attribute__ ((aligned (64)));
memcpy( &ctx_sha256, &sha256t_mid, sizeof sha256t_mid );
sph_sha256( &ctx_sha256, input + midlen, tail ); memcpy( output, hash, 32 );
sph_sha256_close( &ctx_sha256, hashA );
sph_sha256_init( &ctx_sha256 );
sph_sha256( &ctx_sha256, hashA, 32 );
sph_sha256_close( &ctx_sha256, hashA );
sph_sha256_init( &ctx_sha256 );
sph_sha256( &ctx_sha256, hashA, 32 );
sph_sha256_close( &ctx_sha256, hashA );
#endif
memcpy( output, hashA, 32 );
} }
int scanhash_sha256t(int thr_id, struct work *work, int scanhash_sha256t( int thr_id, struct work *work, uint32_t max_nonce,
uint32_t max_nonce, uint64_t *hashes_done) uint64_t *hashes_done)
{ {
uint32_t *pdata = work->data; uint32_t *pdata = work->data;
uint32_t *ptarget = work->target; uint32_t *ptarget = work->target;
uint32_t n = pdata[19] - 1; uint32_t n = pdata[19] - 1;
const uint32_t first_nonce = pdata[19]; const uint32_t first_nonce = pdata[19];
const uint32_t Htarg = ptarget[7]; const uint32_t Htarg = ptarget[7];
#ifdef _MSC_VER #ifdef _MSC_VER
uint32_t __declspec(align(32)) hash64[8]; uint32_t __declspec(align(32)) hash64[8];
#else #else
uint32_t hash64[8] __attribute__((aligned(32))); uint32_t hash64[8] __attribute__((aligned(32)));
#endif #endif
uint32_t endiandata[32]; uint32_t endiandata[32];
uint64_t htmax[] = { uint64_t htmax[] = {
0, 0,
0xF, 0xF,
0xFF, 0xFF,
@@ -86,7 +61,7 @@ int scanhash_sha256t(int thr_id, struct work *work,
0xFFFF, 0xFFFF,
0x10000000 0x10000000
}; };
uint32_t masks[] = { uint32_t masks[] = {
0xFFFFFFFF, 0xFFFFFFFF,
0xFFFFFFF0, 0xFFFFFFF0,
0xFFFFFF00, 0xFFFFFF00,
@@ -95,46 +70,33 @@ int scanhash_sha256t(int thr_id, struct work *work,
0 0
}; };
// we need bigendian data... // we need bigendian data...
for (int k = 0; k < 19; k++) for ( int k = 0; k < 19; k++ )
be32enc(&endiandata[k], pdata[k]); be32enc( &endiandata[k], pdata[k] );
sha256t_midstate( endiandata ); sha256t_midstate( endiandata );
#ifdef DEBUG_ALGO for ( int m = 0; m < 6; m++ )
if (Htarg != 0) {
printf("[%d] Htarg=%X\n", thr_id, Htarg); if ( Htarg <= htmax[m] )
#endif {
for (int m=0; m < 6; m++) { uint32_t mask = masks[m];
if (Htarg <= htmax[m]) { do {
uint32_t mask = masks[m]; pdata[19] = ++n;
do { be32enc(&endiandata[19], n);
pdata[19] = ++n; sha256t_hash( hash64, endiandata );
be32enc(&endiandata[19], n); if ( ( !(hash64[7] & mask) ) && fulltest( hash64, ptarget ) )
sha256t_hash( hash64, endiandata ); {
#ifndef DEBUG_ALGO *hashes_done = n - first_nonce + 1;
if ((!(hash64[7] & mask)) && fulltest(hash64, ptarget)) { return true;
*hashes_done = n - first_nonce + 1; }
return true; } while ( n < max_nonce && !work_restart[thr_id].restart );
} break;
#else }
if (!(n % 0x1000) && !thr_id) printf("."); }
if (!(hash64[7] & mask)) {
printf("[%d]",thr_id);
if (fulltest(hash64, ptarget)) {
*hashes_done = n - first_nonce + 1;
return true;
}
}
#endif
} while (n < max_nonce && !work_restart[thr_id].restart);
// see blake.c if else to understand the loop on htmax => mask
break;
}
}
*hashes_done = n - first_nonce + 1; *hashes_done = n - first_nonce + 1;
pdata[19] = n; pdata[19] = n;
return 0; return 0;
} }
#endif #endif

View File

@@ -59,17 +59,28 @@ static const sph_u32 IV512[] = {
C32(0xE275EADE), C32(0x502D9FCD), C32(0xB9357178), C32(0x022A4B9A) C32(0xE275EADE), C32(0x502D9FCD), C32(0xB9357178), C32(0x022A4B9A)
}; };
// Partially rotate elements in two 128 bit vectors as one 256 bit vector // Partially rotate elements in two 128 bit vectors a & b as one 256 bit vector
// and return the rotated high 128 bits. // and return the rotated 128 bit vector a.
// a[3:0] = { b[0], a[3], a[2], a[1] }
#if defined(__SSSE3__) #if defined(__SSSE3__)
#define mm_ror256hi_1x32( hi, lo ) _mm_alignr_epi8( lo, hi, 4 ) #define mm_ror256hi_1x32( a, b ) _mm_alignr_epi8( b, a, 4 )
#else // SSE2 #else // SSE2
#define mm_ror256hi_1x32( hi, lo ) \ #define mm_ror256hi_1x32( a, b ) \
_mm_or_si128( _mm_srli_si128( hi, 4 ), \ _mm_or_si128( _mm_srli_si128( a, 4 ), \
_mm_slli_si128( lo, 12 ) ) _mm_slli_si128( b, 12 ) )
#endif
#if defined(__AVX2__)
// 2 way version of above
// a[7:0] = { b[4], a[7], a[6], a[5], b[0], a[3], a[2], a[1] }
#define mm256_ror2x256hi_1x32( a, b ) \
_mm256_blend_epi32( mm256_ror256_1x32( a ), \
mm256_rol256_3x32( b ), 0x88 )
#endif #endif

View File

@@ -11,7 +11,7 @@ bool register_c11_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_c11; gate->scanhash = (void*)&scanhash_c11;
gate->hash = (void*)&c11_hash; gate->hash = (void*)&c11_hash;
#endif #endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT; gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
gate->get_max64 = (void*)&get_max64_0x3ffff; gate->get_max64 = (void*)&get_max64_0x3ffff;
return true; return true;
}; };

View File

@@ -17,7 +17,7 @@ bool register_timetravel_algo( algo_gate_t* gate )
gate->hash = (void*)&timetravel_hash; gate->hash = (void*)&timetravel_hash;
#endif #endif
gate->set_target = (void*)&tt8_set_target; gate->set_target = (void*)&tt8_set_target;
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT; gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
gate->get_max64 = (void*)&get_max64_0xffffLL; gate->get_max64 = (void*)&get_max64_0xffffLL;
return true; return true;
}; };

View File

@@ -17,7 +17,7 @@ bool register_timetravel10_algo( algo_gate_t* gate )
gate->hash = (void*)&timetravel10_hash; gate->hash = (void*)&timetravel10_hash;
#endif #endif
gate->set_target = (void*)&tt10_set_target; gate->set_target = (void*)&tt10_set_target;
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT; gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
gate->get_max64 = (void*)&get_max64_0xffffLL; gate->get_max64 = (void*)&get_max64_0xffffLL;
return true; return true;
}; };

View File

@@ -11,7 +11,7 @@ bool register_x11_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_x11; gate->scanhash = (void*)&scanhash_x11;
gate->hash = (void*)&x11_hash; gate->hash = (void*)&x11_hash;
#endif #endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT; gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
gate->get_max64 = (void*)&get_max64_0x3ffff; gate->get_max64 = (void*)&get_max64_0x3ffff;
return true; return true;
}; };

View File

@@ -89,7 +89,7 @@ bool register_x11evo_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_x11evo; gate->scanhash = (void*)&scanhash_x11evo;
gate->hash = (void*)&x11evo_hash; gate->hash = (void*)&x11evo_hash;
#endif #endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT; gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
return true; return true;
}; };

View File

@@ -11,7 +11,7 @@ bool register_x11gost_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_x11gost; gate->scanhash = (void*)&scanhash_x11gost;
gate->hash = (void*)&x11gost_hash; gate->hash = (void*)&x11gost_hash;
#endif #endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT; gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
gate->get_max64 = (void*)&get_max64_0x3ffff; gate->get_max64 = (void*)&get_max64_0x3ffff;
return true; return true;
}; };

View File

@@ -2,7 +2,7 @@
bool register_skunk_algo( algo_gate_t* gate ) bool register_skunk_algo( algo_gate_t* gate )
{ {
gate->optimizations = SSE2_OPT | AVX_OPT | AVX2_OPT; gate->optimizations = SSE2_OPT | AVX2_OPT;
#if defined (SKUNK_4WAY) #if defined (SKUNK_4WAY)
gate->miner_thread_init = (void*)&skunk_4way_thread_init; gate->miner_thread_init = (void*)&skunk_4way_thread_init;
gate->scanhash = (void*)&scanhash_skunk_4way; gate->scanhash = (void*)&scanhash_skunk_4way;

View File

@@ -2,7 +2,7 @@
bool register_polytimos_algo( algo_gate_t* gate ) bool register_polytimos_algo( algo_gate_t* gate )
{ {
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT; gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
#ifdef POLYTIMOS_4WAY #ifdef POLYTIMOS_4WAY
init_polytimos_4way_ctx(); init_polytimos_4way_ctx();
gate->scanhash = (void*)&scanhash_polytimos_4way; gate->scanhash = (void*)&scanhash_polytimos_4way;

View File

@@ -11,7 +11,7 @@ bool register_veltor_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_veltor; gate->scanhash = (void*)&scanhash_veltor;
gate->hash = (void*)&veltor_hash; gate->hash = (void*)&veltor_hash;
#endif #endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT; gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
gate->get_max64 = (void*)&get_max64_0x3ffff; gate->get_max64 = (void*)&get_max64_0x3ffff;
return true; return true;
}; };

View File

@@ -11,7 +11,7 @@ bool register_x14_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_x14; gate->scanhash = (void*)&scanhash_x14;
gate->hash = (void*)&x14hash; gate->hash = (void*)&x14hash;
#endif #endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT; gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
gate->get_max64 = (void*)&get_max64_0x3ffff; gate->get_max64 = (void*)&get_max64_0x3ffff;
return true; return true;
}; };

View File

@@ -11,7 +11,7 @@ bool register_x15_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_x15; gate->scanhash = (void*)&scanhash_x15;
gate->hash = (void*)&x15hash; gate->hash = (void*)&x15hash;
#endif #endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT; gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
return true; return true;
}; };

View File

@@ -429,7 +429,7 @@ int scanhash_hmq1725( int thr_id, struct work *work, int32_t max_nonce,
bool register_hmq1725_algo( algo_gate_t* gate ) bool register_hmq1725_algo( algo_gate_t* gate )
{ {
init_hmq1725_ctx(); init_hmq1725_ctx();
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT | SHA_OPT; gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | SHA_OPT;
gate->set_target = (void*)&scrypt_set_target; gate->set_target = (void*)&scrypt_set_target;
gate->scanhash = (void*)&scanhash_hmq1725; gate->scanhash = (void*)&scanhash_hmq1725;
gate->hash = (void*)&hmq1725hash; gate->hash = (void*)&hmq1725hash;

View File

@@ -86,7 +86,7 @@ void x16r_4way_hash( void* output, const void* input )
if ( s_ntime == UINT32_MAX ) if ( s_ntime == UINT32_MAX )
{ {
const uint8_t* tmp = (uint8_t*) in0; const uint8_t* tmp = (uint8_t*) in0;
x16r_getAlgoString( &tmp[4], hashOrder ); x16_r_s_getAlgoString( &tmp[4], hashOrder );
} }
// Input data is both 64 bit interleaved (input) // Input data is both 64 bit interleaved (input)
@@ -321,10 +321,11 @@ int scanhash_x16r_4way( int thr_id, struct work *work, uint32_t max_nonce,
for ( int k=0; k < 19; k++ ) for ( int k=0; k < 19; k++ )
be32enc( &endiandata[k], pdata[k] ); be32enc( &endiandata[k], pdata[k] );
if ( s_ntime != pdata[17] ) // if ( s_ntime != pdata[17] )
if ( s_ntime != endiandata[17] )
{ {
uint32_t ntime = swab32(pdata[17]); uint32_t ntime = swab32(pdata[17]);
x16r_getAlgoString( (const char*) (&endiandata[1]), hashOrder ); x16_r_s_getAlgoString( (const char*) (&endiandata[1]), hashOrder );
s_ntime = ntime; s_ntime = ntime;
if ( opt_debug && !thr_id ) if ( opt_debug && !thr_id )
applog( LOG_DEBUG, "hash order %s (%08x)", hashOrder, ntime ); applog( LOG_DEBUG, "hash order %s (%08x)", hashOrder, ntime );

View File

@@ -1,6 +1,6 @@
#include "x16r-gate.h" #include "x16r-gate.h"
void x16r_getAlgoString( const uint8_t* prevblock, char *output ) void x16r_getAlgoString( const char* prevblock, char *output )
{ {
char *sptr = output; char *sptr = output;
for ( int j = 0; j < X16R_HASH_FUNC_COUNT; j++ ) for ( int j = 0; j < X16R_HASH_FUNC_COUNT; j++ )
@@ -16,6 +16,22 @@ void x16r_getAlgoString( const uint8_t* prevblock, char *output )
*sptr = '\0'; *sptr = '\0';
} }
// Build the x16s hash order string from the first 8 bytes of prevblock.
// Starting from "0123456789ABCDEF", 16 nibbles are consumed (byte 7 down to
// byte 0, high nibble before low nibble); each nibble is an index into the
// current string and the character found there is moved to the front.
// output receives 16 characters plus the terminating NUL.
void x16s_getAlgoString( const char* prevblock, char *output )
{
   const uint8_t *bytes = (const uint8_t*)prevblock;

   strcpy( output, "0123456789ABCDEF" );

   for ( int step = 0; step < 16; step++ )
   {
      // Two steps per source byte, walking from byte 7 towards byte 0.
      const uint8_t src = bytes[ ( 15 - step ) >> 1 ];
      const int pos = ( step & 1 ) ? ( src & 0xF ) : ( src >> 4 );
      const char picked = output[ pos ];

      // Slide everything in front of pos up by one, then put the picked
      // character at the head of the string.
      memmove( output + 1, output, (size_t)pos );
      output[0] = picked;
   }
}
bool register_x16r_algo( algo_gate_t* gate ) bool register_x16r_algo( algo_gate_t* gate )
{ {
@@ -28,8 +44,26 @@ bool register_x16r_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_x16r; gate->scanhash = (void*)&scanhash_x16r;
gate->hash = (void*)&x16r_hash; gate->hash = (void*)&x16r_hash;
#endif #endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT; gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
gate->set_target = (void*)&alt_set_target; gate->set_target = (void*)&alt_set_target;
x16_r_s_getAlgoString = (void*)&x16r_getAlgoString;
return true;
};
// Register the x16s algo with the algo gate.
// Installs the x16r scanhash and hash functions (the 4-way versions when
// X16R_4WAY is defined) and points x16_r_s_getAlgoString at the x16s
// hash-order generator. Always returns true.
bool register_x16s_algo( algo_gate_t* gate )
{
#if defined (X16R_4WAY)
init_x16r_4way_ctx();
gate->scanhash = (void*)&scanhash_x16r_4way;
gate->hash = (void*)&x16r_4way_hash;
#else
init_x16r_ctx();
gate->scanhash = (void*)&scanhash_x16r;
gate->hash = (void*)&x16r_hash;
#endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
gate->set_target = (void*)&alt_set_target;
// Hash order is generated by the x16s variant from here on.
x16_r_s_getAlgoString = (void*)&x16s_getAlgoString;
return true; return true;
}; };

View File

@@ -29,8 +29,12 @@ enum x16r_Algo {
X16R_HASH_FUNC_COUNT X16R_HASH_FUNC_COUNT
}; };
// Hash-order generator for the active algo. The register functions point it
// at x16r_getAlgoString or x16s_getAlgoString; both return void, so the
// pointer type returns void as well (calling a void function through a
// pointer declared to return bool is undefined behaviour).
// NOTE(review): this line defines the variable rather than declaring it
// extern. If this file is a header included by several .c files, that relies
// on common-symbol linking -- consider extern here plus one definition.
void (*x16_r_s_getAlgoString) ( const char*, char* );
void x16r_getAlgoString( const char* prevblock, char *output );
void x16s_getAlgoString( const char* prevblock, char *output );
bool register_x16r_algo( algo_gate_t* gate ); bool register_x16r_algo( algo_gate_t* gate );
void x16r_getAlgoString( const uint8_t* prevblock, char *output ); bool register_x16s_algo( algo_gate_t* gate );
#if defined(X16R_4WAY) #if defined(X16R_4WAY)

View File

@@ -43,7 +43,7 @@ typedef struct {
#endif #endif
sph_blake512_context blake; sph_blake512_context blake;
sph_bmw512_context bmw; sph_bmw512_context bmw;
sph_skein512_context skein; sph_skein512_context skein;
sph_jh512_context jh; sph_jh512_context jh;
sph_keccak512_context keccak; sph_keccak512_context keccak;
hashState_luffa luffa; hashState_luffa luffa;
@@ -61,27 +61,7 @@ x16r_ctx_holder x16r_ctx __attribute__ ((aligned (64)));
void init_x16r_ctx() void init_x16r_ctx()
{ {
//#ifdef NO_AES_NI
// sph_groestl512_init(&x16r_ctx.groestl );
// sph_echo512_init(&x16r_ctx.echo);
//#else
// init_echo( &x16r_ctx.echo, 512 );
// init_groestl( &x16r_ctx.groestl, 64 );
//#endif
// sph_blake512_init( &x16r_ctx.blake );
// sph_bmw512_init( &x16r_ctx.bmw );
// sph_skein512_init( &x16r_ctx.bmw );
// sph_jh512_init( &x16r_ctx.jh );
// sph_keccak512_init( &x16r_ctx.keccak );
// init_luffa( &x16r_ctx.luffa, 512 );
cubehashInit( &x16r_ctx.cube, 512, 16, 32 ); cubehashInit( &x16r_ctx.cube, 512, 16, 32 );
// sph_shavite512_init( &x16r_ctx.shavite );
// init_sd( &x16r_ctx.simd, 512 );
// sph_hamsi512_init( &x16r_ctx.hamsi );
// sph_fugue512_init( &x16r_ctx.fugue );
// sph_shabal512_init( &x16r_ctx.shabal );
// sph_whirlpool_init( &x16r_ctx.whirlpool );
// SHA512_Init( &x16r_ctx.sha512 );
}; };
void x16r_hash( void* output, const void* input ) void x16r_hash( void* output, const void* input )
@@ -94,7 +74,7 @@ void x16r_hash( void* output, const void* input )
if ( s_ntime == UINT32_MAX ) if ( s_ntime == UINT32_MAX )
{ {
const uint8_t* in8 = (uint8_t*) input; const uint8_t* in8 = (uint8_t*) input;
x16r_getAlgoString( &in8[4], hashOrder ); x16_r_s_getAlgoString( &in8[4], hashOrder );
} }
for ( int i = 0; i < 16; i++ ) for ( int i = 0; i < 16; i++ )
@@ -218,10 +198,14 @@ int scanhash_x16r( int thr_id, struct work *work, uint32_t max_nonce,
for ( int k=0; k < 19; k++ ) for ( int k=0; k < 19; k++ )
be32enc( &endiandata[k], pdata[k] ); be32enc( &endiandata[k], pdata[k] );
// This code is suspicious. s_ntime is saved after byteswapping pdata[17]
// but is tested vs unswapped pdata[17]. This should result in calling
// getAlgoString every pass, but that doesn't seem to be the case.
// It appears to be working correctly as is.
if ( s_ntime != pdata[17] ) if ( s_ntime != pdata[17] )
{ {
uint32_t ntime = swab32(pdata[17]); uint32_t ntime = swab32(pdata[17]);
x16r_getAlgoString( (const char*) (&endiandata[1]), hashOrder ); x16_r_s_getAlgoString( (const char*) (&endiandata[1]), hashOrder );
s_ntime = ntime; s_ntime = ntime;
if ( opt_debug && !thr_id ) if ( opt_debug && !thr_id )
applog( LOG_DEBUG, "hash order %s (%08x)", hashOrder, ntime ); applog( LOG_DEBUG, "hash order %s (%08x)", hashOrder, ntime );

View File

@@ -11,7 +11,7 @@ bool register_x17_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_x17; gate->scanhash = (void*)&scanhash_x17;
gate->hash = (void*)&x17_hash; gate->hash = (void*)&x17_hash;
#endif #endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT; gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
return true; return true;
}; };

View File

@@ -16,7 +16,7 @@ bool register_xevan_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_xevan; gate->scanhash = (void*)&scanhash_xevan;
gate->hash = (void*)&xevan_hash; gate->hash = (void*)&xevan_hash;
#endif #endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT; gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
gate->set_target = (void*)&xevan_set_target; gate->set_target = (void*)&xevan_set_target;
gate->get_max64 = (void*)&get_max64_0xffffLL; gate->get_max64 = (void*)&get_max64_0xffffLL;
return true; return true;

View File

@@ -427,7 +427,7 @@ int64_t yescryptr16_get_max64()
void yescrypt_gate_base(algo_gate_t *gate ) void yescrypt_gate_base(algo_gate_t *gate )
{ {
gate->optimizations = SSE2_OPT | AVX_OPT | SHA_OPT; gate->optimizations = SSE2_OPT | SHA_OPT;
gate->scanhash = (void*)&scanhash_yescrypt; gate->scanhash = (void*)&scanhash_yescrypt;
gate->hash = (void*)&yescrypt_hash; gate->hash = (void*)&yescrypt_hash;
gate->set_target = (void*)&scrypt_set_target; gate->set_target = (void*)&scrypt_set_target;

View File

@@ -1,20 +1,13 @@
#ifndef AVXDEFS_H__ #ifndef AVXDEFS_H__
#define AVXDEFS_H__ #define AVXDEFS_H__
// Some tools to help using AVX and AVX2. // Some tools to help using SIMD vectors.
// //
// The baseline requirements for these utilities is AVX for 128 bit vectors // The baseline requirements for these utilities is SSE2 for 128 bit vectors
// and AVX2 for 256 bit vectors. However most of the 128 bit code requires // and AVX2 for 256 bit vectors.
// only SSE2 with a couple of exceptions. This provides full support for
// Intel Core2.
//
// SSSE3 is required for mm_shuffle_epi8 used by bswap functions which is
// included in Core2 but not some AMD architectures.
//
// SSE4.1 is required for _mm_blend_epi16 used by some rotate functions.
// //
// Slower versions of these functions are automatically selected at compile // Some 128 bit functions have SSSE3 or SSE4.2 implementations that are
// time. // more efficient on capable CPUs.
// //
// AVX512F has more powerful 256 bit instructions but with 512 bit vectors // AVX512F has more powerful 256 bit instructions but with 512 bit vectors
// available there is little reason to use the 256 bit enhancements. // available there is little reason to use the 256 bit enhancements.
@@ -159,6 +152,11 @@ static inline __m128i foo()
// These can't be used for compile time initialization. // These can't be used for compile time initialization.
// These should be used for all simple vectors. Use above for // These should be used for all simple vectors. Use above for
// vector array initializing. // vector array initializing.
//
// _mm_setzero_si128 uses the pxor instruction; it's unclear what
// _mm_set_epi does. If a pseudo constant is used repeatedly in a function
// it may be worthwhile to define a register variable to represent that
// constant, e.g.:
// register __m128i zero = m128_zero;
// Constant zero // Constant zero
#define m128_zero _mm_setzero_si128() #define m128_zero _mm_setzero_si128()
@@ -425,7 +423,7 @@ do { \
v1 = t; \ v1 = t; \
} while(0) } while(0)
/*
// No comparable rol. // No comparable rol.
#define mm_ror256_1x16( v1, v2 ) \ #define mm_ror256_1x16( v1, v2 ) \
do { \ do { \
@@ -433,8 +431,8 @@ do { \
v1 = _mm_alignr_epi8( v2, v1, 2 ); \ v1 = _mm_alignr_epi8( v2, v1, 2 ); \
v2 = t; \ v2 = t; \
} while(0) } while(0)
*/
/*
#define mm_ror256_1x16( v1, v2 ) \ #define mm_ror256_1x16( v1, v2 ) \
do { \ do { \
__m128i t; \ __m128i t; \
@@ -444,6 +442,7 @@ do { \
v2 = _mm_blend_epi16( v1, v2, 0x01 ); \ v2 = _mm_blend_epi16( v1, v2, 0x01 ); \
v1 = t; \ v1 = t; \
} while(0) } while(0)
*/
#define mm_rol256_1x16( v1, v2 ) \ #define mm_rol256_1x16( v1, v2 ) \
do { \ do { \
@@ -888,6 +887,41 @@ static inline void memcpy_256( __m256i *dst, const __m256i *src, int n )
#define mm256_ror512_1x128(v1, v2) _mm256_permute2x128_si256( v1, v2, 0x39 ) #define mm256_ror512_1x128(v1, v2) _mm256_permute2x128_si256( v1, v2, 0x39 )
#define mm256_rol512_1x128(v1, v2) _mm256_permute2x128_si256( v1, v2, 0x93 ) #define mm256_rol512_1x128(v1, v2) _mm256_permute2x128_si256( v1, v2, 0x93 )
// Shift the v1/v2 pair by one 64 bit element (8 bytes) using two
// _mm256_alignr_epi8 operations. Both v1 and v2 are overwritten in place and
// each argument is evaluated more than once, so pass plain variables.
// NOTE(review): _mm256_alignr_epi8 works on each 128 bit lane independently;
// confirm the result is the intended rotation of the full 512 bit pair and
// that it is the inverse of mm256_rol512_1x64.
#define mm256_ror512_1x64( v1, v2 ) \
do { \
__m256i t = _mm256_alignr_epi8( v1, v2, 8 ); \
v1 = _mm256_alignr_epi8( v2, v1, 8 ); \
v2 = t; \
} while(0)
// Rotate the v1/v2 pair left by one 64 bit element.
// Each vector is first passed through mm256_rol_1x64 (presumably a rotate of
// the 256 bit vector by one 64 bit element -- defined elsewhere), then the
// two lowest 32 bit elements (blend masks 0x03 / 0xFC) are exchanged between
// v1 and v2. Both v1 and v2 are overwritten in place.
#define mm256_rol512_1x64( v1, v2 ) \
do { \
__m256i t; \
v1 = mm256_rol_1x64( v1 ); \
v2 = mm256_rol_1x64( v2 ); \
t = _mm256_blend_epi32( v1, v2, 0x03 ); \
v2 = _mm256_blend_epi32( v1, v2, 0xFC ); \
v1 = t; \
} while(0)
// Shift the v1/v2 pair by one 32 bit element (4 bytes) using two
// _mm256_alignr_epi8 operations. Both v1 and v2 are overwritten in place and
// each argument is evaluated more than once, so pass plain variables.
// NOTE(review): _mm256_alignr_epi8 works on each 128 bit lane independently;
// confirm the result is the intended rotation of the full 512 bit pair and
// that it is the inverse of mm256_rol512_1x32.
#define mm256_ror512_1x32( v1, v2 ) \
do { \
__m256i t = _mm256_alignr_epi8( v1, v2, 4 ); \
v1 = _mm256_alignr_epi8( v2, v1, 4 ); \
v2 = t; \
} while(0)
// Rotate the v1/v2 pair left by one 32 bit element.
// Each vector is first passed through mm256_rol_1x32 (presumably a rotate of
// the 256 bit vector by one 32 bit element -- defined elsewhere), then the
// lowest 32 bit element (blend masks 0x01 / 0xFE) is exchanged between v1
// and v2. Both v1 and v2 are overwritten in place.
#define mm256_rol512_1x32( v1, v2 ) \
do { \
__m256i t; \
v1 = mm256_rol_1x32( v1 ); \
v2 = mm256_rol_1x32( v2 ); \
t = _mm256_blend_epi32( v1, v2, 0x01 ); \
v2 = _mm256_blend_epi32( v1, v2, 0xFE ); \
v1 = t; \
} while(0)
// //
// Swap bytes in vector elements // Swap bytes in vector elements
@@ -914,7 +948,7 @@ static inline void memcpy_256( __m256i *dst, const __m256i *src, int n )
// usefulness tbd // usefulness tbd
// __m128i hi, __m128i lo, returns __m256i // __m128i hi, __m128i lo, returns __m256i
#define mm256_pack_2x128( hi, lo ) \ #define mm256_pack_2x128( hi, lo ) \
_mm256_inserti128_si256( _mm256_castsi128_si256( hi ), lo, 0 ) \ _mm256_inserti128_si256( _mm256_castsi128_si256( lo ), hi, 1 ) \
// __m128i hi, __m128i lo, __m256i src // __m128i hi, __m128i lo, __m256i src
#define mm256_unpack_2x128( hi, lo, src ) \ #define mm256_unpack_2x128( hi, lo, src ) \

20
configure vendored
View File

@@ -1,6 +1,6 @@
#! /bin/sh #! /bin/sh
# Guess values for system-dependent variables and create Makefiles. # Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.8.5. # Generated by GNU Autoconf 2.69 for cpuminer-opt 3.8.6.
# #
# #
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc. # Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
@@ -577,8 +577,8 @@ MAKEFLAGS=
# Identity of this package. # Identity of this package.
PACKAGE_NAME='cpuminer-opt' PACKAGE_NAME='cpuminer-opt'
PACKAGE_TARNAME='cpuminer-opt' PACKAGE_TARNAME='cpuminer-opt'
PACKAGE_VERSION='3.8.5' PACKAGE_VERSION='3.8.6'
PACKAGE_STRING='cpuminer-opt 3.8.5' PACKAGE_STRING='cpuminer-opt 3.8.6'
PACKAGE_BUGREPORT='' PACKAGE_BUGREPORT=''
PACKAGE_URL='' PACKAGE_URL=''
@@ -1321,7 +1321,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing. # Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh. # This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF cat <<_ACEOF
\`configure' configures cpuminer-opt 3.8.5 to adapt to many kinds of systems. \`configure' configures cpuminer-opt 3.8.6 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]... Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1392,7 +1392,7 @@ fi
if test -n "$ac_init_help"; then if test -n "$ac_init_help"; then
case $ac_init_help in case $ac_init_help in
short | recursive ) echo "Configuration of cpuminer-opt 3.8.5:";; short | recursive ) echo "Configuration of cpuminer-opt 3.8.6:";;
esac esac
cat <<\_ACEOF cat <<\_ACEOF
@@ -1497,7 +1497,7 @@ fi
test -n "$ac_init_help" && exit $ac_status test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then if $ac_init_version; then
cat <<\_ACEOF cat <<\_ACEOF
cpuminer-opt configure 3.8.5 cpuminer-opt configure 3.8.6
generated by GNU Autoconf 2.69 generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc. Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2000,7 +2000,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake. running configure, to aid debugging if configure makes a mistake.
It was created by cpuminer-opt $as_me 3.8.5, which was It was created by cpuminer-opt $as_me 3.8.6, which was
generated by GNU Autoconf 2.69. Invocation command line was generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@ $ $0 $@
@@ -2981,7 +2981,7 @@ fi
# Define the identity of the package. # Define the identity of the package.
PACKAGE='cpuminer-opt' PACKAGE='cpuminer-opt'
VERSION='3.8.5' VERSION='3.8.6'
cat >>confdefs.h <<_ACEOF cat >>confdefs.h <<_ACEOF
@@ -6677,7 +6677,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their # report actual input values of CONFIG_FILES etc. instead of their
# values after options handling. # values after options handling.
ac_log=" ac_log="
This file was extended by cpuminer-opt $as_me 3.8.5, which was This file was extended by cpuminer-opt $as_me 3.8.6, which was
generated by GNU Autoconf 2.69. Invocation command line was generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES CONFIG_FILES = $CONFIG_FILES
@@ -6743,7 +6743,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\ ac_cs_version="\\
cpuminer-opt config.status 3.8.5 cpuminer-opt config.status 3.8.6
configured by $0, generated by GNU Autoconf 2.69, configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\" with options \\"\$ac_cs_config\\"

View File

@@ -1,4 +1,4 @@
AC_INIT([cpuminer-opt], [3.8.5]) AC_INIT([cpuminer-opt], [3.8.6])
AC_PREREQ([2.59c]) AC_PREREQ([2.59c])
AC_CANONICAL_SYSTEM AC_CANONICAL_SYSTEM

View File

@@ -550,6 +550,7 @@ enum algos {
ALGO_X14, ALGO_X14,
ALGO_X15, ALGO_X15,
ALGO_X16R, ALGO_X16R,
ALGO_X16S,
ALGO_X17, ALGO_X17,
ALGO_XEVAN, ALGO_XEVAN,
ALGO_YESCRYPT, ALGO_YESCRYPT,
@@ -629,6 +630,7 @@ static const char* const algo_names[] = {
"x14", "x14",
"x15", "x15",
"x16r", "x16r",
"x16s",
"x17", "x17",
"xevan", "xevan",
"yescrypt", "yescrypt",
@@ -767,6 +769,7 @@ Options:\n\
x14 X14\n\ x14 X14\n\
x15 X15\n\ x15 X15\n\
x16r Ravencoin (RVN)\n\ x16r Ravencoin (RVN)\n\
x16s Pigeoncoin (PGN)\n\
x17\n\ x17\n\
xevan Bitsend (BSD)\n\ xevan Bitsend (BSD)\n\
yescrypt Globlboost-Y (BSTY)\n\ yescrypt Globlboost-Y (BSTY)\n\