This commit is contained in:
Jay D Dee
2018-03-31 12:50:52 -04:00
parent f449c6725f
commit dd5e552357
51 changed files with 241 additions and 265 deletions

View File

@@ -107,9 +107,10 @@ Supported Algorithms
x13sm3 hsr (Hshare)
x14 X14
x15 X15
x16r Ravencoin
x16r Ravencoin (RVN)
x16s                  Pigeoncoin (PGN)
x17
xevan Bitsend
xevan Bitsend (BSD)
yescrypt Globalboost-Y (BSTY)
yescryptr8 BitZeny (ZNY)
yescryptr16 Yenten (YTN)
@@ -119,6 +120,8 @@ Supported Algorithms
Errata
------
Neoscrypt crashes on Windows, use legacy version.
AMD CPUs older than Piledriver, including Athlon x2 and Phenom II x4, are not
supported by cpuminer-opt due to an incompatible implementation of SSE2 on
these CPUs. Some algos may crash the miner with an invalid instruction.

View File

@@ -160,6 +160,12 @@ Support for even older x86_64 without AES_NI or SSE2 is not available.
Change Log
----------
v3.8.6
Fixed argon2 regression in v3.8.5.
Added x16s algo for Pigeoncoin.
Some code cleanup.
v3.8.5
Added argon2d-crds and argon2d-dyn algos.

View File

@@ -224,6 +224,7 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
case ALGO_X14: register_x14_algo ( gate ); break;
case ALGO_X15: register_x15_algo ( gate ); break;
case ALGO_X16R: register_x16r_algo ( gate ); break;
case ALGO_X16S: register_x16s_algo ( gate ); break;
case ALGO_X17: register_x17_algo ( gate ); break;
case ALGO_XEVAN: register_xevan_algo ( gate ); break;
case ALGO_YESCRYPT: register_yescrypt_algo ( gate ); break;

View File

@@ -295,7 +295,7 @@ void ar2_initial_hash(uint8_t *blockhash, argon2_context *context,
store32(&value, ADLEN);
my_blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
blake2b_final(&BlakeHash, blockhash, ARGON2_PREHASH_DIGEST_LENGTH);
ar2_blake2b_final(&BlakeHash, blockhash, ARGON2_PREHASH_DIGEST_LENGTH);
}
int ar2_initialize(argon2_instance_t *instance, argon2_context *context) {

View File

@@ -70,7 +70,7 @@ bool register_argon2d_crds_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_argon2d_crds;
gate->hash = (void*)&argon2d_crds_hash;
gate->set_target = (void*)&scrypt_set_target;
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT;
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
}
// Dynamic
@@ -138,6 +138,6 @@ bool register_argon2d_dyn_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_argon2d_dyn;
gate->hash = (void*)&argon2d_dyn_hash;
gate->set_target = (void*)&scrypt_set_target;
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT;
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
}

View File

@@ -10,7 +10,7 @@ bool register_blake_algo( algo_gate_t* gate )
gate->optimizations = AVX2_OPT;
gate->get_max64 = (void*)&blake_get_max64;
//#if defined (__AVX2__) && defined (FOUR_WAY)
// gate->optimizations = SSE2_OPT | AVX_OPT | AVX2_OPT;
// gate->optimizations = SSE2_OPT | AVX2_OPT;
// gate->scanhash = (void*)&scanhash_blake_8way;
// gate->hash = (void*)&blakehash_8way;
#if defined(BLAKE_4WAY)

View File

@@ -20,7 +20,7 @@ bool register_blake2s_algo( algo_gate_t* gate )
gate->hash = (void*)&blake2s_hash;
#endif
gate->get_max64 = (void*)&blake2s_get_max64;
gate->optimizations = SSE42_OPT | AVX_OPT | AVX2_OPT;
gate->optimizations = SSE42_OPT | AVX2_OPT;
return true;
};

View File

@@ -22,7 +22,7 @@ bool register_vanilla_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_blakecoin;
gate->hash = (void*)&blakecoinhash;
#endif
gate->optimizations = SSE42_OPT | AVX_OPT | AVX2_OPT;
gate->optimizations = SSE42_OPT | AVX2_OPT;
gate->get_max64 = (void*)&blakecoin_get_max64;
return true;
}

View File

@@ -83,7 +83,8 @@ void ExpandAESKey256(__m128i *keys, const __m128i *KeyBuf)
keys[14] = tmp1;
}
#ifdef __AVX__
#ifdef __SSE4_2__
//#ifdef __AVX__
#define AESENC(i,j) \
State[j] = _mm_aesenc_si128(State[j], ExpandedKey[j][i]);

View File

@@ -199,7 +199,7 @@ bool register_hodl_algo( algo_gate_t* gate )
// return false;
// }
pthread_barrier_init( &hodl_barrier, NULL, opt_n_threads );
gate->optimizations = AES_OPT | AVX_OPT | AVX2_OPT;
gate->optimizations = AES_OPT | SSE42_OPT | AVX2_OPT;
gate->scanhash = (void*)&hodl_scanhash;
gate->get_new_work = (void*)&hodl_get_new_work;
gate->longpoll_rpc_call = (void*)&hodl_longpoll_rpc_call;

View File

@@ -17,7 +17,8 @@ void GenerateGarbageCore( CacheEntry *Garbage, int ThreadID, int ThreadCount,
const uint32_t StartChunk = ThreadID * Chunk;
const uint32_t EndChunk = StartChunk + Chunk;
#ifdef __AVX__
#ifdef __SSE4_2__
//#ifdef __AVX__
uint64_t* TempBufs[ SHA512_PARALLEL_N ] ;
uint64_t* desination[ SHA512_PARALLEL_N ];
@@ -63,7 +64,8 @@ void Rev256(uint32_t *Dest, const uint32_t *Src)
int scanhash_hodl_wolf( int threadNumber, struct work* work, uint32_t max_nonce,
uint64_t *hashes_done )
{
#ifdef __AVX__
#ifdef __SSE4_2__
//#ifdef __AVX__
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
CacheEntry *Garbage = (CacheEntry*)hodl_scratchbuf;

View File

@@ -1,5 +1,6 @@
#ifndef __AVX2__
#ifdef __AVX__
#ifdef __SSE4_2__
//#ifdef __AVX__
//Dependencies
#include <string.h>

View File

@@ -6,7 +6,8 @@
void ExpandAESKey256(__m128i *keys, const __m128i *KeyBuf);
#ifdef __AVX__
#ifdef __SSE4_2__
//#ifdef __AVX__
#define AES_PARALLEL_N 8
#define BLOCK_COUNT 256

View File

@@ -13,7 +13,7 @@ bool register_allium_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_allium;
gate->hash = (void*)&allium_hash;
#endif
gate->optimizations = SSE2_OPT | AES_OPT | SSE42_OPT | AVX_OPT | AVX2_OPT;
gate->optimizations = SSE2_OPT | AES_OPT | SSE42_OPT | AVX2_OPT;
gate->set_target = (void*)&alt_set_target;
gate->get_max64 = (void*)&get_max64_0xFFFFLL;
return true;

View File

@@ -17,7 +17,7 @@ bool register_lyra2h_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_lyra2h;
gate->hash = (void*)&lyra2h_hash;
#endif
gate->optimizations = SSE42_OPT | AVX_OPT | AVX2_OPT;
gate->optimizations = SSE42_OPT | AVX2_OPT;
gate->get_max64 = (void*)&get_max64_0xffffLL;
gate->set_target = (void*)&lyra2h_set_target;
return true;

View File

@@ -132,7 +132,7 @@ void lyra2re_set_target ( struct work* work, double job_diff )
bool register_lyra2re_algo( algo_gate_t* gate )
{
init_lyra2re_ctx();
gate->optimizations = SSE2_OPT | AES_OPT | SSE42_OPT | AVX_OPT | AVX2_OPT;
gate->optimizations = SSE2_OPT | AES_OPT | SSE42_OPT | AVX2_OPT;
gate->scanhash = (void*)&scanhash_lyra2re;
gate->hash = (void*)&lyra2re_hash;
gate->get_max64 = (void*)&lyra2re_get_max64;

View File

@@ -31,7 +31,7 @@ bool register_lyra2rev2_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_lyra2rev2;
gate->hash = (void*)&lyra2rev2_hash;
#endif
gate->optimizations = SSE2_OPT | AES_OPT | SSE42_OPT | AVX_OPT | AVX2_OPT;
gate->optimizations = SSE2_OPT | AES_OPT | SSE42_OPT | AVX2_OPT;
gate->miner_thread_init = (void*)&lyra2rev2_thread_init;
gate->set_target = (void*)&lyra2rev2_set_target;
return true;

View File

@@ -21,7 +21,7 @@ bool register_lyra2z_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_lyra2z;
gate->hash = (void*)&lyra2z_hash;
#endif
gate->optimizations = SSE42_OPT | AVX_OPT | AVX2_OPT;
gate->optimizations = SSE42_OPT | AVX2_OPT;
gate->get_max64 = (void*)&get_max64_0xffffLL;
gate->set_target = (void*)&lyra2z_set_target;
return true;

View File

@@ -69,7 +69,7 @@ bool lyra2z330_thread_init()
bool register_lyra2z330_algo( algo_gate_t* gate )
{
gate->optimizations = SSE42_OPT | AVX_OPT | AVX2_OPT;
gate->optimizations = SSE42_OPT | AVX2_OPT;
gate->miner_thread_init = (void*)&lyra2z330_thread_init;
gate->scanhash = (void*)&scanhash_lyra2z330;
gate->hash = (void*)&lyra2z330_hash;

View File

@@ -375,7 +375,7 @@ out:
bool register_m7m_algo( algo_gate_t *gate )
{
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT | SHA_OPT;
gate->optimizations = SHA_OPT;
init_m7m_ctx();
gate->scanhash = (void*)scanhash_m7m_hash;
gate->build_stratum_request = (void*)&std_be_build_stratum_request;

View File

@@ -11,7 +11,7 @@ bool register_deep_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_deep;
gate->hash = (void*)&deep_hash;
#endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT;
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
return true;
};

View File

@@ -11,7 +11,7 @@ bool register_qubit_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_qubit;
gate->hash = (void*)&qubit_hash;
#endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT;
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
return true;
};

View File

@@ -110,7 +110,7 @@ int64_t lbry_get_max64() { return 0x1ffffLL; }
bool register_lbry_algo( algo_gate_t* gate )
{
gate->optimizations = SSE2_OPT | AVX_OPT | AVX2_OPT | SHA_OPT;
gate->optimizations = AVX2_OPT | SHA_OPT;
#if defined (LBRY_8WAY)
gate->scanhash = (void*)&scanhash_lbry_8way;
gate->hash = (void*)&lbry_8way_hash;

View File

@@ -778,7 +778,7 @@ bool scrypt_miner_thread_init( int thr_id )
bool register_scrypt_algo( algo_gate_t* gate )
{
gate->optimizations = SSE2_OPT | AVX_OPT | AVX2_OPT;
gate->optimizations = SSE2_OPT | AVX2_OPT;
gate->miner_thread_init =(void*)&scrypt_miner_thread_init;
gate->scanhash = (void*)&scanhash_scrypt;
// gate->hash = (void*)&scrypt_1024_1_1_256_24way;

View File

@@ -373,9 +373,6 @@ sha256_8way_round( __m256i *in, __m256i r[8] )
H = r[7];
SHA2s_8WAY_STEP( A, B, C, D, E, F, G, H, 0, 0 );
//printf("sha256 8 step: D= %08lx H= %08lx\n",*(uint32_t*)&D,*(uint32_t*)&H);
SHA2s_8WAY_STEP( H, A, B, C, D, E, F, G, 1, 0 );
SHA2s_8WAY_STEP( G, H, A, B, C, D, E, F, 2, 0 );
SHA2s_8WAY_STEP( F, G, H, A, B, C, D, E, 3, 0 );
@@ -392,8 +389,6 @@ sha256_8way_round( __m256i *in, __m256i r[8] )
SHA2s_8WAY_STEP( C, D, E, F, G, H, A, B, 14, 0 );
SHA2s_8WAY_STEP( B, C, D, E, F, G, H, A, 15, 0 );
//printf("sha256 8 step: A= %08lx B= %08lx\n",*(uint32_t*)&A,*(uint32_t*)&B);
for ( int j = 16; j < 64; j += 16 )
{
W[ 0] = SHA2x_MEXP( 14, 9, 1, 0 );
@@ -460,17 +455,7 @@ void sha256_8way( sha256_8way_context *sc, const void *data, size_t len )
__m256i *vdata = (__m256i*)data;
size_t ptr;
const int buf_size = 64;
/*
printf("sha256 8 update1: len= %d\n", len);
uint32_t* d = (uint32_t*)data;
printf("sha256 8 in: %08lx %08lx %08lx %08lx\n",d[0],d[8],d[16],d[24]);
printf("sha256 8 in: %08lx %08lx %08lx %08lx\n",d[32],d[40],d[48],d[56]);
printf("sha256 8 in: %08lx %08lx %08lx %08lx\n",d[64],d[72],d[80],d[88]);
printf("sha256 8 in: %08lx %08lx %08lx %08lx\n",d[96],d[104],d[112],d[120]);
printf("sha256 8 in: %08lx %08lx %08lx %08lx\n",d[128],d[136],d[144],d[152]);
printf("sha256 8 in: %08lx %08lx %08lx %08lx\n",d[160],d[168],d[176],d[184]);
printf("sha256 8 in: %08lx %08lx %08lx %08lx\n",d[192],d[200],d[208],d[216]);
*/
ptr = (unsigned)sc->count_low & (buf_size - 1U);
while ( len > 0 )
{
@@ -486,24 +471,7 @@ printf("sha256 8 in: %08lx %08lx %08lx %08lx\n",d[192],d[200],d[208],d[216]);
len -= clen;
if ( ptr == buf_size )
{
/*
printf("sha256 8 update2: compress\n");
d = (uint32_t*)sc->buf;
printf("sha256 8 buf: %08lx %08lx %08lx %08lx\n",d[0],d[8],d[16],d[24]);
printf("sha256 8 buf: %08lx %08lx %08lx %08lx\n",d[32],d[40],d[48],d[56]);
printf("sha256 8 buf: %08lx %08lx %08lx %08lx\n",d[64],d[72],d[80],d[88]);
printf("sha256 8 buf: %08lx %08lx %08lx %08lx\n",d[96],d[104],d[112],d[120]);
d= (uint32_t*)sc->val;
printf("sha256 8 val: %08lx %08lx %08lx %08lx\n",d[0],d[8],d[16],d[24]);
printf("sha256 8 val: %08lx %08lx %08lx %08lx\n",d[32],d[40],d[48],d[56]);
*/
sha256_8way_round( sc->buf, sc->val );
/*
printf("sha256 8 update3\n");
d= (uint32_t*)sc->val;
printf("sha256 8 val: %08lx %08lx %08lx %08lx\n",d[0],d[8],d[16],d[24]);
printf("sha256 8 val: %08lx %08lx %08lx %08lx\n",d[32],d[40],d[48],d[56]);
*/
ptr = 0;
}
clow = sc->count_low;
@@ -522,32 +490,13 @@ void sha256_8way_close( sha256_8way_context *sc, void *dst )
const int pad = buf_size - 8;
ptr = (unsigned)sc->count_low & (buf_size - 1U);
/*
printf("sha256 8 close1: ptr= %d\n", ptr);
uint32_t* d = (uint32_t*)sc->buf;
printf("sha256 8 buf: %08lx %08lx %08lx %08lx\n",d[0],d[8],d[16],d[24]);
printf("sha256 8 buf: %08lx %08lx %08lx %08lx\n",d[32],d[40],d[48],d[56]);
printf("sha256 8 buf: %08lx %08lx %08lx %08lx\n",d[64],d[72],d[80],d[88]);
printf("sha256 8 buf: %08lx %08lx %08lx %08lx\n",d[96],d[104],d[112],d[120]);
*/
sc->buf[ ptr>>2 ] = _mm256_set1_epi32( 0x80 );
ptr += 4;
if ( ptr > pad )
{
memset_zero_256( sc->buf + (ptr>>2), (buf_size - ptr) >> 2 );
//printf("sha256 8 close2: compress\n");
//uint32_t* d = (uint32_t*)sc->buf;
//printf("sha256 8 buf: %08lx %08lx %08lx %08lx\n",d[0],d[8],d[16],d[24]);
sha256_8way_round( sc->buf, sc->val );
//d= (uint32_t*)sc->val;
//printf("sha256 8 val: %08lx %08lx %08lx %08lx\n",d[0],d[8],d[16],d[24]);
memset_zero_256( sc->buf, pad >> 2 );
}
else
@@ -561,23 +510,9 @@ printf("sha256 8 buf: %08lx %08lx %08lx %08lx\n",d[96],d[104],d[112],d[120]);
mm256_bswap_32( _mm256_set1_epi32( high ) );
sc->buf[ ( pad+4 ) >> 2 ] =
mm256_bswap_32( _mm256_set1_epi32( low ) );
/*
d = (uint32_t*)sc->buf;
printf("sha256 8 close3: compress\n");
printf("sha256 8 buf: %08lx %08lx %08lx %08lx\n",d[0],d[8],d[16],d[24]);
printf("sha256 8 buf: %08lx %08lx %08lx %08lx\n",d[32],d[40],d[48],d[56]);
printf("sha256 8 buf: %08lx %08lx %08lx %08lx\n",d[64],d[72],d[80],d[88]);
printf("sha256 8 buf: %08lx %08lx %08lx %08lx\n",d[96],d[104],d[112],d[120]);
d= (uint32_t*)sc->val;
printf("sha256 8 val: %08lx %08lx %08lx %08lx\n",d[0],d[8],d[16],d[24]);
printf("sha256 8 val: %08lx %08lx %08lx %08lx\n",d[32],d[40],d[48],d[56]);
*/
sha256_8way_round( sc->buf, sc->val );
/*
printf("sha256 8 val: %08lx %08lx %08lx %08lx\n",d[0],d[8],d[16],d[24]);
printf("sha256 8 val: %08lx %08lx %08lx %08lx\n",d[32],d[40],d[48],d[56]);
*/
for ( u = 0; u < 8; u ++ )
((__m256i*)dst)[u] = mm256_bswap_32( sc->val[u] );
}

View File

@@ -11,13 +11,6 @@ bool register_sha256t_algo( algo_gate_t* gate )
#else
gate->scanhash = (void*)&scanhash_sha256t;
gate->hash = (void*)&sha256t_hash;
/*
#ifndef USE_SPH_SHA
SHA256_Init( &sha256t_ctx );
#else
sph_sha256_init( &sha256t_ctx );
#endif
*/
#endif
gate->optimizations = SSE42_OPT | AVX2_OPT | SHA_OPT;
gate->get_max64 = (void*)&get_max64_0x3ffff;

View File

@@ -3,68 +3,43 @@
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include "sph_sha2.h"
#include <openssl/sha.h>
#if !defined(SHA256T_4WAY)
#ifndef USE_SPH_SHA
static __thread SHA256_CTX sha256t_ctx __attribute__ ((aligned (64)));
#else
static __thread sph_sha256_context sha256t_ctx __attribute__ ((aligned (64)));
#endif
static __thread SHA256_CTX sha256t_ctx __attribute__ ((aligned (64)));
void sha256t_midstate( const void* input )
{
#ifndef USE_SPH_SHA
SHA256_Init( &sha256t_ctx );
SHA256_Update( &sha256t_ctx, input, 64 );
#else
sph_sha256_init( &sha256t_ctx );
sph_sha256( &sha256t_ctx, input, 64 );
#endif
}
void sha256t_hash( void* output, const void* input )
{
uint32_t _ALIGN(64) hashA[16];
uint32_t _ALIGN(64) hash[16];
const int midlen = 64; // bytes
const int tail = 80 - midlen; // 16
#ifndef USE_SPH_SHA
SHA256_CTX ctx_sha256 __attribute__ ((aligned (64)));
memcpy( &ctx_sha256, &sha256t_ctx, sizeof sha256t_ctx );
SHA256_CTX ctx __attribute__ ((aligned (64)));
memcpy( &ctx, &sha256t_ctx, sizeof sha256t_ctx );
SHA256_Update( &ctx_sha256, input + midlen, tail );
SHA256_Final( (unsigned char*)hashA, &ctx_sha256 );
SHA256_Update( &ctx, input + midlen, tail );
SHA256_Final( (unsigned char*)hash, &ctx );
SHA256_Init( &ctx_sha256 );
SHA256_Update( &ctx_sha256, hashA, 32 );
SHA256_Final( (unsigned char*)hashA, &ctx_sha256 );
SHA256_Init( &ctx );
SHA256_Update( &ctx, hash, 32 );
SHA256_Final( (unsigned char*)hash, &ctx );
SHA256_Init( &ctx_sha256 );
SHA256_Update( &ctx_sha256, hashA, 32 );
SHA256_Final( (unsigned char*)hashA, &ctx_sha256 );
#else
sph_sha256_context ctx_sha256 __attribute__ ((aligned (64)));
memcpy( &ctx_sha256, &sha256t_mid, sizeof sha256t_mid );
SHA256_Init( &ctx );
SHA256_Update( &ctx, hash, 32 );
SHA256_Final( (unsigned char*)hash, &ctx );
sph_sha256( &ctx_sha256, input + midlen, tail );
sph_sha256_close( &ctx_sha256, hashA );
sph_sha256_init( &ctx_sha256 );
sph_sha256( &ctx_sha256, hashA, 32 );
sph_sha256_close( &ctx_sha256, hashA );
sph_sha256_init( &ctx_sha256 );
sph_sha256( &ctx_sha256, hashA, 32 );
sph_sha256_close( &ctx_sha256, hashA );
#endif
memcpy( output, hashA, 32 );
memcpy( output, hash, 32 );
}
int scanhash_sha256t(int thr_id, struct work *work,
uint32_t max_nonce, uint64_t *hashes_done)
int scanhash_sha256t( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done)
{
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
@@ -96,39 +71,26 @@ int scanhash_sha256t(int thr_id, struct work *work,
};
// we need bigendian data...
for (int k = 0; k < 19; k++)
be32enc(&endiandata[k], pdata[k]);
for ( int k = 0; k < 19; k++ )
be32enc( &endiandata[k], pdata[k] );
sha256t_midstate( endiandata );
#ifdef DEBUG_ALGO
if (Htarg != 0)
printf("[%d] Htarg=%X\n", thr_id, Htarg);
#endif
for (int m=0; m < 6; m++) {
if (Htarg <= htmax[m]) {
for ( int m = 0; m < 6; m++ )
{
if ( Htarg <= htmax[m] )
{
uint32_t mask = masks[m];
do {
pdata[19] = ++n;
be32enc(&endiandata[19], n);
sha256t_hash( hash64, endiandata );
#ifndef DEBUG_ALGO
if ((!(hash64[7] & mask)) && fulltest(hash64, ptarget)) {
if ( ( !(hash64[7] & mask) ) && fulltest( hash64, ptarget ) )
{
*hashes_done = n - first_nonce + 1;
return true;
}
#else
if (!(n % 0x1000) && !thr_id) printf(".");
if (!(hash64[7] & mask)) {
printf("[%d]",thr_id);
if (fulltest(hash64, ptarget)) {
*hashes_done = n - first_nonce + 1;
return true;
}
}
#endif
} while (n < max_nonce && !work_restart[thr_id].restart);
// see blake.c if else to understand the loop on htmax => mask
} while ( n < max_nonce && !work_restart[thr_id].restart );
break;
}
}

View File

@@ -59,17 +59,28 @@ static const sph_u32 IV512[] = {
C32(0xE275EADE), C32(0x502D9FCD), C32(0xB9357178), C32(0x022A4B9A)
};
// Partially rotate elements in two 128 bit vectors as one 256 bit vector
// and return the rotated high 128 bits.
// Partially rotate elements in two 128 bit vectors a & b as one 256 bit vector
// and return the rotated 128 bit vector a.
// a[3:0] = { b[0], a[3], a[2], a[1] }
#if defined(__SSSE3__)
#define mm_ror256hi_1x32( hi, lo ) _mm_alignr_epi8( lo, hi, 4 )
#define mm_ror256hi_1x32( a, b ) _mm_alignr_epi8( b, a, 4 )
#else // SSE2
#define mm_ror256hi_1x32( hi, lo ) \
_mm_or_si128( _mm_srli_si128( hi, 4 ), \
_mm_slli_si128( lo, 12 ) )
#define mm_ror256hi_1x32( a, b ) \
_mm_or_si128( _mm_srli_si128( a, 4 ), \
_mm_slli_si128( b, 12 ) )
#endif
#if defined(__AVX2__)
// 2 way version of above
// a[7:0] = { b[4], a[7], a[6], a[5], b[0], a[3], a[2], a[1] }
#define mm256_ror2x256hi_1x32( a, b ) \
_mm256_blend_epi32( mm256_ror256_1x32( a ), \
mm256_rol256_3x32( b ), 0x88 )
#endif

View File

@@ -11,7 +11,7 @@ bool register_c11_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_c11;
gate->hash = (void*)&c11_hash;
#endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT;
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
gate->get_max64 = (void*)&get_max64_0x3ffff;
return true;
};

View File

@@ -17,7 +17,7 @@ bool register_timetravel_algo( algo_gate_t* gate )
gate->hash = (void*)&timetravel_hash;
#endif
gate->set_target = (void*)&tt8_set_target;
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT;
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
gate->get_max64 = (void*)&get_max64_0xffffLL;
return true;
};

View File

@@ -17,7 +17,7 @@ bool register_timetravel10_algo( algo_gate_t* gate )
gate->hash = (void*)&timetravel10_hash;
#endif
gate->set_target = (void*)&tt10_set_target;
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT;
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
gate->get_max64 = (void*)&get_max64_0xffffLL;
return true;
};

View File

@@ -11,7 +11,7 @@ bool register_x11_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_x11;
gate->hash = (void*)&x11_hash;
#endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT;
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
gate->get_max64 = (void*)&get_max64_0x3ffff;
return true;
};

View File

@@ -89,7 +89,7 @@ bool register_x11evo_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_x11evo;
gate->hash = (void*)&x11evo_hash;
#endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT;
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
return true;
};

View File

@@ -11,7 +11,7 @@ bool register_x11gost_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_x11gost;
gate->hash = (void*)&x11gost_hash;
#endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT;
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
gate->get_max64 = (void*)&get_max64_0x3ffff;
return true;
};

View File

@@ -2,7 +2,7 @@
bool register_skunk_algo( algo_gate_t* gate )
{
gate->optimizations = SSE2_OPT | AVX_OPT | AVX2_OPT;
gate->optimizations = SSE2_OPT | AVX2_OPT;
#if defined (SKUNK_4WAY)
gate->miner_thread_init = (void*)&skunk_4way_thread_init;
gate->scanhash = (void*)&scanhash_skunk_4way;

View File

@@ -2,7 +2,7 @@
bool register_polytimos_algo( algo_gate_t* gate )
{
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT;
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
#ifdef POLYTIMOS_4WAY
init_polytimos_4way_ctx();
gate->scanhash = (void*)&scanhash_polytimos_4way;

View File

@@ -11,7 +11,7 @@ bool register_veltor_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_veltor;
gate->hash = (void*)&veltor_hash;
#endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT;
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
gate->get_max64 = (void*)&get_max64_0x3ffff;
return true;
};

View File

@@ -11,7 +11,7 @@ bool register_x14_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_x14;
gate->hash = (void*)&x14hash;
#endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT;
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
gate->get_max64 = (void*)&get_max64_0x3ffff;
return true;
};

View File

@@ -11,7 +11,7 @@ bool register_x15_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_x15;
gate->hash = (void*)&x15hash;
#endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT;
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
return true;
};

View File

@@ -429,7 +429,7 @@ int scanhash_hmq1725( int thr_id, struct work *work, int32_t max_nonce,
bool register_hmq1725_algo( algo_gate_t* gate )
{
init_hmq1725_ctx();
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT | SHA_OPT;
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | SHA_OPT;
gate->set_target = (void*)&scrypt_set_target;
gate->scanhash = (void*)&scanhash_hmq1725;
gate->hash = (void*)&hmq1725hash;

View File

@@ -86,7 +86,7 @@ void x16r_4way_hash( void* output, const void* input )
if ( s_ntime == UINT32_MAX )
{
const uint8_t* tmp = (uint8_t*) in0;
x16r_getAlgoString( &tmp[4], hashOrder );
x16_r_s_getAlgoString( &tmp[4], hashOrder );
}
// Input data is both 64 bit interleaved (input)
@@ -321,10 +321,11 @@ int scanhash_x16r_4way( int thr_id, struct work *work, uint32_t max_nonce,
for ( int k=0; k < 19; k++ )
be32enc( &endiandata[k], pdata[k] );
if ( s_ntime != pdata[17] )
// if ( s_ntime != pdata[17] )
if ( s_ntime != endiandata[17] )
{
uint32_t ntime = swab32(pdata[17]);
x16r_getAlgoString( (const char*) (&endiandata[1]), hashOrder );
x16_r_s_getAlgoString( (const char*) (&endiandata[1]), hashOrder );
s_ntime = ntime;
if ( opt_debug && !thr_id )
applog( LOG_DEBUG, "hash order %s (%08x)", hashOrder, ntime );

View File

@@ -1,6 +1,6 @@
#include "x16r-gate.h"
void x16r_getAlgoString( const uint8_t* prevblock, char *output )
void x16r_getAlgoString( const char* prevblock, char *output )
{
char *sptr = output;
for ( int j = 0; j < X16R_HASH_FUNC_COUNT; j++ )
@@ -16,6 +16,22 @@ void x16r_getAlgoString( const uint8_t* prevblock, char *output )
*sptr = '\0';
}
void x16s_getAlgoString( const char* prevblock, char *output )
{
uint8_t* data = (uint8_t*)prevblock;
strcpy( output, "0123456789ABCDEF" );
for ( int i = 0; i < 16; i++ )
{
uint8_t b = (15 - i) >> 1; // 16 ascii hex chars, reversed
uint8_t algoDigit = (i & 1) ? data[b] & 0xF : data[b] >> 4;
int offset = algoDigit;
// insert the nth character at the front
char oldVal = output[offset];
for( int j = offset; j-- > 0; )
output[j+1] = output[j];
output[0] = oldVal;
}
}
bool register_x16r_algo( algo_gate_t* gate )
{
@@ -28,8 +44,26 @@ bool register_x16r_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_x16r;
gate->hash = (void*)&x16r_hash;
#endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT;
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
gate->set_target = (void*)&alt_set_target;
x16_r_s_getAlgoString = (void*)&x16r_getAlgoString;
return true;
};
bool register_x16s_algo( algo_gate_t* gate )
{
#if defined (X16R_4WAY)
init_x16r_4way_ctx();
gate->scanhash = (void*)&scanhash_x16r_4way;
gate->hash = (void*)&x16r_4way_hash;
#else
init_x16r_ctx();
gate->scanhash = (void*)&scanhash_x16r;
gate->hash = (void*)&x16r_hash;
#endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
gate->set_target = (void*)&alt_set_target;
x16_r_s_getAlgoString = (void*)&x16s_getAlgoString;
return true;
};

View File

@@ -29,8 +29,12 @@ enum x16r_Algo {
X16R_HASH_FUNC_COUNT
};
bool (*x16_r_s_getAlgoString) ( const char*, char* );
void x16r_getAlgoString( const char* prevblock, char *output );
void x16s_getAlgoString( const char* prevblock, char *output );
bool register_x16r_algo( algo_gate_t* gate );
void x16r_getAlgoString( const uint8_t* prevblock, char *output );
bool register_x16s_algo( algo_gate_t* gate );
#if defined(X16R_4WAY)

View File

@@ -61,27 +61,7 @@ x16r_ctx_holder x16r_ctx __attribute__ ((aligned (64)));
void init_x16r_ctx()
{
//#ifdef NO_AES_NI
// sph_groestl512_init(&x16r_ctx.groestl );
// sph_echo512_init(&x16r_ctx.echo);
//#else
// init_echo( &x16r_ctx.echo, 512 );
// init_groestl( &x16r_ctx.groestl, 64 );
//#endif
// sph_blake512_init( &x16r_ctx.blake );
// sph_bmw512_init( &x16r_ctx.bmw );
// sph_skein512_init( &x16r_ctx.bmw );
// sph_jh512_init( &x16r_ctx.jh );
// sph_keccak512_init( &x16r_ctx.keccak );
// init_luffa( &x16r_ctx.luffa, 512 );
cubehashInit( &x16r_ctx.cube, 512, 16, 32 );
// sph_shavite512_init( &x16r_ctx.shavite );
// init_sd( &x16r_ctx.simd, 512 );
// sph_hamsi512_init( &x16r_ctx.hamsi );
// sph_fugue512_init( &x16r_ctx.fugue );
// sph_shabal512_init( &x16r_ctx.shabal );
// sph_whirlpool_init( &x16r_ctx.whirlpool );
// SHA512_Init( &x16r_ctx.sha512 );
};
void x16r_hash( void* output, const void* input )
@@ -94,7 +74,7 @@ void x16r_hash( void* output, const void* input )
if ( s_ntime == UINT32_MAX )
{
const uint8_t* in8 = (uint8_t*) input;
x16r_getAlgoString( &in8[4], hashOrder );
x16_r_s_getAlgoString( &in8[4], hashOrder );
}
for ( int i = 0; i < 16; i++ )
@@ -218,10 +198,14 @@ int scanhash_x16r( int thr_id, struct work *work, uint32_t max_nonce,
for ( int k=0; k < 19; k++ )
be32enc( &endiandata[k], pdata[k] );
// This code is suspicious. s_ntime is saved after byteswapping pdata[17]
// but is tested vs unswapped pdata[17]. This should result in calling
// getAlgoString every pass, but that doesn't seem to be the case.
// It appears to be working correctly as is.
if ( s_ntime != pdata[17] )
{
uint32_t ntime = swab32(pdata[17]);
x16r_getAlgoString( (const char*) (&endiandata[1]), hashOrder );
x16_r_s_getAlgoString( (const char*) (&endiandata[1]), hashOrder );
s_ntime = ntime;
if ( opt_debug && !thr_id )
applog( LOG_DEBUG, "hash order %s (%08x)", hashOrder, ntime );

View File

@@ -11,7 +11,7 @@ bool register_x17_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_x17;
gate->hash = (void*)&x17_hash;
#endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT;
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
return true;
};

View File

@@ -16,7 +16,7 @@ bool register_xevan_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_xevan;
gate->hash = (void*)&xevan_hash;
#endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT;
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
gate->set_target = (void*)&xevan_set_target;
gate->get_max64 = (void*)&get_max64_0xffffLL;
return true;

View File

@@ -427,7 +427,7 @@ int64_t yescryptr16_get_max64()
void yescrypt_gate_base(algo_gate_t *gate )
{
gate->optimizations = SSE2_OPT | AVX_OPT | SHA_OPT;
gate->optimizations = SSE2_OPT | SHA_OPT;
gate->scanhash = (void*)&scanhash_yescrypt;
gate->hash = (void*)&yescrypt_hash;
gate->set_target = (void*)&scrypt_set_target;

View File

@@ -1,20 +1,13 @@
#ifndef AVXDEFS_H__
#define AVXDEFS_H__
// Some tools to help using AVX and AVX2.
// Some tools to help using SIMD vectors.
//
// The baseline requirements for these utilities is AVX for 128 bit vectors
// and AVX2 for 256 bit vectors. However most of the 128 bit code requires
// only SSE2 with a couple of exceptions. This provides full support for
// Intel Core2.
// The baseline requirements for these utilities is SSE2 for 128 bit vectors
// and AVX2 for 256 bit vectors.
//
// SSSE3 is required for mm_shuffle_epi8 used by bswap functions which is
// included in Core2 but not some AMD architectures.
//
// SSE4.1 is required for _mm_blend_epi16 used by some rotate functions.
//
// Slower versions of these functions are automatically selected at compile
// time.
// Some 128 bit functions have SSSE3 or SSE4.2 implementations that are
// more efficient on capable CPUs.
//
// AVX512F has more powerful 256 bit instructions but with 512 bit vectors
// available there is little reason to use the 256 bit enhancements.
@@ -159,6 +152,11 @@ static inline __m128i foo()
// These can't be used for compile time initialization.
// These should be used for all simple vectors. Use above for
// vector array initializing.
//
// _mm_setzero_si128 uses pxor instruction, it's unclear what _mm_set_epi does.
// If a pseudo constant is used repeatedly in a function it may be worthwhile
// to define a register variable to represent that constant.
// register __m128i zero = mm_zero;
// Constant zero
#define m128_zero _mm_setzero_si128()
@@ -425,7 +423,7 @@ do { \
v1 = t; \
} while(0)
/*
// No comparable rol.
#define mm_ror256_1x16( v1, v2 ) \
do { \
@@ -433,8 +431,8 @@ do { \
v1 = _mm_alignr_epi8( v2, v1, 2 ); \
v2 = t; \
} while(0)
*/
/*
#define mm_ror256_1x16( v1, v2 ) \
do { \
__m128i t; \
@@ -444,6 +442,7 @@ do { \
v2 = _mm_blend_epi16( v1, v2, 0x01 ); \
v1 = t; \
} while(0)
*/
#define mm_rol256_1x16( v1, v2 ) \
do { \
@@ -888,6 +887,41 @@ static inline void memcpy_256( __m256i *dst, const __m256i *src, int n )
#define mm256_ror512_1x128(v1, v2) _mm256_permute2x128_si256( v1, v2, 0x39 )
#define mm256_rol512_1x128(v1, v2) _mm256_permute2x128_si256( v1, v2, 0x93 )
// No comparable rol.
#define mm256_ror512_1x64( v1, v2 ) \
do { \
__m256i t = _mm256_alignr_epi8( v1, v2, 8 ); \
v1 = _mm256_alignr_epi8( v2, v1, 8 ); \
v2 = t; \
} while(0)
#define mm256_rol512_1x64( v1, v2 ) \
do { \
__m256i t; \
v1 = mm256_rol_1x64( v1 ); \
v2 = mm256_rol_1x64( v2 ); \
t = _mm256_blend_epi32( v1, v2, 0x03 ); \
v2 = _mm256_blend_epi32( v1, v2, 0xFC ); \
v1 = t; \
} while(0)
#define mm256_ror512_1x32( v1, v2 ) \
do { \
__m256i t = _mm256_alignr_epi8( v1, v2, 4 ); \
v1 = _mm256_alignr_epi8( v2, v1, 4 ); \
v2 = t; \
} while(0)
#define mm256_rol512_1x32( v1, v2 ) \
do { \
__m256i t; \
v1 = mm256_rol_1x32( v1 ); \
v2 = mm256_rol_1x32( v2 ); \
t = _mm256_blend_epi32( v1, v2, 0x01 ); \
v2 = _mm256_blend_epi32( v1, v2, 0xFE ); \
v1 = t; \
} while(0)
//
// Swap bytes in vector elements
@@ -914,7 +948,7 @@ static inline void memcpy_256( __m256i *dst, const __m256i *src, int n )
// usefulness tbd
// __m128i hi, __m128i lo, returns __m256i
#define mm256_pack_2x128( hi, lo ) \
_mm256_inserti128_si256( _mm256_castsi128_si256( hi ), lo, 0 ) \
_mm256_inserti128_si256( _mm256_castsi128_si256( lo ), hi, 1 ) \
// __m128i hi, __m128i lo, __m256i src
#define mm256_unpack_2x128( hi, lo, src ) \

20
configure vendored
View File

@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.8.5.
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.8.6.
#
#
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
@@ -577,8 +577,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='cpuminer-opt'
PACKAGE_TARNAME='cpuminer-opt'
PACKAGE_VERSION='3.8.5'
PACKAGE_STRING='cpuminer-opt 3.8.5'
PACKAGE_VERSION='3.8.6'
PACKAGE_STRING='cpuminer-opt 3.8.6'
PACKAGE_BUGREPORT=''
PACKAGE_URL=''
@@ -1321,7 +1321,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
\`configure' configures cpuminer-opt 3.8.5 to adapt to many kinds of systems.
\`configure' configures cpuminer-opt 3.8.6 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1392,7 +1392,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
short | recursive ) echo "Configuration of cpuminer-opt 3.8.5:";;
short | recursive ) echo "Configuration of cpuminer-opt 3.8.6:";;
esac
cat <<\_ACEOF
@@ -1497,7 +1497,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
cpuminer-opt configure 3.8.5
cpuminer-opt configure 3.8.6
generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2000,7 +2000,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
It was created by cpuminer-opt $as_me 3.8.5, which was
It was created by cpuminer-opt $as_me 3.8.6, which was
generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@
@@ -2981,7 +2981,7 @@ fi
# Define the identity of the package.
PACKAGE='cpuminer-opt'
VERSION='3.8.5'
VERSION='3.8.6'
cat >>confdefs.h <<_ACEOF
@@ -6677,7 +6677,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
This file was extended by cpuminer-opt $as_me 3.8.5, which was
This file was extended by cpuminer-opt $as_me 3.8.6, which was
generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -6743,7 +6743,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
cpuminer-opt config.status 3.8.5
cpuminer-opt config.status 3.8.6
configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\"

View File

@@ -1,4 +1,4 @@
AC_INIT([cpuminer-opt], [3.8.5])
AC_INIT([cpuminer-opt], [3.8.6])
AC_PREREQ([2.59c])
AC_CANONICAL_SYSTEM

View File

@@ -550,6 +550,7 @@ enum algos {
ALGO_X14,
ALGO_X15,
ALGO_X16R,
ALGO_X16S,
ALGO_X17,
ALGO_XEVAN,
ALGO_YESCRYPT,
@@ -629,6 +630,7 @@ static const char* const algo_names[] = {
"x14",
"x15",
"x16r",
"x16s",
"x17",
"xevan",
"yescrypt",
@@ -767,6 +769,7 @@ Options:\n\
x14 X14\n\
x15 X15\n\
x16r Ravencoin (RVN)\n\
x16s Pigeoncoin (PGN)\n\
x17\n\
xevan Bitsend (BSD)\n\
yescrypt      Globalboost-Y (BSTY)\n\