This commit is contained in:
Jay D Dee
2023-11-21 14:18:15 -05:00
parent fc696dbbe5
commit 045b42babf
21 changed files with 573 additions and 214 deletions

View File

@@ -250,6 +250,7 @@ cpuminer_SOURCES = \
algo/x16/x16rt.c \ algo/x16/x16rt.c \
algo/x16/x16rt-4way.c \ algo/x16/x16rt-4way.c \
algo/x16/hex.c \ algo/x16/hex.c \
algo/x16/x20r.c \
algo/x16/x21s-4way.c \ algo/x16/x21s-4way.c \
algo/x16/x21s.c \ algo/x16/x21s.c \
algo/x16/minotaur.c \ algo/x16/minotaur.c \

View File

@@ -75,6 +75,11 @@ If not what makes it happen or not happen?
Change Log Change Log
---------- ----------
v23.13
Added x20r algo.
Eliminated redundant hash order calculations for x16r family.
v23.12 v23.12
Several bug fixes and speed improvements for x16r family for all CPU architectures. Several bug fixes and speed improvements for x16r family for all CPU architectures.

View File

@@ -368,6 +368,7 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
case ALGO_X16RT_VEIL: rc = register_x16rt_veil_algo ( gate ); break; case ALGO_X16RT_VEIL: rc = register_x16rt_veil_algo ( gate ); break;
case ALGO_X16S: rc = register_x16s_algo ( gate ); break; case ALGO_X16S: rc = register_x16s_algo ( gate ); break;
case ALGO_X17: rc = register_x17_algo ( gate ); break; case ALGO_X17: rc = register_x17_algo ( gate ); break;
case ALGO_X20R: rc = register_x20r_algo ( gate ); break;
case ALGO_X21S: rc = register_x21s_algo ( gate ); break; case ALGO_X21S: rc = register_x21s_algo ( gate ); break;
case ALGO_X22I: rc = register_x22i_algo ( gate ); break; case ALGO_X22I: rc = register_x22i_algo ( gate ); break;
case ALGO_X25X: rc = register_x25x_algo ( gate ); break; case ALGO_X25X: rc = register_x25x_algo ( gate ); break;

View File

@@ -19,12 +19,12 @@
// Perform midstate prehash of hash functions with block size <= 72 bytes, // Perform midstate prehash of hash functions with block size <= 72 bytes,
// 76 bytes for hash functions that operate on 32 bit data. // 76 bytes for hash functions that operate on 32 bit data.
void x16r_8way_prehash( void *vdata, void *pdata ) void x16r_8way_prehash( void *vdata, void *pdata, const char *hash_order )
{ {
uint32_t vdata2[20*8] __attribute__ ((aligned (64))); uint32_t vdata2[20*8] __attribute__ ((aligned (64)));
uint32_t edata[20] __attribute__ ((aligned (64))); uint32_t edata[20] __attribute__ ((aligned (64)));
const char elem = x16r_hash_order[0]; const char elem = hash_order[0];
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0'; const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
switch ( algo ) switch ( algo )
@@ -110,7 +110,8 @@ void x16r_8way_prehash( void *vdata, void *pdata )
// Called by wrapper hash function to optionally continue hashing and // Called by wrapper hash function to optionally continue hashing and
// convert to final hash. // convert to final hash.
int x16r_8way_hash_generic( void* output, const void* input, int thrid ) int x16r_8way_hash_generic( void* output, const void* input, int thrid,
const char *hash_order, const int func_count )
{ {
uint32_t vhash[20*8] __attribute__ ((aligned (128))); uint32_t vhash[20*8] __attribute__ ((aligned (128)));
uint32_t hash0[20] __attribute__ ((aligned (16))); uint32_t hash0[20] __attribute__ ((aligned (16)));
@@ -136,9 +137,9 @@ int x16r_8way_hash_generic( void* output, const void* input, int thrid )
dintrlv_8x64( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7, dintrlv_8x64( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
input, 640 ); input, 640 );
for ( int i = 0; i < 16; i++ ) for ( int i = 0; i < func_count; i++ )
{ {
const char elem = x16r_hash_order[i]; const char elem = hash_order[i];
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0'; const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
switch ( algo ) switch ( algo )
@@ -474,7 +475,8 @@ int x16r_8way_hash_generic( void* output, const void* input, int thrid )
int x16r_8way_hash( void* output, const void* input, int thrid ) int x16r_8way_hash( void* output, const void* input, int thrid )
{ {
uint8_t hash[64*8] __attribute__ ((aligned (128))); uint8_t hash[64*8] __attribute__ ((aligned (128)));
if ( !x16r_8way_hash_generic( hash, input, thrid ) ) if ( !x16r_8way_hash_generic( hash, input, thrid, x16r_hash_order,
X16R_HASH_FUNC_COUNT ) )
return 0; return 0;
memcpy( output, hash, 32 ); memcpy( output, hash, 32 );
@@ -495,7 +497,6 @@ int scanhash_x16r_8way( struct work *work, uint32_t max_nonce,
{ {
uint32_t hash[16*8] __attribute__ ((aligned (128))); uint32_t hash[16*8] __attribute__ ((aligned (128)));
uint32_t vdata[20*8] __attribute__ ((aligned (64))); uint32_t vdata[20*8] __attribute__ ((aligned (64)));
uint32_t bedata1[2];
uint32_t *pdata = work->data; uint32_t *pdata = work->data;
uint32_t *ptarget = work->target; uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19]; const uint32_t first_nonce = pdata[19];
@@ -508,21 +509,18 @@ int scanhash_x16r_8way( struct work *work, uint32_t max_nonce,
if ( bench ) ptarget[7] = 0x0cff; if ( bench ) ptarget[7] = 0x0cff;
bedata1[0] = bswap_32( pdata[1] ); static __thread uint32_t saved_height = UINT32_MAX;
bedata1[1] = bswap_32( pdata[2] ); if ( work->height != saved_height )
static __thread uint32_t s_ntime = UINT32_MAX;
const uint32_t ntime = bswap_32( pdata[17] );
if ( s_ntime != ntime )
{ {
x16_r_s_getAlgoString( (const uint8_t*)bedata1, x16r_hash_order ); vdata[1] = bswap_32( pdata[1] );
s_ntime = ntime; vdata[2] = bswap_32( pdata[2] );
saved_height = work->height;
if ( opt_debug && !thr_id ) x16_r_s_getAlgoString( (const uint8_t*)(&vdata[1]), x16r_hash_order );
applog( LOG_INFO, "Hash order %s Ntime %08x", x16r_hash_order, ntime ); if ( !opt_quiet && !thr_id )
applog( LOG_INFO, "hash order %s", x16r_hash_order );
} }
x16r_8way_prehash( vdata, pdata ); x16r_8way_prehash( vdata, pdata, x16r_hash_order );
*noncev = mm512_intrlv_blend_32( _mm512_set_epi32( *noncev = mm512_intrlv_blend_32( _mm512_set_epi32(
n+7, 0, n+6, 0, n+5, 0, n+4, 0, n+7, 0, n+6, 0, n+5, 0, n+4, 0,
n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev ); n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
@@ -546,12 +544,12 @@ int scanhash_x16r_8way( struct work *work, uint32_t max_nonce,
#elif defined (X16R_4WAY) #elif defined (X16R_4WAY)
void x16r_4way_prehash( void *vdata, void *pdata ) void x16r_4way_prehash( void *vdata, void *pdata, const char *hash_order )
{ {
uint32_t vdata2[20*4] __attribute__ ((aligned (64))); uint32_t vdata2[20*4] __attribute__ ((aligned (64)));
uint32_t edata[20] __attribute__ ((aligned (64))); uint32_t edata[20] __attribute__ ((aligned (64)));
const char elem = x16r_hash_order[0]; const char elem = hash_order[0];
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0'; const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
switch ( algo ) switch ( algo )
@@ -627,7 +625,8 @@ void x16r_4way_prehash( void *vdata, void *pdata )
} }
} }
int x16r_4way_hash_generic( void* output, const void* input, int thrid ) int x16r_4way_hash_generic( void* output, const void* input, int thrid,
const char *hash_order, const int func_count )
{ {
uint32_t vhash[20*4] __attribute__ ((aligned (128))); uint32_t vhash[20*4] __attribute__ ((aligned (128)));
uint32_t hash0[20] __attribute__ ((aligned (32))); uint32_t hash0[20] __attribute__ ((aligned (32)));
@@ -644,9 +643,9 @@ int x16r_4way_hash_generic( void* output, const void* input, int thrid )
dintrlv_4x64( hash0, hash1, hash2, hash3, input, 640 ); dintrlv_4x64( hash0, hash1, hash2, hash3, input, 640 );
for ( int i = 0; i < 16; i++ ) for ( int i = 0; i < func_count; i++ )
{ {
const char elem = x16r_hash_order[i]; const char elem = hash_order[i];
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0'; const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
switch ( algo ) switch ( algo )
@@ -908,7 +907,8 @@ int x16r_4way_hash_generic( void* output, const void* input, int thrid )
int x16r_4way_hash( void* output, const void* input, int thrid ) int x16r_4way_hash( void* output, const void* input, int thrid )
{ {
uint8_t hash[64*4] __attribute__ ((aligned (64))); uint8_t hash[64*4] __attribute__ ((aligned (64)));
if ( !x16r_4way_hash_generic( hash, input, thrid ) ) if ( !x16r_4way_hash_generic( hash, input, thrid, x16r_hash_order,
X16R_HASH_FUNC_COUNT ) )
return 0; return 0;
memcpy( output, hash, 32 ); memcpy( output, hash, 32 );
@@ -924,7 +924,6 @@ int scanhash_x16r_4way( struct work *work, uint32_t max_nonce,
{ {
uint32_t hash[16*4] __attribute__ ((aligned (64))); uint32_t hash[16*4] __attribute__ ((aligned (64)));
uint32_t vdata[20*4] __attribute__ ((aligned (64))); uint32_t vdata[20*4] __attribute__ ((aligned (64)));
uint32_t bedata1[2];
uint32_t *pdata = work->data; uint32_t *pdata = work->data;
uint32_t *ptarget = work->target; uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19]; const uint32_t first_nonce = pdata[19];
@@ -937,20 +936,18 @@ int scanhash_x16r_4way( struct work *work, uint32_t max_nonce,
if ( bench ) ptarget[7] = 0x0cff; if ( bench ) ptarget[7] = 0x0cff;
bedata1[0] = bswap_32( pdata[1] ); static __thread uint32_t saved_height = UINT32_MAX;
bedata1[1] = bswap_32( pdata[2] ); if ( work->height != saved_height )
static __thread uint32_t s_ntime = UINT32_MAX;
const uint32_t ntime = bswap_32( pdata[17] );
if ( s_ntime != ntime )
{ {
x16_r_s_getAlgoString( (const uint8_t*)bedata1, x16r_hash_order ); vdata[1] = bswap_32( pdata[1] );
s_ntime = ntime; vdata[2] = bswap_32( pdata[2] );
if ( opt_debug && !thr_id ) saved_height = work->height;
applog( LOG_INFO, "Hash order %s Ntime %08x", x16r_hash_order, ntime ); x16_r_s_getAlgoString( (const uint8_t*)(&vdata[1]), x16r_hash_order );
if ( !opt_quiet && !thr_id )
applog( LOG_INFO, "hash order %s", x16r_hash_order );
} }
x16r_4way_prehash( vdata, pdata ); x16r_4way_prehash( vdata, pdata, x16r_hash_order );
*noncev = mm256_intrlv_blend_32( *noncev = mm256_intrlv_blend_32(
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev ); _mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
do do
@@ -973,10 +970,10 @@ int scanhash_x16r_4way( struct work *work, uint32_t max_nonce,
#elif defined (X16R_2WAY) #elif defined (X16R_2WAY)
void x16r_2x64_prehash( void *vdata, void *pdata ) void x16r_2x64_prehash( void *vdata, void *pdata, const char *hash_order )
{ {
uint32_t edata[20] __attribute__ ((aligned (64))); uint32_t edata[20] __attribute__ ((aligned (64)));
const char elem = x16r_hash_order[0]; const char elem = hash_order[0];
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0'; const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
switch ( algo ) switch ( algo )
@@ -1051,7 +1048,8 @@ void x16r_2x64_prehash( void *vdata, void *pdata )
} }
} }
int x16r_2x64_hash_generic( void* output, const void* input, int thrid ) int x16r_2x64_hash_generic( void* output, const void* input, int thrid,
const char *hash_order, const int func_count )
{ {
uint32_t vhash[20*2] __attribute__ ((aligned (64))); uint32_t vhash[20*2] __attribute__ ((aligned (64)));
uint32_t hash0[20] __attribute__ ((aligned (32))); uint32_t hash0[20] __attribute__ ((aligned (32)));
@@ -1064,9 +1062,9 @@ int x16r_2x64_hash_generic( void* output, const void* input, int thrid )
dintrlv_2x64( hash0, hash1, input, 640 ); dintrlv_2x64( hash0, hash1, input, 640 );
for ( int i = 0; i < 16; i++ ) for ( int i = 0; i < func_count; i++ )
{ {
const char elem = x16r_hash_order[i]; const char elem = hash_order[i];
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0'; const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
switch ( algo ) switch ( algo )
@@ -1313,7 +1311,8 @@ int x16r_2x64_hash_generic( void* output, const void* input, int thrid )
int x16r_2x64_hash( void* output, const void* input, int thrid ) int x16r_2x64_hash( void* output, const void* input, int thrid )
{ {
uint8_t hash[64*2] __attribute__ ((aligned (64))); uint8_t hash[64*2] __attribute__ ((aligned (64)));
if ( !x16r_2x64_hash_generic( hash, input, thrid ) ) if ( !x16r_2x64_hash_generic( hash, input, thrid, x16r_hash_order,
X16R_HASH_FUNC_COUNT ) )
return 0; return 0;
memcpy( output, hash, 32 ); memcpy( output, hash, 32 );
@@ -1327,7 +1326,6 @@ int scanhash_x16r_2x64( struct work *work, uint32_t max_nonce,
{ {
uint32_t hash[16*2] __attribute__ ((aligned (64))); uint32_t hash[16*2] __attribute__ ((aligned (64)));
uint32_t vdata[20*2] __attribute__ ((aligned (64))); uint32_t vdata[20*2] __attribute__ ((aligned (64)));
uint32_t bedata1[2];
uint32_t *pdata = work->data; uint32_t *pdata = work->data;
uint32_t *ptarget = work->target; uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19]; const uint32_t first_nonce = pdata[19];
@@ -1340,20 +1338,18 @@ int scanhash_x16r_2x64( struct work *work, uint32_t max_nonce,
if ( bench ) ptarget[7] = 0x0cff; if ( bench ) ptarget[7] = 0x0cff;
bedata1[0] = bswap_32( pdata[1] ); static __thread uint32_t saved_height = UINT32_MAX;
bedata1[1] = bswap_32( pdata[2] ); if ( work->height != saved_height )
static __thread uint32_t s_ntime = UINT32_MAX;
const uint32_t ntime = bswap_32( pdata[17] );
if ( s_ntime != ntime )
{ {
x16_r_s_getAlgoString( (const uint8_t*)bedata1, x16r_hash_order ); vdata[1] = bswap_32( pdata[1] );
s_ntime = ntime; vdata[2] = bswap_32( pdata[2] );
if ( opt_debug && !thr_id ) saved_height = work->height;
applog( LOG_INFO, "Hash order %s Ntime %08x", x16r_hash_order, ntime ); x16_r_s_getAlgoString( (const uint8_t*)(&vdata[1]), x16r_hash_order );
if ( !opt_quiet && !thr_id )
applog( LOG_INFO, "hash order %s", x16r_hash_order );
} }
x16r_2x64_prehash( vdata, pdata ); x16r_2x64_prehash( vdata, pdata, x16r_hash_order );
*noncev = v128_intrlv_blend_32( v128_set32( n+1, 0, n, 0 ), *noncev ); *noncev = v128_intrlv_blend_32( v128_set32( n+1, 0, n, 0 ), *noncev );
do do
{ {

View File

@@ -5,15 +5,15 @@ __thread char x16r_hash_order[ X16R_HASH_FUNC_COUNT + 1 ] = { 0 };
void (*x16_r_s_getAlgoString) ( const uint8_t*, char* ) = NULL; void (*x16_r_s_getAlgoString) ( const uint8_t*, char* ) = NULL;
#if defined (X16R_8WAY) #if defined(X16R_8WAY)
__thread x16r_8way_context_overlay x16r_ctx; __thread x16r_8way_context_overlay x16r_ctx;
#elif defined (X16R_4WAY) #elif defined(X16R_4WAY)
__thread x16r_4way_context_overlay x16r_ctx; __thread x16r_4way_context_overlay x16r_ctx;
#elif defined (X16R_2WAY) #elif defined(X16R_2WAY)
__thread x16r_2x64_context_overlay x16r_ctx; __thread x16r_2x64_context_overlay x16r_ctx;
@@ -55,13 +55,13 @@ void x16s_getAlgoString( const uint8_t* prevblock, char *output )
bool register_x16r_algo( algo_gate_t* gate ) bool register_x16r_algo( algo_gate_t* gate )
{ {
#if defined (X16R_8WAY) #if defined(X16R_8WAY)
gate->scanhash = (void*)&scanhash_x16r_8way; gate->scanhash = (void*)&scanhash_x16r_8way;
gate->hash = (void*)&x16r_8way_hash; gate->hash = (void*)&x16r_8way_hash;
#elif defined (X16R_4WAY) #elif defined(X16R_4WAY)
gate->scanhash = (void*)&scanhash_x16r_4way; gate->scanhash = (void*)&scanhash_x16r_4way;
gate->hash = (void*)&x16r_4way_hash; gate->hash = (void*)&x16r_4way_hash;
#elif defined (X16R_2WAY) #elif defined(X16R_2WAY)
gate->scanhash = (void*)&scanhash_x16r_2x64; gate->scanhash = (void*)&scanhash_x16r_2x64;
gate->hash = (void*)&x16r_2x64_hash; gate->hash = (void*)&x16r_2x64_hash;
#else #else
@@ -77,13 +77,13 @@ bool register_x16r_algo( algo_gate_t* gate )
bool register_x16rv2_algo( algo_gate_t* gate ) bool register_x16rv2_algo( algo_gate_t* gate )
{ {
#if defined (X16RV2_8WAY) #if defined(X16RV2_8WAY)
gate->scanhash = (void*)&scanhash_x16rv2_8way; gate->scanhash = (void*)&scanhash_x16rv2_8way;
gate->hash = (void*)&x16rv2_8way_hash; gate->hash = (void*)&x16rv2_8way_hash;
#elif defined (X16RV2_4WAY) #elif defined(X16RV2_4WAY)
gate->scanhash = (void*)&scanhash_x16rv2_4way; gate->scanhash = (void*)&scanhash_x16rv2_4way;
gate->hash = (void*)&x16rv2_4way_hash; gate->hash = (void*)&x16rv2_4way_hash;
#elif defined (X16RV2_2WAY) #elif defined(X16RV2_2WAY)
gate->scanhash = (void*)&scanhash_x16rv2_2x64; gate->scanhash = (void*)&scanhash_x16rv2_2x64;
gate->hash = (void*)&x16rv2_2x64_hash; gate->hash = (void*)&x16rv2_2x64_hash;
#else #else
@@ -99,13 +99,13 @@ bool register_x16rv2_algo( algo_gate_t* gate )
bool register_x16s_algo( algo_gate_t* gate ) bool register_x16s_algo( algo_gate_t* gate )
{ {
#if defined (X16R_8WAY) #if defined(X16R_8WAY)
gate->scanhash = (void*)&scanhash_x16r_8way; gate->scanhash = (void*)&scanhash_x16r_8way;
gate->hash = (void*)&x16r_8way_hash; gate->hash = (void*)&x16r_8way_hash;
#elif defined (X16R_4WAY) #elif defined(X16R_4WAY)
gate->scanhash = (void*)&scanhash_x16r_4way; gate->scanhash = (void*)&scanhash_x16r_4way;
gate->hash = (void*)&x16r_4way_hash; gate->hash = (void*)&x16r_4way_hash;
#elif defined (X16R_2WAY) #elif defined(X16R_2WAY)
gate->scanhash = (void*)&scanhash_x16r_2x64; gate->scanhash = (void*)&scanhash_x16r_2x64;
gate->hash = (void*)&x16r_2x64_hash; gate->hash = (void*)&x16r_2x64_hash;
#else #else
@@ -235,13 +235,13 @@ void veil_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
bool register_x16rt_algo( algo_gate_t* gate ) bool register_x16rt_algo( algo_gate_t* gate )
{ {
#if defined (X16RT_8WAY) #if defined(X16RT_8WAY)
gate->scanhash = (void*)&scanhash_x16rt_8way; gate->scanhash = (void*)&scanhash_x16rt_8way;
gate->hash = (void*)&x16r_8way_hash; gate->hash = (void*)&x16r_8way_hash;
#elif defined (X16RT_4WAY) #elif defined(X16RT_4WAY)
gate->scanhash = (void*)&scanhash_x16rt_4way; gate->scanhash = (void*)&scanhash_x16rt_4way;
gate->hash = (void*)&x16r_4way_hash; gate->hash = (void*)&x16r_4way_hash;
#elif defined (X16RT_2WAY) #elif defined(X16RT_2WAY)
gate->scanhash = (void*)&scanhash_x16rt_2x64; gate->scanhash = (void*)&scanhash_x16rt_2x64;
gate->hash = (void*)&x16r_2x64_hash; gate->hash = (void*)&x16r_2x64_hash;
#else #else
@@ -256,13 +256,13 @@ bool register_x16rt_algo( algo_gate_t* gate )
bool register_x16rt_veil_algo( algo_gate_t* gate ) bool register_x16rt_veil_algo( algo_gate_t* gate )
{ {
#if defined (X16RT_8WAY) #if defined(X16RT_8WAY)
gate->scanhash = (void*)&scanhash_x16rt_8way; gate->scanhash = (void*)&scanhash_x16rt_8way;
gate->hash = (void*)&x16r_8way_hash; gate->hash = (void*)&x16r_8way_hash;
#elif defined (X16RT_4WAY) #elif defined(X16RT_4WAY)
gate->scanhash = (void*)&scanhash_x16rt_4way; gate->scanhash = (void*)&scanhash_x16rt_4way;
gate->hash = (void*)&x16r_4way_hash; gate->hash = (void*)&x16r_4way_hash;
#elif defined (X16RT_2WAY) #elif defined(X16RT_2WAY)
gate->scanhash = (void*)&scanhash_x16rt_2x64; gate->scanhash = (void*)&scanhash_x16rt_2x64;
gate->hash = (void*)&x16r_2x64_hash; gate->hash = (void*)&x16r_2x64_hash;
#else #else
@@ -296,15 +296,15 @@ bool register_hex_algo( algo_gate_t* gate )
bool register_x21s_algo( algo_gate_t* gate ) bool register_x21s_algo( algo_gate_t* gate )
{ {
#if defined (X21S_8WAY) #if defined(X21S_8WAY)
gate->scanhash = (void*)&scanhash_x21s_8way; gate->scanhash = (void*)&scanhash_x21s_8way;
gate->hash = (void*)&x21s_8way_hash; gate->hash = (void*)&x21s_8way_hash;
gate->miner_thread_init = (void*)&x21s_8way_thread_init; gate->miner_thread_init = (void*)&x21s_8way_thread_init;
#elif defined (X21S_4WAY) #elif defined(X21S_4WAY)
gate->scanhash = (void*)&scanhash_x21s_4way; gate->scanhash = (void*)&scanhash_x21s_4way;
gate->hash = (void*)&x21s_4way_hash; gate->hash = (void*)&x21s_4way_hash;
gate->miner_thread_init = (void*)&x21s_4way_thread_init; gate->miner_thread_init = (void*)&x21s_4way_thread_init;
#elif defined (X21S_2WAY) #elif defined(X21S_2WAY)
gate->scanhash = (void*)&scanhash_x21s_2x64; gate->scanhash = (void*)&scanhash_x21s_2x64;
gate->hash = (void*)&x21s_2x64_hash; gate->hash = (void*)&x21s_2x64_hash;
gate->miner_thread_init = (void*)&x21s_2x64_thread_init; gate->miner_thread_init = (void*)&x21s_2x64_thread_init;

View File

@@ -149,18 +149,23 @@ union _x16r_8way_context_overlay
hashState_echo echo; hashState_echo echo;
#endif #endif
} __attribute__ ((aligned (64))); } __attribute__ ((aligned (64)));
#define _x16r_8x64_context_overlay _x16r_8way_context_overlay
typedef union _x16r_8way_context_overlay x16r_8way_context_overlay; typedef union _x16r_8way_context_overlay x16r_8way_context_overlay;
#define x16r_8x64_context_overlay x16r_8way_context_overlay
extern __thread x16r_8way_context_overlay x16r_ctx; extern __thread x16r_8way_context_overlay x16r_ctx;
void x16r_8way_prehash( void *, void * ); void x16r_8way_prehash( void *, void *, const char * );
int x16r_8way_hash_generic( void *, const void *, int ); int x16r_8way_hash_generic( void *, const void *, int, const char*, const int );
int x16r_8way_hash( void *, const void *, int ); int x16r_8way_hash( void *, const void *, int );
int scanhash_x16r_8way( struct work *, uint32_t , int scanhash_x16r_8way( struct work *, uint32_t ,
uint64_t *, struct thr_info * ); uint64_t *, struct thr_info * );
extern __thread x16r_8way_context_overlay x16r_ctx;
// 8x64 aliases: let callers use the lane-width naming while resolving to the
// existing 8way prehash, hash and scanhash entry points.
#define x16r_8x64_prehash x16r_8way_prehash
#define x16r_8x64_hash_generic x16r_8way_hash_generic
#define x16r_8x64_hash x16r_8way_hash
// Must name the 8way function: a macro defined as itself is never re-expanded,
// which would leave scanhash_x16r_8x64 referring to an undeclared symbol.
#define scanhash_x16r_8x64 scanhash_x16r_8way
#elif defined(X16R_4WAY) #elif defined(X16R_4WAY)
@@ -189,17 +194,23 @@ union _x16r_4way_context_overlay
sph_whirlpool_context whirlpool; sph_whirlpool_context whirlpool;
sha512_4way_context sha512; sha512_4way_context sha512;
} __attribute__ ((aligned (64))); } __attribute__ ((aligned (64)));
#define _x16r_4x64_context_overlay _x16r_4way_context_overlay
typedef union _x16r_4way_context_overlay x16r_4way_context_overlay; typedef union _x16r_4way_context_overlay x16r_4way_context_overlay;
#define x16r_4x64_context_overlay x16r_4way_context_overlay
extern __thread x16r_4way_context_overlay x16r_ctx; extern __thread x16r_4way_context_overlay x16r_ctx;
void x16r_4way_prehash( void *, void * ); void x16r_4way_prehash( void *, void *, const char * );
int x16r_4way_hash_generic( void *, const void *, int ); int x16r_4way_hash_generic( void *, const void *, int, const char*, const int );
int x16r_4way_hash( void *, const void *, int ); int x16r_4way_hash( void *, const void *, int );
int scanhash_x16r_4way( struct work *, uint32_t, int scanhash_x16r_4way( struct work *, uint32_t,
uint64_t *, struct thr_info * ); uint64_t *, struct thr_info * );
extern __thread x16r_4way_context_overlay x16r_ctx;
// 4x64 aliases: let callers use the lane-width naming while resolving to the
// existing 4way prehash, hash and scanhash entry points.
#define x16r_4x64_prehash x16r_4way_prehash
#define x16r_4x64_hash_generic x16r_4way_hash_generic
#define x16r_4x64_hash x16r_4way_hash
// Must name the 4way function: a macro defined as itself is never re-expanded,
// which would leave scanhash_x16r_4x64 referring to an undeclared symbol.
#define scanhash_x16r_4x64 scanhash_x16r_4way
#elif defined(X16R_2WAY) #elif defined(X16R_2WAY)
@@ -241,8 +252,8 @@ union _x16r_2x64_context_overlay
typedef union _x16r_2x64_context_overlay x16r_2x64_context_overlay; typedef union _x16r_2x64_context_overlay x16r_2x64_context_overlay;
void x16r_2x64_prehash( void *, void * ); void x16r_2x64_prehash( void *, void *, const char * );
int x16r_2x64_hash_generic( void *, const void *, int ); int x16r_2x64_hash_generic( void *, const void *, int, const char*, const int );
int x16r_2x64_hash( void *, const void *, int ); int x16r_2x64_hash( void *, const void *, int );
int scanhash_x16r_2x64( struct work *, uint32_t, int scanhash_x16r_2x64( struct work *, uint32_t,
uint64_t *, struct thr_info * ); uint64_t *, struct thr_info * );
@@ -288,8 +299,8 @@ typedef union _x16r_context_overlay x16r_context_overlay;
extern __thread x16r_context_overlay x16r_ref_ctx; extern __thread x16r_context_overlay x16r_ref_ctx;
void x16r_prehash( void *, void * ); void x16r_prehash( void *, void *, const char * );
int x16r_hash_generic( void *, const void *, int ); int x16r_hash_generic( void *, const void *, int, const char*, const int );
int x16r_hash( void *, const void *, int ); int x16r_hash( void *, const void *, int );
int scanhash_x16r( struct work *, uint32_t, uint64_t *, struct thr_info * ); int scanhash_x16r( struct work *, uint32_t, uint64_t *, struct thr_info * );

View File

@@ -10,9 +10,9 @@
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
void x16r_prehash( void *edata, void *pdata ) void x16r_prehash( void *edata, void *pdata, const char *hash_order )
{ {
const char elem = x16r_hash_order[0]; const char elem = hash_order[0];
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0'; const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
switch ( algo ) switch ( algo )
@@ -52,17 +52,18 @@ void x16r_prehash( void *edata, void *pdata )
} }
} }
int x16r_hash_generic( void* output, const void* input, int thrid ) int x16r_hash_generic( void* output, const void* input, int thrid,
const char *hash_order, const int func_count )
{ {
uint32_t _ALIGN(128) hash[16]; uint32_t _ALIGN(32) hash[16];
x16r_context_overlay ctx; x16r_context_overlay ctx;
memcpy( &ctx, &x16r_ref_ctx, sizeof(ctx) ); memcpy( &ctx, &x16r_ref_ctx, sizeof(ctx) );
void *in = (void*) input; void *in = (void*) input;
int size = 80; int size = 80;
for ( int i = 0; i < 16; i++ ) for ( int i = 0; i < func_count; i++ )
{ {
const char elem = x16r_hash_order[i]; const char elem = hash_order[i];
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0'; const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
switch ( algo ) switch ( algo )
@@ -196,7 +197,8 @@ int x16r_hash_generic( void* output, const void* input, int thrid )
int x16r_hash( void* output, const void* input, int thrid ) int x16r_hash( void* output, const void* input, int thrid )
{ {
uint8_t hash[64] __attribute__ ((aligned (64))); uint8_t hash[64] __attribute__ ((aligned (64)));
if ( !x16r_hash_generic( hash, input, thrid ) ) if ( !x16r_hash_generic( hash, input, thrid, x16r_hash_order,
X16R_HASH_FUNC_COUNT ) )
return 0; return 0;
memcpy( output, hash, 32 ); memcpy( output, hash, 32 );
@@ -206,8 +208,8 @@ int x16r_hash( void* output, const void* input, int thrid )
int scanhash_x16r( struct work *work, uint32_t max_nonce, int scanhash_x16r( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr ) uint64_t *hashes_done, struct thr_info *mythr )
{ {
uint32_t _ALIGN(128) hash32[8]; uint32_t _ALIGN(32) hash32[8];
uint32_t _ALIGN(128) edata[20]; uint32_t _ALIGN(32) edata[20];
uint32_t *pdata = work->data; uint32_t *pdata = work->data;
uint32_t *ptarget = work->target; uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19]; const uint32_t first_nonce = pdata[19];
@@ -229,7 +231,7 @@ int scanhash_x16r( struct work *work, uint32_t max_nonce,
applog( LOG_INFO, "hash order %s (%08x)", x16r_hash_order, ntime ); applog( LOG_INFO, "hash order %s (%08x)", x16r_hash_order, ntime );
} }
x16r_prehash( edata, pdata ); x16r_prehash( edata, pdata, x16r_hash_order );
do do
{ {

View File

@@ -30,12 +30,12 @@ int scanhash_x16rt_8way( struct work *work, uint32_t max_nonce,
x16rt_getTimeHash( masked_ntime, &timeHash ); x16rt_getTimeHash( masked_ntime, &timeHash );
x16rt_getAlgoString( &timeHash[0], x16r_hash_order ); x16rt_getAlgoString( &timeHash[0], x16r_hash_order );
s_ntime = masked_ntime; s_ntime = masked_ntime;
if ( !thr_id ) if ( !opt_quiet && !thr_id )
applog( LOG_INFO, "Hash order %s, Ntime %08x, time hash %08x", applog( LOG_INFO, "Hash order %s, Ntime %08x",
x16r_hash_order, bswap_32( pdata[17] ), timeHash ); x16r_hash_order, bswap_32( pdata[17] ) );
} }
x16r_8way_prehash( vdata, pdata ); x16r_8way_prehash( vdata, pdata, x16r_hash_order );
*noncev = mm512_intrlv_blend_32( _mm512_set_epi32( *noncev = mm512_intrlv_blend_32( _mm512_set_epi32(
n+7, 0, n+6, 0, n+5, 0, n+4, 0, n+7, 0, n+6, 0, n+5, 0, n+4, 0,
n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev ); n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
@@ -84,12 +84,12 @@ int scanhash_x16rt_4way( struct work *work, uint32_t max_nonce,
x16rt_getTimeHash( masked_ntime, &timeHash ); x16rt_getTimeHash( masked_ntime, &timeHash );
x16rt_getAlgoString( &timeHash[0], x16r_hash_order ); x16rt_getAlgoString( &timeHash[0], x16r_hash_order );
s_ntime = masked_ntime; s_ntime = masked_ntime;
if ( !thr_id ) if ( !opt_quiet && !thr_id )
applog( LOG_INFO, "Hash order %s, Ntime %08x, time hash %08x", applog( LOG_INFO, "Hash order %s, Ntime %08x",
x16r_hash_order, bswap_32( pdata[17] ), timeHash ); x16r_hash_order, bswap_32( pdata[17] ) );
} }
x16r_4way_prehash( vdata, pdata ); x16r_4way_prehash( vdata, pdata, x16r_hash_order );
*noncev = mm256_intrlv_blend_32( *noncev = mm256_intrlv_blend_32(
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev ); _mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
do do
@@ -137,12 +137,12 @@ int scanhash_x16rt_2x64( struct work *work, uint32_t max_nonce,
x16rt_getTimeHash( masked_ntime, &timeHash ); x16rt_getTimeHash( masked_ntime, &timeHash );
x16rt_getAlgoString( &timeHash[0], x16r_hash_order ); x16rt_getAlgoString( &timeHash[0], x16r_hash_order );
s_ntime = masked_ntime; s_ntime = masked_ntime;
if ( !thr_id ) if ( !opt_quiet && !thr_id )
applog( LOG_INFO, "Hash order %s, Ntime %08x, time hash %08x", applog( LOG_INFO, "Hash order %s, Ntime %08x",
x16r_hash_order, bswap_32( pdata[17] ), timeHash ); x16r_hash_order, bswap_32( pdata[17] ) );
} }
x16r_2x64_prehash( vdata, pdata ); x16r_2x64_prehash( vdata, pdata, x16r_hash_order );
*noncev = v128_intrlv_blend_32( v128_set32( n+1, 0, n, 0 ), *noncev ); *noncev = v128_intrlv_blend_32( v128_set32( n+1, 0, n, 0 ), *noncev );
do do
{ {

View File

@@ -31,7 +31,7 @@ int scanhash_x16rt( struct work *work, uint32_t max_nonce,
x16r_hash_order, swab32( pdata[17] ), timeHash ); x16r_hash_order, swab32( pdata[17] ), timeHash );
} }
x16r_prehash( edata, pdata ); x16r_prehash( edata, pdata, x16r_hash_order );
do do
{ {

View File

@@ -593,7 +593,6 @@ int scanhash_x16rv2_8way( struct work *work, uint32_t max_nonce,
uint32_t vdata[20*8] __attribute__ ((aligned (64))); uint32_t vdata[20*8] __attribute__ ((aligned (64)));
uint32_t vdata2[20*8] __attribute__ ((aligned (64))); uint32_t vdata2[20*8] __attribute__ ((aligned (64)));
uint32_t edata[20] __attribute__ ((aligned (64))); uint32_t edata[20] __attribute__ ((aligned (64)));
uint32_t bedata1[2] __attribute__((aligned(64)));
uint32_t *pdata = work->data; uint32_t *pdata = work->data;
uint32_t *ptarget = work->target; uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19]; const uint32_t first_nonce = pdata[19];
@@ -606,19 +605,15 @@ int scanhash_x16rv2_8way( struct work *work, uint32_t max_nonce,
if ( bench ) ptarget[7] = 0x0cff; if ( bench ) ptarget[7] = 0x0cff;
mm512_bswap32_intrlv80_8x64( vdata, pdata ); static __thread uint32_t saved_height = UINT32_MAX;
if ( work->height != saved_height )
bedata1[0] = bswap_32( pdata[1] );
bedata1[1] = bswap_32( pdata[2] );
static __thread uint32_t s_ntime = UINT32_MAX;
const uint32_t ntime = bswap_32( pdata[17] );
if ( s_ntime != ntime )
{ {
x16_r_s_getAlgoString( (const uint8_t*)bedata1, x16r_hash_order ); vdata[1] = bswap_32( pdata[1] );
s_ntime = ntime; vdata[2] = bswap_32( pdata[2] );
saved_height = work->height;
x16_r_s_getAlgoString( (const uint8_t*)(&vdata[1]), x16r_hash_order );
if ( !opt_quiet && !thr_id ) if ( !opt_quiet && !thr_id )
applog( LOG_INFO, "hash order %s (%08x)", x16r_hash_order, ntime ); applog( LOG_INFO, "hash order %s", x16r_hash_order );
} }
// Do midstate prehash on hash functions with block size <= 64 bytes. // Do midstate prehash on hash functions with block size <= 64 bytes.
@@ -1108,7 +1103,6 @@ int scanhash_x16rv2_4way( struct work *work, uint32_t max_nonce,
uint32_t vdata[24*4] __attribute__ ((aligned (64))); uint32_t vdata[24*4] __attribute__ ((aligned (64)));
uint32_t vdata32[20*4] __attribute__ ((aligned (64))); uint32_t vdata32[20*4] __attribute__ ((aligned (64)));
uint32_t edata[20]; uint32_t edata[20];
uint32_t bedata1[2];
uint32_t *pdata = work->data; uint32_t *pdata = work->data;
uint32_t *ptarget = work->target; uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19]; const uint32_t first_nonce = pdata[19];
@@ -1121,17 +1115,15 @@ int scanhash_x16rv2_4way( struct work *work, uint32_t max_nonce,
if ( bench ) ptarget[7] = 0x0fff; if ( bench ) ptarget[7] = 0x0fff;
bedata1[0] = bswap_32( pdata[1] ); static __thread uint32_t saved_height = UINT32_MAX;
bedata1[1] = bswap_32( pdata[2] ); if ( work->height != saved_height )
static __thread uint32_t s_ntime = UINT32_MAX;
const uint32_t ntime = bswap_32(pdata[17]);
if ( s_ntime != ntime )
{ {
x16_r_s_getAlgoString( (const uint8_t*)bedata1, x16r_hash_order ); vdata[1] = bswap_32( pdata[1] );
s_ntime = ntime; vdata[2] = bswap_32( pdata[2] );
saved_height = work->height;
x16_r_s_getAlgoString( (const uint8_t*)(&vdata[1]), x16r_hash_order );
if ( !opt_quiet && !thr_id ) if ( !opt_quiet && !thr_id )
applog( LOG_INFO, "hash order %s (%08x)", x16r_hash_order, ntime ); applog( LOG_INFO, "hash order %s", x16r_hash_order );
} }
// Do midstate prehash on hash functions with block size <= 64 bytes. // Do midstate prehash on hash functions with block size <= 64 bytes.
@@ -1550,7 +1542,6 @@ int scanhash_x16rv2_2x64( struct work *work, uint32_t max_nonce,
uint32_t hash[2*16] __attribute__ ((aligned (64))); uint32_t hash[2*16] __attribute__ ((aligned (64)));
uint32_t vdata[24*2] __attribute__ ((aligned (64))); uint32_t vdata[24*2] __attribute__ ((aligned (64)));
uint32_t edata[20]; uint32_t edata[20];
uint32_t bedata1[2];
uint32_t *pdata = work->data; uint32_t *pdata = work->data;
uint32_t *ptarget = work->target; uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19]; const uint32_t first_nonce = pdata[19];
@@ -1563,17 +1554,15 @@ int scanhash_x16rv2_2x64( struct work *work, uint32_t max_nonce,
if ( bench ) ptarget[7] = 0x0fff; if ( bench ) ptarget[7] = 0x0fff;
bedata1[0] = bswap_32( pdata[1] ); static __thread uint32_t saved_height = UINT32_MAX;
bedata1[1] = bswap_32( pdata[2] ); if ( work->height != saved_height )
static __thread uint32_t s_ntime = UINT32_MAX;
const uint32_t ntime = bswap_32(pdata[17]);
if ( s_ntime != ntime )
{ {
x16_r_s_getAlgoString( (const uint8_t*)bedata1, x16r_hash_order ); vdata[1] = bswap_32( pdata[1] );
s_ntime = ntime; vdata[2] = bswap_32( pdata[2] );
saved_height = work->height;
x16_r_s_getAlgoString( (const uint8_t*)(&vdata[1]), x16r_hash_order );
if ( !opt_quiet && !thr_id ) if ( !opt_quiet && !thr_id )
applog( LOG_INFO, "hash order %s (%08x)", x16r_hash_order, ntime ); applog( LOG_INFO, "hash order %s", x16r_hash_order );
} }
// Do midstate prehash on hash functions with block size <= 64 bytes. // Do midstate prehash on hash functions with block size <= 64 bytes.

362
algo/x16/x20r.c Normal file
View File

@@ -0,0 +1,362 @@
#include "miner.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "algo/blake/sph_blake.h"
#include "algo/bmw/sph_bmw.h"
#include "algo/groestl/sph_groestl.h"
#include "algo/jh/sph_jh.h"
#include "algo/keccak/sph_keccak.h"
#include "algo/skein/sph_skein.h"
#include "algo/luffa/sph_luffa.h"
#include "algo/cubehash/sph_cubehash.h"
#include "algo/shavite/sph_shavite.h"
#include "algo/simd/sph_simd.h"
#include "algo/echo/sph_echo.h"
#include "algo/hamsi/sph_hamsi.h"
#include "algo/fugue/sph_fugue.h"
#include "algo/shabal/sph_shabal.h"
#include "algo/whirlpool/sph_whirlpool.h"
#include "algo/sha/sph_sha2.h"
#include "x16r-gate.h"
// Compile-time selection of the vector width used by this file. At most one
// of X20R_8WAY, X20R_4WAY or X20R_2WAY ends up defined; if none of the
// conditions hold, no macro is defined at all.
// 8 way: requires the AVX512 F, VL, DQ and BW feature macros together.
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
#define X20R_8WAY 1
// 4 way: requires both AVX2 and AES.
#elif defined(__AVX2__) && defined(__AES__)
#define X20R_4WAY 1
// 2 way: any SSE2 or ARM NEON target.
#elif defined(__SSE2__) || defined(__ARM_NEON)
#define X20R_2WAY 1
#endif
// X20R is not what it seems. It does not permute 20 functions over 20 rounds,
// it only permutes 16 of them. The last 4 functions are victims of trying to
// fit 20 elements in the space for only 16. Arithmetic overflow recycles the
// first 4 functions. Otherwise it's identical to X16R.
// Welcome to the real X20R.
// Number of rounds, which is also the number of characters in the hash order
// string.
#define X20R_HASH_FUNC_COUNT 20

// Reference only: the nominal function list. Every order digit is a single
// hex nibble, so only indexes 0..15 can ever be selected.
/*
enum x20r_algo
{
        BLAKE = 0,
        BMW,
        GROESTL,
        JH,
        KECCAK,
        SKEIN,
        LUFFA,
        CUBEHASH,
        SHAVITE,
        SIMD,
        ECHO,
        HAMSI,
        FUGUE,
        SHABAL,
        WHIRLPOOL,
        SHA512,
        HAVAL,        // Last 4 names are meaningless and not used
        GOST,
        RADIOGATUN,
        PANAMA,
        X20R_HASH_FUNC_COUNT
};
*/

// Per thread hash order: X20R_HASH_FUNC_COUNT upper case hex digits plus the
// terminating NUL.
static __thread char x20r_hash_order[ X20R_HASH_FUNC_COUNT + 1 ] = {0};

// Build the hash order string from the first 10 bytes of prevblock.
//
// prevblock: at least 10 readable bytes; byte 9 supplies the first two
//            digits, byte 0 the last two.
// output:    receives X20R_HASH_FUNC_COUNT upper case hex digits followed
//            by a NUL, so it must hold X20R_HASH_FUNC_COUNT + 1 chars.
static void x20r_getAlgoString( const uint8_t *prevblock, char *output )
{
   static const char hex_digits[] = "0123456789ABCDEF";

   for ( int j = 0; j < X20R_HASH_FUNC_COUNT; j++ )
   {
      // 20 hex digits, bytes taken in reverse order, high nibble first.
      const uint8_t b = ( X20R_HASH_FUNC_COUNT - 1 - j ) >> 1;
      const uint8_t algoDigit = ( j & 1 ) ? ( prevblock[b] & 0xF )
                                          : ( prevblock[b] >> 4 );
      output[j] = hex_digits[ algoDigit ];
   }
   output[ X20R_HASH_FUNC_COUNT ] = '\0';
}
#if defined(X20R_8WAY)
// Hash 8 lanes using the current thread's x20r hash order.
// The generic routine fills a scratch buffer with one 64 byte slot per lane;
// the first 32 bytes of every slot are packed back to back into output, so
// output must hold 8 * 32 bytes.
// Returns 1 on success. Returns 0, leaving output untouched, when the generic
// routine returns zero.
int x20r_8x64_hash( void* output, const void* input, int thrid )
{
   uint8_t hash[64*8] __attribute__ ((aligned (128)));
   uint8_t *dst = (uint8_t*)output;

   if ( !x16r_8x64_hash_generic( hash, input, thrid, x20r_hash_order,
                                 X20R_HASH_FUNC_COUNT ) )
      return 0;

   for ( int lane = 0; lane < 8; lane++ )
      memcpy( dst + 32*lane, hash + 64*lane, 32 );

   return 1;
}
// Scan nonces 8 at a time, starting at work->data[19].
//
// work:        data[] is the block header words, target[] the share target.
// max_nonce:   scanning stops once n reaches max_nonce - 8.
// hashes_done: receives the number of nonces stepped over by this call.
// mythr:       supplies the thread id and is passed on to submit_solution.
//
// On exit work->data[19] holds the next nonce to try. Always returns 0.
int scanhash_x20r_8x64( struct work *work, uint32_t max_nonce,
                        uint64_t *hashes_done, struct thr_info *mythr)
{
   uint32_t hash[16*8] __attribute__ ((aligned (128)));
   uint32_t vdata[20*8] __attribute__ ((aligned (64)));
   uint32_t *pdata = work->data;
   uint32_t *ptarget = work->target;
   const uint32_t first_nonce = pdata[19];
   const uint32_t last_nonce = max_nonce - 8;
   uint32_t n = first_nonce;
   __m512i *noncev = (__m512i*)vdata + 9;   // aligned
   const int thr_id = mythr->id;
   volatile uint8_t *restart = &(work_restart[thr_id].restart);
   const bool bench = opt_benchmark;

   // Benchmark mode overwrites the top target word with a fixed value.
   if ( bench )   ptarget[7] = 0x0cff;

   // The hash order is derived from header words 1..3 and is rebuilt only
   // when the work height differs from the one last seen by this thread.
   // NOTE(review): different header words at an unchanged height would not
   // trigger a rebuild - confirm that is acceptable.
   static __thread uint32_t saved_height = UINT32_MAX;
   if ( work->height != saved_height )
   {
      vdata[1] = bswap_32( pdata[1] );
      vdata[2] = bswap_32( pdata[2] );
      vdata[3] = bswap_32( pdata[3] );
      saved_height = work->height;
      x20r_getAlgoString( (const uint8_t*)(&vdata[1]), x20r_hash_order );
      if ( !opt_quiet && !thr_id )
         applog( LOG_INFO, "hash order %s", x20r_hash_order );
   }

   x16r_8x64_prehash( vdata, pdata, x20r_hash_order );

   // Blend the 8 starting nonces into the odd 32 bit positions of vector 9.
   *noncev = mm512_intrlv_blend_32( _mm512_set_epi32(
                             n+7, 0, n+6, 0, n+5, 0, n+4, 0,
                             n+3, 0, n+2, 0, n+1, 0, n,   0 ), *noncev );
   do
   {
      // The lane scan is the body of the if: x20r_8x64_hash writes hash only
      // when it returns non-zero, so a zero return must skip the scan rather
      // than examine unwritten or stale results.
      if ( x20r_8x64_hash( hash, vdata, thr_id ) )
      {
         for ( int i = 0; i < 8; i++ )
            if ( unlikely( valid_hash( hash + (i<<3), ptarget ) && !bench ) )
            {
               pdata[19] = bswap_32( n+i );
               submit_solution( work, hash+(i<<3), mythr );
            }
      }
      // Add 8 to the upper 32 bits of every 64 bit element, i.e. each nonce.
      *noncev = _mm512_add_epi32( *noncev,
                                  _mm512_set1_epi64( 0x0000000800000000 ) );
      n += 8;
   } while ( likely( ( n < last_nonce ) && !(*restart) ) );

   pdata[19] = n;
   *hashes_done = n - first_nonce;
   return 0;
}
#elif defined(X20R_4WAY)
// Hash 4 lanes using the current thread's x20r hash order.
// The generic routine fills a scratch buffer with one 64 byte slot per lane;
// the first 32 bytes of every slot are packed back to back into output, so
// output must hold 4 * 32 bytes.
// Returns 1 on success. Returns 0, leaving output untouched, when the generic
// routine returns zero.
int x20r_4x64_hash( void* output, const void* input, int thrid )
{
   uint8_t hash[64*4] __attribute__ ((aligned (64)));
   uint8_t *dst = (uint8_t*)output;

   if ( !x16r_4x64_hash_generic( hash, input, thrid, x20r_hash_order,
                                 X20R_HASH_FUNC_COUNT ) )
      return 0;

   for ( int lane = 0; lane < 4; lane++ )
      memcpy( dst + 32*lane, hash + 64*lane, 32 );

   return 1;
}
// Scan nonces 4 at a time, starting at work->data[19].
//
// work:        data[] is the block header words, target[] the share target.
// max_nonce:   scanning stops once n reaches max_nonce - 4.
// hashes_done: receives the number of nonces stepped over by this call.
// mythr:       supplies the thread id and is passed on to submit_solution.
//
// On exit work->data[19] holds the next nonce to try. Always returns 0.
int scanhash_x20r_4x64( struct work *work, uint32_t max_nonce,
                        uint64_t *hashes_done, struct thr_info *mythr)
{
   uint32_t hash[16*4] __attribute__ ((aligned (64)));
   uint32_t vdata[20*4] __attribute__ ((aligned (64)));
   uint32_t *pdata = work->data;
   uint32_t *ptarget = work->target;
   const uint32_t first_nonce = pdata[19];
   const uint32_t last_nonce = max_nonce - 4;
   uint32_t n = first_nonce;
   __m256i *noncev = (__m256i*)vdata + 9;   // aligned
   const int thr_id = mythr->id;
   const bool bench = opt_benchmark;
   volatile uint8_t *restart = &(work_restart[thr_id].restart);

   // Benchmark mode overwrites the top target word with a fixed value.
   if ( bench )   ptarget[7] = 0x0cff;

   // The hash order is derived from header words 1..3 and is rebuilt only
   // when the work height differs from the one last seen by this thread.
   // NOTE(review): different header words at an unchanged height would not
   // trigger a rebuild - confirm that is acceptable.
   static __thread uint32_t saved_height = UINT32_MAX;
   if ( work->height != saved_height )
   {
      vdata[1] = bswap_32( pdata[1] );
      vdata[2] = bswap_32( pdata[2] );
      vdata[3] = bswap_32( pdata[3] );
      saved_height = work->height;
      x20r_getAlgoString( (const uint8_t*)(&vdata[1]), x20r_hash_order );
      if ( !opt_quiet && !thr_id )
         applog( LOG_INFO, "hash order %s", x20r_hash_order );
   }

   x16r_4x64_prehash( vdata, pdata, x20r_hash_order );

   // Blend the 4 starting nonces into the odd 32 bit positions of vector 9.
   *noncev = mm256_intrlv_blend_32(
                   _mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
   do
   {
      // The lane scan is the body of the if: x20r_4x64_hash writes hash only
      // when it returns non-zero, so a zero return must skip the scan rather
      // than examine unwritten or stale results.
      if ( x20r_4x64_hash( hash, vdata, thr_id ) )
      {
         for ( int i = 0; i < 4; i++ )
            if ( unlikely( valid_hash( hash + (i<<3), ptarget ) && !bench ) )
            {
               pdata[19] = bswap_32( n+i );
               submit_solution( work, hash+(i<<3), mythr );
            }
      }
      // Add 4 to the upper 32 bits of every 64 bit element, i.e. each nonce.
      *noncev = _mm256_add_epi32( *noncev,
                                  _mm256_set1_epi64x( 0x0000000400000000 ) );
      n += 4;
   } while ( likely( ( n < last_nonce ) && !(*restart) ) );

   pdata[19] = n;
   *hashes_done = n - first_nonce;
   return 0;
}
#elif defined(X20R_2WAY)
// Hash 2 lanes using the current thread's x20r hash order.
// The generic routine fills a scratch buffer with one 64 byte slot per lane;
// the first 32 bytes of every slot are packed back to back into output, so
// output must hold 2 * 32 bytes.
// Returns 1 on success. Returns 0, leaving output untouched, when the generic
// routine returns zero.
int x20r_2x64_hash( void* output, const void* input, int thrid )
{
   uint8_t hash[64*2] __attribute__ ((aligned (64)));
   uint8_t *dst = (uint8_t*)output;

   if ( !x16r_2x64_hash_generic( hash, input, thrid, x20r_hash_order,
                                 X20R_HASH_FUNC_COUNT ) )
      return 0;

   for ( int lane = 0; lane < 2; lane++ )
      memcpy( dst + 32*lane, hash + 64*lane, 32 );

   return 1;
}
// Scan nonces 2 at a time, starting at work->data[19].
//
// work:        data[] is the block header words, target[] the share target.
// max_nonce:   scanning stops once n reaches max_nonce - 2.
// hashes_done: receives the number of nonces stepped over by this call.
// mythr:       supplies the thread id and is passed on to submit_solution.
//
// On exit work->data[19] holds the next nonce to try. Always returns 0.
int scanhash_x20r_2x64( struct work *work, uint32_t max_nonce,
                        uint64_t *hashes_done, struct thr_info *mythr)
{
   uint32_t hash[16*2] __attribute__ ((aligned (64)));
   uint32_t vdata[20*2] __attribute__ ((aligned (64)));
   uint32_t *pdata = work->data;
   uint32_t *ptarget = work->target;
   const uint32_t first_nonce = pdata[19];
   const uint32_t last_nonce = max_nonce - 2;
   uint32_t n = first_nonce;
   v128_t *noncev = (v128_t*)vdata + 9;
   const int thr_id = mythr->id;
   const bool bench = opt_benchmark;
   volatile uint8_t *restart = &(work_restart[thr_id].restart);

   // Benchmark mode overwrites the top target word with a fixed value.
   if ( bench )   ptarget[7] = 0x0cff;

   // The hash order is derived from header words 1..3 and is rebuilt only
   // when the work height differs from the one last seen by this thread.
   // NOTE(review): different header words at an unchanged height would not
   // trigger a rebuild - confirm that is acceptable.
   static __thread uint32_t saved_height = UINT32_MAX;
   if ( work->height != saved_height )
   {
      vdata[1] = bswap_32( pdata[1] );
      vdata[2] = bswap_32( pdata[2] );
      vdata[3] = bswap_32( pdata[3] );
      saved_height = work->height;
      x20r_getAlgoString( (const uint8_t*)(&vdata[1]), x20r_hash_order );
      if ( !opt_quiet && !thr_id )
         applog( LOG_INFO, "hash order %s", x20r_hash_order );
   }

   x16r_2x64_prehash( vdata, pdata, x20r_hash_order );

   // Blend the 2 starting nonces into the odd 32 bit positions of vector 9.
   *noncev = v128_intrlv_blend_32( v128_set32( n+1, 0, n, 0 ), *noncev );
   do
   {
      // The lane scan is the body of the if: x20r_2x64_hash writes hash only
      // when it returns non-zero, so a zero return must skip the scan rather
      // than examine unwritten or stale results.
      if ( x20r_2x64_hash( hash, vdata, thr_id ) )
      {
         for ( int i = 0; i < 2; i++ )
            if ( unlikely( valid_hash( hash + (i<<3), ptarget ) && !bench ) )
            {
               pdata[19] = bswap_32( n+i );
               submit_solution( work, hash+(i<<3), mythr );
            }
      }
      // NOTE(review): presumably adds 2 to the upper 32 bits of each 64 bit
      // element, matching the wider variants - confirm v128_add32 / v128_64.
      *noncev = v128_add32( *noncev, v128_64( 0x0000000200000000 ) );
      n += 2;
   } while ( likely( ( n < last_nonce ) && !(*restart) ) );

   pdata[19] = n;
   *hashes_done = n - first_nonce;
   return 0;
}
#else
// Single lane x20r hash using the current thread's hash order.
// On success the first 32 bytes of the 64 byte scratch result are copied to
// output and 1 is returned. When the generic routine returns zero, output is
// left untouched and 0 is returned.
int x20r_hash( void* output, const void* input, int thrid )
{
   uint8_t hash[64] __attribute__ ((aligned (64)));

   if ( x16r_hash_generic( hash, input, thrid, x20r_hash_order,
                           X20R_HASH_FUNC_COUNT ) )
   {
      memcpy( output, hash, 32 );
      return 1;
   }
   return 0;
}
// Scalar nonce scan, one nonce per iteration, starting at work->data[19].
//
// work:        data[] is the block header words, target[] the share target.
// max_nonce:   scanning stops once the nonce reaches this value.
// hashes_done: receives the number of nonces tried by this call.
// mythr:       supplies the thread id and is passed on to submit_solution.
//
// On exit work->data[19] holds the next nonce to try. Always returns 0.
int scanhash_x20r( struct work *work, uint32_t max_nonce,
                   uint64_t *hashes_done, struct thr_info *mythr )
{
   uint32_t _ALIGN(32) hash32[8];
   uint32_t _ALIGN(32) edata[20];
   uint32_t *pdata = work->data;
   uint32_t *ptarget = work->target;
   const uint32_t first_nonce = pdata[19];
   const int thr_id = mythr->id;
   volatile uint8_t *restart = &( work_restart[thr_id].restart );
   const bool bench = opt_benchmark;
   uint32_t n = first_nonce;

   // Benchmark mode overwrites the top target word with a fixed value.
   if ( bench )   ptarget[7] = 0x0cff;

   // Rebuild the hash order from header words 1..3 only when the work height
   // differs from the one last seen by this thread.
   static __thread uint32_t saved_height = UINT32_MAX;
   if ( saved_height != work->height )
   {
      for ( int w = 1; w <= 3; w++ )
         edata[w] = bswap_32( pdata[w] );
      saved_height = work->height;
      x20r_getAlgoString( (const uint8_t*)(&edata[1]), x20r_hash_order );
      if ( !opt_quiet && !thr_id )
         applog( LOG_INFO, "hash order %s", x20r_hash_order );
   }

   x16r_prehash( edata, pdata, x20r_hash_order );

   // At least one nonce is always tried, as in a do/while loop.
   for (;;)
   {
      edata[19] = n;
      // The target is checked only when the hash call reports success.
      if ( x20r_hash( hash32, edata, thr_id )
           && unlikely( valid_hash( hash32, ptarget ) && !bench ) )
      {
         pdata[19] = bswap_32( n );
         submit_solution( work, hash32, mythr );
      }
      n++;
      if ( n >= max_nonce || *restart )
         break;
   }

   pdata[19] = n;
   *hashes_done = n - first_nonce;
   return 0;
}
#endif
// Install the x20r scanhash routine matching the vector width selected at
// compile time, set the optimization flags reported for this algo and set
// the global target factor. Always returns true.
bool register_x20r_algo( algo_gate_t* gate )
{
#if defined (X20R_8WAY)
   gate->scanhash  = (void*)&scanhash_x20r_8x64;
#elif defined (X20R_4WAY)
   gate->scanhash  = (void*)&scanhash_x20r_4x64;
#elif defined (X20R_2WAY)
   gate->scanhash  = (void*)&scanhash_x20r_2x64;
#else
   gate->scanhash  = (void*)&scanhash_x20r;
#endif
   gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT
                       | NEON_OPT;
   opt_target_factor = 256.0;
   return true;
}

View File

@@ -43,7 +43,8 @@ int x21s_8way_hash( void* output, const void* input, int thrid )
uint32_t *hash7 = (uint32_t*)( shash+448 ); uint32_t *hash7 = (uint32_t*)( shash+448 );
x21s_8way_context_overlay ctx; x21s_8way_context_overlay ctx;
if ( !x16r_8way_hash_generic( shash, input, thrid ) ) if ( !x16r_8way_hash_generic( shash, input, thrid, x16r_hash_order,
X16R_HASH_FUNC_COUNT ) )
return 0; return 0;
intrlv_8x32_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6, intrlv_8x32_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
@@ -135,7 +136,6 @@ int scanhash_x21s_8way( struct work *work, uint32_t max_nonce,
uint32_t vdata[20*8] __attribute__ ((aligned (64))); uint32_t vdata[20*8] __attribute__ ((aligned (64)));
uint32_t *hash7 = &hash[7<<3]; uint32_t *hash7 = &hash[7<<3];
uint32_t lane_hash[8] __attribute__ ((aligned (64))); uint32_t lane_hash[8] __attribute__ ((aligned (64)));
uint32_t bedata1[2] __attribute__((aligned(64)));
uint32_t *pdata = work->data; uint32_t *pdata = work->data;
uint32_t *ptarget = work->target; uint32_t *ptarget = work->target;
const uint32_t Htarg = ptarget[7]; const uint32_t Htarg = ptarget[7];
@@ -149,20 +149,18 @@ int scanhash_x21s_8way( struct work *work, uint32_t max_nonce,
if ( bench ) ptarget[7] = 0x0cff; if ( bench ) ptarget[7] = 0x0cff;
bedata1[0] = bswap_32( pdata[1] ); static __thread uint32_t saved_height = UINT32_MAX;
bedata1[1] = bswap_32( pdata[2] ); if ( work->height != saved_height )
static __thread uint32_t s_ntime = UINT32_MAX;
uint32_t ntime = bswap_32( pdata[17] );
if ( s_ntime != ntime )
{ {
x16_r_s_getAlgoString( (const uint8_t*)bedata1, x16r_hash_order ); vdata[1] = bswap_32( pdata[1] );
s_ntime = ntime; vdata[2] = bswap_32( pdata[2] );
if ( opt_debug && !thr_id ) saved_height = work->height;
applog( LOG_INFO, "hash order %s (%08x)", x16r_hash_order, ntime ); x16_r_s_getAlgoString( (const uint8_t*)(&vdata[1]), x16r_hash_order );
if ( !opt_quiet && !thr_id )
applog( LOG_INFO, "hash order %s", x16r_hash_order );
} }
x16r_8way_prehash( vdata, pdata ); x16r_8way_prehash( vdata, pdata, x16r_hash_order );
*noncev = mm512_intrlv_blend_32( _mm512_set_epi32( *noncev = mm512_intrlv_blend_32( _mm512_set_epi32(
n+7, 0, n+6, 0, n+5, 0, n+4, 0, n+7, 0, n+6, 0, n+5, 0, n+4, 0,
n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev ); n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
@@ -224,7 +222,8 @@ int x21s_4way_hash( void* output, const void* input, int thrid )
uint32_t *hash2 = (uint32_t*)( shash+128 ); uint32_t *hash2 = (uint32_t*)( shash+128 );
uint32_t *hash3 = (uint32_t*)( shash+192 ); uint32_t *hash3 = (uint32_t*)( shash+192 );
if ( !x16r_4way_hash_generic( shash, input, thrid ) ) if ( !x16r_4way_hash_generic( shash, input, thrid, x16r_hash_order,
X16R_HASH_FUNC_COUNT ) )
return 0; return 0;
intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 ); intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
@@ -308,20 +307,18 @@ int scanhash_x21s_4way( struct work *work, uint32_t max_nonce,
if ( bench ) ptarget[7] = 0x0cff; if ( bench ) ptarget[7] = 0x0cff;
bedata1[0] = bswap_32( pdata[1] ); static __thread uint32_t saved_height = UINT32_MAX;
bedata1[1] = bswap_32( pdata[2] ); if ( work->height != saved_height )
static __thread uint32_t s_ntime = UINT32_MAX;
uint32_t ntime = bswap_32( pdata[17] );
if ( s_ntime != ntime )
{ {
x16_r_s_getAlgoString( (const uint8_t*)bedata1, x16r_hash_order ); vdata[1] = bswap_32( pdata[1] );
s_ntime = ntime; vdata[2] = bswap_32( pdata[2] );
if ( opt_debug && !thr_id ) saved_height = work->height;
applog( LOG_DEBUG, "hash order %s (%08x)", x16r_hash_order, ntime ); x16_r_s_getAlgoString( (const uint8_t*)(&vdata[1]), x16r_hash_order );
if ( !opt_quiet && !thr_id )
applog( LOG_INFO, "hash order %s", x16r_hash_order );
} }
x16r_4way_prehash( vdata, pdata ); x16r_4way_prehash( vdata, pdata, x16r_hash_order );
*noncev = mm256_intrlv_blend_32( *noncev = mm256_intrlv_blend_32(
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev ); _mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
do do
@@ -372,7 +369,8 @@ int x21s_2x64_hash( void* output, const void* input, int thrid )
uint32_t *hash0 = (uint32_t*) shash; uint32_t *hash0 = (uint32_t*) shash;
uint32_t *hash1 = (uint32_t*)( shash+64 ); uint32_t *hash1 = (uint32_t*)( shash+64 );
if ( !x16r_2x64_hash_generic( shash, input, thrid ) ) if ( !x16r_2x64_hash_generic( shash, input, thrid, x16r_hash_order,
X16R_HASH_FUNC_COUNT ) )
return 0; return 0;
sph_haval256_5_init( &ctx.haval ); sph_haval256_5_init( &ctx.haval );
@@ -412,7 +410,6 @@ int scanhash_x21s_2x64( struct work *work, uint32_t max_nonce,
{ {
uint32_t hash[16*2] __attribute__ ((aligned (64))); uint32_t hash[16*2] __attribute__ ((aligned (64)));
uint32_t vdata[20*2] __attribute__ ((aligned (64))); uint32_t vdata[20*2] __attribute__ ((aligned (64)));
uint32_t bedata1[2] __attribute__((aligned(64)));
uint32_t *pdata = work->data; uint32_t *pdata = work->data;
uint32_t *ptarget = work->target; uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19]; const uint32_t first_nonce = pdata[19];
@@ -425,20 +422,18 @@ int scanhash_x21s_2x64( struct work *work, uint32_t max_nonce,
if ( bench ) ptarget[7] = 0x0cff; if ( bench ) ptarget[7] = 0x0cff;
bedata1[0] = bswap_32( pdata[1] ); static __thread uint32_t saved_height = UINT32_MAX;
bedata1[1] = bswap_32( pdata[2] ); if ( work->height != saved_height )
static __thread uint32_t s_ntime = UINT32_MAX;
uint32_t ntime = bswap_32( pdata[17] );
if ( s_ntime != ntime )
{ {
x16_r_s_getAlgoString( (const uint8_t*)bedata1, x16r_hash_order ); vdata[1] = bswap_32( pdata[1] );
s_ntime = ntime; vdata[2] = bswap_32( pdata[2] );
if ( opt_debug && !thr_id ) saved_height = work->height;
applog( LOG_DEBUG, "hash order %s (%08x)", x16r_hash_order, ntime ); x16_r_s_getAlgoString( (const uint8_t*)(&vdata[1]), x16r_hash_order );
if ( !opt_quiet && !thr_id )
applog( LOG_INFO, "hash order %s", x16r_hash_order );
} }
x16r_2x64_prehash( vdata, pdata ); x16r_2x64_prehash( vdata, pdata, x16r_hash_order );
*noncev = v128_intrlv_blend_32( v128_set32( n+1, 0, n, 0 ), *noncev ); *noncev = v128_intrlv_blend_32( v128_set32( n+1, 0, n, 0 ), *noncev );
do do
{ {

View File

@@ -33,7 +33,8 @@ int x21s_hash( void* output, const void* input, int thrid )
uint32_t _ALIGN(128) hash[16]; uint32_t _ALIGN(128) hash[16];
x21s_context_overlay ctx; x21s_context_overlay ctx;
if ( !x16r_hash_generic( hash, input, thrid ) ) if ( !x16r_hash_generic( hash, input, thrid, x16r_hash_order,
X16R_HASH_FUNC_COUNT ) )
return 0; return 0;
sph_haval256_5_init( &ctx.haval ); sph_haval256_5_init( &ctx.haval );
@@ -84,7 +85,7 @@ int scanhash_x21s( struct work *work, uint32_t max_nonce,
applog( LOG_INFO, "hash order %s (%08x)", x16r_hash_order, ntime ); applog( LOG_INFO, "hash order %s (%08x)", x16r_hash_order, ntime );
} }
x16r_prehash( edata, pdata ); x16r_prehash( edata, pdata, x16r_hash_order );
do do
{ {

20
configure vendored
View File

@@ -1,6 +1,6 @@
#! /bin/sh #! /bin/sh
# Guess values for system-dependent variables and create Makefiles. # Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.71 for cpuminer-opt 23.12. # Generated by GNU Autoconf 2.71 for cpuminer-opt 23.13.
# #
# #
# Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation, # Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation,
@@ -608,8 +608,8 @@ MAKEFLAGS=
# Identity of this package. # Identity of this package.
PACKAGE_NAME='cpuminer-opt' PACKAGE_NAME='cpuminer-opt'
PACKAGE_TARNAME='cpuminer-opt' PACKAGE_TARNAME='cpuminer-opt'
PACKAGE_VERSION='23.12' PACKAGE_VERSION='23.13'
PACKAGE_STRING='cpuminer-opt 23.12' PACKAGE_STRING='cpuminer-opt 23.13'
PACKAGE_BUGREPORT='' PACKAGE_BUGREPORT=''
PACKAGE_URL='' PACKAGE_URL=''
@@ -1360,7 +1360,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing. # Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh. # This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF cat <<_ACEOF
\`configure' configures cpuminer-opt 23.12 to adapt to many kinds of systems. \`configure' configures cpuminer-opt 23.13 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]... Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1432,7 +1432,7 @@ fi
if test -n "$ac_init_help"; then if test -n "$ac_init_help"; then
case $ac_init_help in case $ac_init_help in
short | recursive ) echo "Configuration of cpuminer-opt 23.12:";; short | recursive ) echo "Configuration of cpuminer-opt 23.13:";;
esac esac
cat <<\_ACEOF cat <<\_ACEOF
@@ -1538,7 +1538,7 @@ fi
test -n "$ac_init_help" && exit $ac_status test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then if $ac_init_version; then
cat <<\_ACEOF cat <<\_ACEOF
cpuminer-opt configure 23.12 cpuminer-opt configure 23.13
generated by GNU Autoconf 2.71 generated by GNU Autoconf 2.71
Copyright (C) 2021 Free Software Foundation, Inc. Copyright (C) 2021 Free Software Foundation, Inc.
@@ -1985,7 +1985,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake. running configure, to aid debugging if configure makes a mistake.
It was created by cpuminer-opt $as_me 23.12, which was It was created by cpuminer-opt $as_me 23.13, which was
generated by GNU Autoconf 2.71. Invocation command line was generated by GNU Autoconf 2.71. Invocation command line was
$ $0$ac_configure_args_raw $ $0$ac_configure_args_raw
@@ -3593,7 +3593,7 @@ fi
# Define the identity of the package. # Define the identity of the package.
PACKAGE='cpuminer-opt' PACKAGE='cpuminer-opt'
VERSION='23.12' VERSION='23.13'
printf "%s\n" "#define PACKAGE \"$PACKAGE\"" >>confdefs.h printf "%s\n" "#define PACKAGE \"$PACKAGE\"" >>confdefs.h
@@ -7508,7 +7508,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their # report actual input values of CONFIG_FILES etc. instead of their
# values after options handling. # values after options handling.
ac_log=" ac_log="
This file was extended by cpuminer-opt $as_me 23.12, which was This file was extended by cpuminer-opt $as_me 23.13, which was
generated by GNU Autoconf 2.71. Invocation command line was generated by GNU Autoconf 2.71. Invocation command line was
CONFIG_FILES = $CONFIG_FILES CONFIG_FILES = $CONFIG_FILES
@@ -7576,7 +7576,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config='$ac_cs_config_escaped' ac_cs_config='$ac_cs_config_escaped'
ac_cs_version="\\ ac_cs_version="\\
cpuminer-opt config.status 23.12 cpuminer-opt config.status 23.13
configured by $0, generated by GNU Autoconf 2.71, configured by $0, generated by GNU Autoconf 2.71,
with options \\"\$ac_cs_config\\" with options \\"\$ac_cs_config\\"

View File

@@ -1,4 +1,4 @@
AC_INIT([cpuminer-opt], [23.12]) AC_INIT([cpuminer-opt], [23.13])
AC_PREREQ([2.59c]) AC_PREREQ([2.59c])
AC_CANONICAL_SYSTEM AC_CANONICAL_SYSTEM

View File

@@ -1,6 +1,6 @@
#! /bin/sh #! /bin/sh
# Guess values for system-dependent variables and create Makefiles. # Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.69 for cpuminer-opt 23.12. # Generated by GNU Autoconf 2.69 for cpuminer-opt 23.13.
# #
# #
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc. # Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
@@ -577,8 +577,8 @@ MAKEFLAGS=
# Identity of this package. # Identity of this package.
PACKAGE_NAME='cpuminer-opt' PACKAGE_NAME='cpuminer-opt'
PACKAGE_TARNAME='cpuminer-opt' PACKAGE_TARNAME='cpuminer-opt'
PACKAGE_VERSION='23.12' PACKAGE_VERSION='23.13'
PACKAGE_STRING='cpuminer-opt 23.12' PACKAGE_STRING='cpuminer-opt 23.13'
PACKAGE_BUGREPORT='' PACKAGE_BUGREPORT=''
PACKAGE_URL='' PACKAGE_URL=''
@@ -1332,7 +1332,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing. # Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh. # This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF cat <<_ACEOF
\`configure' configures cpuminer-opt 23.12 to adapt to many kinds of systems. \`configure' configures cpuminer-opt 23.13 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]... Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1404,7 +1404,7 @@ fi
if test -n "$ac_init_help"; then if test -n "$ac_init_help"; then
case $ac_init_help in case $ac_init_help in
short | recursive ) echo "Configuration of cpuminer-opt 23.12:";; short | recursive ) echo "Configuration of cpuminer-opt 23.13:";;
esac esac
cat <<\_ACEOF cat <<\_ACEOF
@@ -1509,7 +1509,7 @@ fi
test -n "$ac_init_help" && exit $ac_status test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then if $ac_init_version; then
cat <<\_ACEOF cat <<\_ACEOF
cpuminer-opt configure 23.12 cpuminer-opt configure 23.13
generated by GNU Autoconf 2.69 generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc. Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2012,7 +2012,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake. running configure, to aid debugging if configure makes a mistake.
It was created by cpuminer-opt $as_me 23.12, which was It was created by cpuminer-opt $as_me 23.13, which was
generated by GNU Autoconf 2.69. Invocation command line was generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@ $ $0 $@
@@ -2993,7 +2993,7 @@ fi
# Define the identity of the package. # Define the identity of the package.
PACKAGE='cpuminer-opt' PACKAGE='cpuminer-opt'
VERSION='23.12' VERSION='23.13'
cat >>confdefs.h <<_ACEOF cat >>confdefs.h <<_ACEOF
@@ -6718,7 +6718,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their # report actual input values of CONFIG_FILES etc. instead of their
# values after options handling. # values after options handling.
ac_log=" ac_log="
This file was extended by cpuminer-opt $as_me 23.12, which was This file was extended by cpuminer-opt $as_me 23.13, which was
generated by GNU Autoconf 2.69. Invocation command line was generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES CONFIG_FILES = $CONFIG_FILES
@@ -6784,7 +6784,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\ ac_cs_version="\\
cpuminer-opt config.status 23.12 cpuminer-opt config.status 23.13
configured by $0, generated by GNU Autoconf 2.69, configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\" with options \\"\$ac_cs_config\\"

View File

@@ -2837,15 +2837,6 @@ static void show_credits()
#define check_cpu_capability() cpu_capability( false ) #define check_cpu_capability() cpu_capability( false )
#define display_cpu_capability() cpu_capability( true ) #define display_cpu_capability() cpu_capability( true )
#if defined(__aarch64__)
#define XSTR(x) STR(x)
#define STR(x) #x
//#pragma message "Building for armv" XSTR(__ARM_ARCH)
#endif
static bool cpu_capability( bool display_only ) static bool cpu_capability( bool display_only )
{ {
char cpu_brand[0x40]; char cpu_brand[0x40];

View File

@@ -672,6 +672,7 @@ enum algos {
ALGO_X16RT_VEIL, ALGO_X16RT_VEIL,
ALGO_X16S, ALGO_X16S,
ALGO_X17, ALGO_X17,
ALGO_X20R,
ALGO_X21S, ALGO_X21S,
ALGO_X22I, ALGO_X22I,
ALGO_X25X, ALGO_X25X,
@@ -767,6 +768,7 @@ static const char* const algo_names[] = {
"x16rt-veil", "x16rt-veil",
"x16s", "x16s",
"x17", "x17",
"x20r",
"x21s", "x21s",
"x22i", "x22i",
"x25x", "x25x",
@@ -930,6 +932,7 @@ Options:\n\
x16rt-veil Veil (VEIL)\n\ x16rt-veil Veil (VEIL)\n\
x16s\n\ x16s\n\
x17\n\ x17\n\
x20r\n\
x21s\n\ x21s\n\
x22i\n\ x22i\n\
x25x\n\ x25x\n\

View File

@@ -381,7 +381,7 @@ static inline void dintrlv_4x32_512( void *dst0, void *dst1, void *dst2,
d0[15] = s[ 60]; d1[15] = s[ 61]; d2[15] = s[ 62]; d3[15] = s[ 63]; d0[15] = s[ 60]; d1[15] = s[ 61]; d2[15] = s[ 62]; d3[15] = s[ 63];
} }
#endif // SSE4_1 else SSE2 or NEON #endif // SSE4_1 or NEON else SSE2
static inline void extr_lane_4x32( void *d, const void *s, static inline void extr_lane_4x32( void *d, const void *s,
const int lane, const int bit_len ) const int lane, const int bit_len )

View File

@@ -40,7 +40,7 @@
#define v128u8_load( p ) vld1q_u16( (uint8_t*)(p) ) #define v128u8_load( p ) vld1q_u16( (uint8_t*)(p) )
#define v128u8_store( p, v ) vst1q_u16( (uint8_t*)(p), v ) #define v128u8_store( p, v ) vst1q_u16( (uint8_t*)(p), v )
// load & set1 combined // load & set1 combined, doesn't work
#define v128_load1_64(p) vld1q_dup_u64( (uint64_t*)(p) ) #define v128_load1_64(p) vld1q_dup_u64( (uint64_t*)(p) )
#define v128_load1_32(p) vld1q_dup_u32( (uint32_t*)(p) ) #define v128_load1_32(p) vld1q_dup_u32( (uint32_t*)(p) )
#define v128_load1_16(p) vld1q_dup_u16( (uint16_t*)(p) ) #define v128_load1_16(p) vld1q_dup_u16( (uint16_t*)(p) )

View File

@@ -930,7 +930,9 @@ static inline void cpu_brand_string( char* s )
#elif defined(__arm__) || defined(__aarch64__) #elif defined(__arm__) || defined(__aarch64__)
sprintf( s, "ARM 64 bit CPU" ); unsigned int cpu_info[4] = { 0 };
cpuid( 0, 0, cpu_info );
sprintf( s, "ARM 64 bit CPU, HWCAP %08x", cpu_info[0] );
#else #else