This commit is contained in:
Jay D Dee
2023-11-21 14:18:15 -05:00
parent fc696dbbe5
commit 045b42babf
21 changed files with 573 additions and 214 deletions

View File

@@ -250,6 +250,7 @@ cpuminer_SOURCES = \
algo/x16/x16rt.c \
algo/x16/x16rt-4way.c \
algo/x16/hex.c \
algo/x16/x20r.c \
algo/x16/x21s-4way.c \
algo/x16/x21s.c \
algo/x16/minotaur.c \

View File

@@ -75,6 +75,11 @@ If not what makes it happen or not happen?
Change Log
----------
v23.13
Added x20r algo.
Eliminated redundant hash order calculations for x16r family.
v23.12
Several bug fixes and speed improvements for x16r family for all CPU architectures.

View File

@@ -368,6 +368,7 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
case ALGO_X16RT_VEIL: rc = register_x16rt_veil_algo ( gate ); break;
case ALGO_X16S: rc = register_x16s_algo ( gate ); break;
case ALGO_X17: rc = register_x17_algo ( gate ); break;
case ALGO_X20R: rc = register_x20r_algo ( gate ); break;
case ALGO_X21S: rc = register_x21s_algo ( gate ); break;
case ALGO_X22I: rc = register_x22i_algo ( gate ); break;
case ALGO_X25X: rc = register_x25x_algo ( gate ); break;

View File

@@ -19,12 +19,12 @@
// Perform midstate prehash of hash functions with block size <= 72 bytes,
// 76 bytes for hash functions that operate on 32 bit data.
void x16r_8way_prehash( void *vdata, void *pdata )
void x16r_8way_prehash( void *vdata, void *pdata, const char *hash_order )
{
uint32_t vdata2[20*8] __attribute__ ((aligned (64)));
uint32_t edata[20] __attribute__ ((aligned (64)));
const char elem = x16r_hash_order[0];
const char elem = hash_order[0];
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
switch ( algo )
@@ -110,7 +110,8 @@ void x16r_8way_prehash( void *vdata, void *pdata )
// Called by wrapper hash function to optionally continue hashing and
// convert to final hash.
int x16r_8way_hash_generic( void* output, const void* input, int thrid )
int x16r_8way_hash_generic( void* output, const void* input, int thrid,
const char *hash_order, const int func_count )
{
uint32_t vhash[20*8] __attribute__ ((aligned (128)));
uint32_t hash0[20] __attribute__ ((aligned (16)));
@@ -136,9 +137,9 @@ int x16r_8way_hash_generic( void* output, const void* input, int thrid )
dintrlv_8x64( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
input, 640 );
for ( int i = 0; i < 16; i++ )
for ( int i = 0; i < func_count; i++ )
{
const char elem = x16r_hash_order[i];
const char elem = hash_order[i];
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
switch ( algo )
@@ -474,7 +475,8 @@ int x16r_8way_hash_generic( void* output, const void* input, int thrid )
int x16r_8way_hash( void* output, const void* input, int thrid )
{
uint8_t hash[64*8] __attribute__ ((aligned (128)));
if ( !x16r_8way_hash_generic( hash, input, thrid ) )
if ( !x16r_8way_hash_generic( hash, input, thrid, x16r_hash_order,
X16R_HASH_FUNC_COUNT ) )
return 0;
memcpy( output, hash, 32 );
@@ -495,7 +497,6 @@ int scanhash_x16r_8way( struct work *work, uint32_t max_nonce,
{
uint32_t hash[16*8] __attribute__ ((aligned (128)));
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
uint32_t bedata1[2];
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
@@ -508,21 +509,18 @@ int scanhash_x16r_8way( struct work *work, uint32_t max_nonce,
if ( bench ) ptarget[7] = 0x0cff;
bedata1[0] = bswap_32( pdata[1] );
bedata1[1] = bswap_32( pdata[2] );
static __thread uint32_t s_ntime = UINT32_MAX;
const uint32_t ntime = bswap_32( pdata[17] );
if ( s_ntime != ntime )
static __thread uint32_t saved_height = UINT32_MAX;
if ( work->height != saved_height )
{
x16_r_s_getAlgoString( (const uint8_t*)bedata1, x16r_hash_order );
s_ntime = ntime;
if ( opt_debug && !thr_id )
applog( LOG_INFO, "Hash order %s Ntime %08x", x16r_hash_order, ntime );
vdata[1] = bswap_32( pdata[1] );
vdata[2] = bswap_32( pdata[2] );
saved_height = work->height;
x16_r_s_getAlgoString( (const uint8_t*)(&vdata[1]), x16r_hash_order );
if ( !opt_quiet && !thr_id )
applog( LOG_INFO, "hash order %s", x16r_hash_order );
}
x16r_8way_prehash( vdata, pdata );
x16r_8way_prehash( vdata, pdata, x16r_hash_order );
*noncev = mm512_intrlv_blend_32( _mm512_set_epi32(
n+7, 0, n+6, 0, n+5, 0, n+4, 0,
n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
@@ -546,12 +544,12 @@ int scanhash_x16r_8way( struct work *work, uint32_t max_nonce,
#elif defined (X16R_4WAY)
void x16r_4way_prehash( void *vdata, void *pdata )
void x16r_4way_prehash( void *vdata, void *pdata, const char *hash_order )
{
uint32_t vdata2[20*4] __attribute__ ((aligned (64)));
uint32_t edata[20] __attribute__ ((aligned (64)));
const char elem = x16r_hash_order[0];
const char elem = hash_order[0];
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
switch ( algo )
@@ -627,7 +625,8 @@ void x16r_4way_prehash( void *vdata, void *pdata )
}
}
int x16r_4way_hash_generic( void* output, const void* input, int thrid )
int x16r_4way_hash_generic( void* output, const void* input, int thrid,
const char *hash_order, const int func_count )
{
uint32_t vhash[20*4] __attribute__ ((aligned (128)));
uint32_t hash0[20] __attribute__ ((aligned (32)));
@@ -644,9 +643,9 @@ int x16r_4way_hash_generic( void* output, const void* input, int thrid )
dintrlv_4x64( hash0, hash1, hash2, hash3, input, 640 );
for ( int i = 0; i < 16; i++ )
for ( int i = 0; i < func_count; i++ )
{
const char elem = x16r_hash_order[i];
const char elem = hash_order[i];
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
switch ( algo )
@@ -908,7 +907,8 @@ int x16r_4way_hash_generic( void* output, const void* input, int thrid )
int x16r_4way_hash( void* output, const void* input, int thrid )
{
uint8_t hash[64*4] __attribute__ ((aligned (64)));
if ( !x16r_4way_hash_generic( hash, input, thrid ) )
if ( !x16r_4way_hash_generic( hash, input, thrid, x16r_hash_order,
X16R_HASH_FUNC_COUNT ) )
return 0;
memcpy( output, hash, 32 );
@@ -924,7 +924,6 @@ int scanhash_x16r_4way( struct work *work, uint32_t max_nonce,
{
uint32_t hash[16*4] __attribute__ ((aligned (64)));
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
uint32_t bedata1[2];
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
@@ -937,20 +936,18 @@ int scanhash_x16r_4way( struct work *work, uint32_t max_nonce,
if ( bench ) ptarget[7] = 0x0cff;
bedata1[0] = bswap_32( pdata[1] );
bedata1[1] = bswap_32( pdata[2] );
static __thread uint32_t s_ntime = UINT32_MAX;
const uint32_t ntime = bswap_32( pdata[17] );
if ( s_ntime != ntime )
static __thread uint32_t saved_height = UINT32_MAX;
if ( work->height != saved_height )
{
x16_r_s_getAlgoString( (const uint8_t*)bedata1, x16r_hash_order );
s_ntime = ntime;
if ( opt_debug && !thr_id )
applog( LOG_INFO, "Hash order %s Ntime %08x", x16r_hash_order, ntime );
vdata[1] = bswap_32( pdata[1] );
vdata[2] = bswap_32( pdata[2] );
saved_height = work->height;
x16_r_s_getAlgoString( (const uint8_t*)(&vdata[1]), x16r_hash_order );
if ( !opt_quiet && !thr_id )
applog( LOG_INFO, "hash order %s", x16r_hash_order );
}
x16r_4way_prehash( vdata, pdata );
x16r_4way_prehash( vdata, pdata, x16r_hash_order );
*noncev = mm256_intrlv_blend_32(
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
do
@@ -973,10 +970,10 @@ int scanhash_x16r_4way( struct work *work, uint32_t max_nonce,
#elif defined (X16R_2WAY)
void x16r_2x64_prehash( void *vdata, void *pdata )
void x16r_2x64_prehash( void *vdata, void *pdata, const char *hash_order )
{
uint32_t edata[20] __attribute__ ((aligned (64)));
const char elem = x16r_hash_order[0];
const char elem = hash_order[0];
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
switch ( algo )
@@ -1051,7 +1048,8 @@ void x16r_2x64_prehash( void *vdata, void *pdata )
}
}
int x16r_2x64_hash_generic( void* output, const void* input, int thrid )
int x16r_2x64_hash_generic( void* output, const void* input, int thrid,
const char *hash_order, const int func_count )
{
uint32_t vhash[20*2] __attribute__ ((aligned (64)));
uint32_t hash0[20] __attribute__ ((aligned (32)));
@@ -1064,9 +1062,9 @@ int x16r_2x64_hash_generic( void* output, const void* input, int thrid )
dintrlv_2x64( hash0, hash1, input, 640 );
for ( int i = 0; i < 16; i++ )
for ( int i = 0; i < func_count; i++ )
{
const char elem = x16r_hash_order[i];
const char elem = hash_order[i];
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
switch ( algo )
@@ -1313,7 +1311,8 @@ int x16r_2x64_hash_generic( void* output, const void* input, int thrid )
int x16r_2x64_hash( void* output, const void* input, int thrid )
{
uint8_t hash[64*2] __attribute__ ((aligned (64)));
if ( !x16r_2x64_hash_generic( hash, input, thrid ) )
if ( !x16r_2x64_hash_generic( hash, input, thrid, x16r_hash_order,
X16R_HASH_FUNC_COUNT ) )
return 0;
memcpy( output, hash, 32 );
@@ -1327,7 +1326,6 @@ int scanhash_x16r_2x64( struct work *work, uint32_t max_nonce,
{
uint32_t hash[16*2] __attribute__ ((aligned (64)));
uint32_t vdata[20*2] __attribute__ ((aligned (64)));
uint32_t bedata1[2];
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
@@ -1340,20 +1338,18 @@ int scanhash_x16r_2x64( struct work *work, uint32_t max_nonce,
if ( bench ) ptarget[7] = 0x0cff;
bedata1[0] = bswap_32( pdata[1] );
bedata1[1] = bswap_32( pdata[2] );
static __thread uint32_t s_ntime = UINT32_MAX;
const uint32_t ntime = bswap_32( pdata[17] );
if ( s_ntime != ntime )
static __thread uint32_t saved_height = UINT32_MAX;
if ( work->height != saved_height )
{
x16_r_s_getAlgoString( (const uint8_t*)bedata1, x16r_hash_order );
s_ntime = ntime;
if ( opt_debug && !thr_id )
applog( LOG_INFO, "Hash order %s Ntime %08x", x16r_hash_order, ntime );
vdata[1] = bswap_32( pdata[1] );
vdata[2] = bswap_32( pdata[2] );
saved_height = work->height;
x16_r_s_getAlgoString( (const uint8_t*)(&vdata[1]), x16r_hash_order );
if ( !opt_quiet && !thr_id )
applog( LOG_INFO, "hash order %s", x16r_hash_order );
}
x16r_2x64_prehash( vdata, pdata );
x16r_2x64_prehash( vdata, pdata, x16r_hash_order );
*noncev = v128_intrlv_blend_32( v128_set32( n+1, 0, n, 0 ), *noncev );
do
{

View File

@@ -5,15 +5,15 @@ __thread char x16r_hash_order[ X16R_HASH_FUNC_COUNT + 1 ] = { 0 };
void (*x16_r_s_getAlgoString) ( const uint8_t*, char* ) = NULL;
#if defined (X16R_8WAY)
#if defined(X16R_8WAY)
__thread x16r_8way_context_overlay x16r_ctx;
#elif defined (X16R_4WAY)
#elif defined(X16R_4WAY)
__thread x16r_4way_context_overlay x16r_ctx;
#elif defined (X16R_2WAY)
#elif defined(X16R_2WAY)
__thread x16r_2x64_context_overlay x16r_ctx;
@@ -55,13 +55,13 @@ void x16s_getAlgoString( const uint8_t* prevblock, char *output )
bool register_x16r_algo( algo_gate_t* gate )
{
#if defined (X16R_8WAY)
#if defined(X16R_8WAY)
gate->scanhash = (void*)&scanhash_x16r_8way;
gate->hash = (void*)&x16r_8way_hash;
#elif defined (X16R_4WAY)
#elif defined(X16R_4WAY)
gate->scanhash = (void*)&scanhash_x16r_4way;
gate->hash = (void*)&x16r_4way_hash;
#elif defined (X16R_2WAY)
#elif defined(X16R_2WAY)
gate->scanhash = (void*)&scanhash_x16r_2x64;
gate->hash = (void*)&x16r_2x64_hash;
#else
@@ -77,13 +77,13 @@ bool register_x16r_algo( algo_gate_t* gate )
bool register_x16rv2_algo( algo_gate_t* gate )
{
#if defined (X16RV2_8WAY)
#if defined(X16RV2_8WAY)
gate->scanhash = (void*)&scanhash_x16rv2_8way;
gate->hash = (void*)&x16rv2_8way_hash;
#elif defined (X16RV2_4WAY)
#elif defined(X16RV2_4WAY)
gate->scanhash = (void*)&scanhash_x16rv2_4way;
gate->hash = (void*)&x16rv2_4way_hash;
#elif defined (X16RV2_2WAY)
#elif defined(X16RV2_2WAY)
gate->scanhash = (void*)&scanhash_x16rv2_2x64;
gate->hash = (void*)&x16rv2_2x64_hash;
#else
@@ -99,13 +99,13 @@ bool register_x16rv2_algo( algo_gate_t* gate )
bool register_x16s_algo( algo_gate_t* gate )
{
#if defined (X16R_8WAY)
#if defined(X16R_8WAY)
gate->scanhash = (void*)&scanhash_x16r_8way;
gate->hash = (void*)&x16r_8way_hash;
#elif defined (X16R_4WAY)
#elif defined(X16R_4WAY)
gate->scanhash = (void*)&scanhash_x16r_4way;
gate->hash = (void*)&x16r_4way_hash;
#elif defined (X16R_2WAY)
#elif defined(X16R_2WAY)
gate->scanhash = (void*)&scanhash_x16r_2x64;
gate->hash = (void*)&x16r_2x64_hash;
#else
@@ -235,13 +235,13 @@ void veil_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
bool register_x16rt_algo( algo_gate_t* gate )
{
#if defined (X16RT_8WAY)
#if defined(X16RT_8WAY)
gate->scanhash = (void*)&scanhash_x16rt_8way;
gate->hash = (void*)&x16r_8way_hash;
#elif defined (X16RT_4WAY)
#elif defined(X16RT_4WAY)
gate->scanhash = (void*)&scanhash_x16rt_4way;
gate->hash = (void*)&x16r_4way_hash;
#elif defined (X16RT_2WAY)
#elif defined(X16RT_2WAY)
gate->scanhash = (void*)&scanhash_x16rt_2x64;
gate->hash = (void*)&x16r_2x64_hash;
#else
@@ -256,13 +256,13 @@ bool register_x16rt_algo( algo_gate_t* gate )
bool register_x16rt_veil_algo( algo_gate_t* gate )
{
#if defined (X16RT_8WAY)
#if defined(X16RT_8WAY)
gate->scanhash = (void*)&scanhash_x16rt_8way;
gate->hash = (void*)&x16r_8way_hash;
#elif defined (X16RT_4WAY)
#elif defined(X16RT_4WAY)
gate->scanhash = (void*)&scanhash_x16rt_4way;
gate->hash = (void*)&x16r_4way_hash;
#elif defined (X16RT_2WAY)
#elif defined(X16RT_2WAY)
gate->scanhash = (void*)&scanhash_x16rt_2x64;
gate->hash = (void*)&x16r_2x64_hash;
#else
@@ -296,15 +296,15 @@ bool register_hex_algo( algo_gate_t* gate )
bool register_x21s_algo( algo_gate_t* gate )
{
#if defined (X21S_8WAY)
#if defined(X21S_8WAY)
gate->scanhash = (void*)&scanhash_x21s_8way;
gate->hash = (void*)&x21s_8way_hash;
gate->miner_thread_init = (void*)&x21s_8way_thread_init;
#elif defined (X21S_4WAY)
#elif defined(X21S_4WAY)
gate->scanhash = (void*)&scanhash_x21s_4way;
gate->hash = (void*)&x21s_4way_hash;
gate->miner_thread_init = (void*)&x21s_4way_thread_init;
#elif defined (X21S_2WAY)
#elif defined(X21S_2WAY)
gate->scanhash = (void*)&scanhash_x21s_2x64;
gate->hash = (void*)&x21s_2x64_hash;
gate->miner_thread_init = (void*)&x21s_2x64_thread_init;

View File

@@ -149,18 +149,23 @@ union _x16r_8way_context_overlay
hashState_echo echo;
#endif
} __attribute__ ((aligned (64)));
#define _x16r_8x64_context_overlay _x16r_8way_context_overlay
typedef union _x16r_8way_context_overlay x16r_8way_context_overlay;
#define x16r_8x64_context_overlay x16r_8way_context_overlay
extern __thread x16r_8way_context_overlay x16r_ctx;
void x16r_8way_prehash( void *, void * );
int x16r_8way_hash_generic( void *, const void *, int );
void x16r_8way_prehash( void *, void *, const char * );
int x16r_8way_hash_generic( void *, const void *, int, const char*, const int );
int x16r_8way_hash( void *, const void *, int );
int scanhash_x16r_8way( struct work *, uint32_t ,
uint64_t *, struct thr_info * );
extern __thread x16r_8way_context_overlay x16r_ctx;
// x16r_8x64_* names are aliases for the corresponding x16r_8way_* functions.
// Each alias must expand to the 8way name; a macro that expands to its own
// name is left unexpanded by the preprocessor and aliases nothing.
#define x16r_8x64_prehash x16r_8way_prehash
#define x16r_8x64_hash_generic x16r_8way_hash_generic
#define x16r_8x64_hash x16r_8way_hash
#define scanhash_x16r_8x64 scanhash_x16r_8way
#elif defined(X16R_4WAY)
@@ -189,17 +194,23 @@ union _x16r_4way_context_overlay
sph_whirlpool_context whirlpool;
sha512_4way_context sha512;
} __attribute__ ((aligned (64)));
#define _x16r_4x64_context_overlay _x16r_4way_context_overlay
typedef union _x16r_4way_context_overlay x16r_4way_context_overlay;
#define x16r_4x64_context_overlay x16r_4way_context_overlay
extern __thread x16r_4way_context_overlay x16r_ctx;
void x16r_4way_prehash( void *, void * );
int x16r_4way_hash_generic( void *, const void *, int );
void x16r_4way_prehash( void *, void *, const char * );
int x16r_4way_hash_generic( void *, const void *, int, const char*, const int );
int x16r_4way_hash( void *, const void *, int );
int scanhash_x16r_4way( struct work *, uint32_t,
uint64_t *, struct thr_info * );
extern __thread x16r_4way_context_overlay x16r_ctx;
// x16r_4x64_* names are aliases for the corresponding x16r_4way_* functions.
// Each alias must expand to the 4way name; a macro that expands to its own
// name is left unexpanded by the preprocessor and aliases nothing.
#define x16r_4x64_prehash x16r_4way_prehash
#define x16r_4x64_hash_generic x16r_4way_hash_generic
#define x16r_4x64_hash x16r_4way_hash
#define scanhash_x16r_4x64 scanhash_x16r_4way
#elif defined(X16R_2WAY)
@@ -241,8 +252,8 @@ union _x16r_2x64_context_overlay
typedef union _x16r_2x64_context_overlay x16r_2x64_context_overlay;
void x16r_2x64_prehash( void *, void * );
int x16r_2x64_hash_generic( void *, const void *, int );
void x16r_2x64_prehash( void *, void *, const char * );
int x16r_2x64_hash_generic( void *, const void *, int, const char*, const int );
int x16r_2x64_hash( void *, const void *, int );
int scanhash_x16r_2x64( struct work *, uint32_t,
uint64_t *, struct thr_info * );
@@ -288,8 +299,8 @@ typedef union _x16r_context_overlay x16r_context_overlay;
extern __thread x16r_context_overlay x16r_ref_ctx;
void x16r_prehash( void *, void * );
int x16r_hash_generic( void *, const void *, int );
void x16r_prehash( void *, void *, const char * );
int x16r_hash_generic( void *, const void *, int, const char*, const int );
int x16r_hash( void *, const void *, int );
int scanhash_x16r( struct work *, uint32_t, uint64_t *, struct thr_info * );

View File

@@ -10,9 +10,9 @@
#include <stdlib.h>
#include <string.h>
void x16r_prehash( void *edata, void *pdata )
void x16r_prehash( void *edata, void *pdata, const char *hash_order )
{
const char elem = x16r_hash_order[0];
const char elem = hash_order[0];
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
switch ( algo )
@@ -52,17 +52,18 @@ void x16r_prehash( void *edata, void *pdata )
}
}
int x16r_hash_generic( void* output, const void* input, int thrid )
int x16r_hash_generic( void* output, const void* input, int thrid,
const char *hash_order, const int func_count )
{
uint32_t _ALIGN(128) hash[16];
uint32_t _ALIGN(32) hash[16];
x16r_context_overlay ctx;
memcpy( &ctx, &x16r_ref_ctx, sizeof(ctx) );
void *in = (void*) input;
int size = 80;
for ( int i = 0; i < 16; i++ )
for ( int i = 0; i < func_count; i++ )
{
const char elem = x16r_hash_order[i];
const char elem = hash_order[i];
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
switch ( algo )
@@ -196,7 +197,8 @@ int x16r_hash_generic( void* output, const void* input, int thrid )
int x16r_hash( void* output, const void* input, int thrid )
{
uint8_t hash[64] __attribute__ ((aligned (64)));
if ( !x16r_hash_generic( hash, input, thrid ) )
if ( !x16r_hash_generic( hash, input, thrid, x16r_hash_order,
X16R_HASH_FUNC_COUNT ) )
return 0;
memcpy( output, hash, 32 );
@@ -206,8 +208,8 @@ int x16r_hash( void* output, const void* input, int thrid )
int scanhash_x16r( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t _ALIGN(128) hash32[8];
uint32_t _ALIGN(128) edata[20];
uint32_t _ALIGN(32) hash32[8];
uint32_t _ALIGN(32) edata[20];
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
@@ -229,7 +231,7 @@ int scanhash_x16r( struct work *work, uint32_t max_nonce,
applog( LOG_INFO, "hash order %s (%08x)", x16r_hash_order, ntime );
}
x16r_prehash( edata, pdata );
x16r_prehash( edata, pdata, x16r_hash_order );
do
{

View File

@@ -30,12 +30,12 @@ int scanhash_x16rt_8way( struct work *work, uint32_t max_nonce,
x16rt_getTimeHash( masked_ntime, &timeHash );
x16rt_getAlgoString( &timeHash[0], x16r_hash_order );
s_ntime = masked_ntime;
if ( !thr_id )
applog( LOG_INFO, "Hash order %s, Ntime %08x, time hash %08x",
x16r_hash_order, bswap_32( pdata[17] ), timeHash );
if ( !opt_quiet && !thr_id )
applog( LOG_INFO, "Hash order %s, Ntime %08x",
x16r_hash_order, bswap_32( pdata[17] ) );
}
x16r_8way_prehash( vdata, pdata );
x16r_8way_prehash( vdata, pdata, x16r_hash_order );
*noncev = mm512_intrlv_blend_32( _mm512_set_epi32(
n+7, 0, n+6, 0, n+5, 0, n+4, 0,
n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
@@ -84,12 +84,12 @@ int scanhash_x16rt_4way( struct work *work, uint32_t max_nonce,
x16rt_getTimeHash( masked_ntime, &timeHash );
x16rt_getAlgoString( &timeHash[0], x16r_hash_order );
s_ntime = masked_ntime;
if ( !thr_id )
applog( LOG_INFO, "Hash order %s, Ntime %08x, time hash %08x",
x16r_hash_order, bswap_32( pdata[17] ), timeHash );
if ( !opt_quiet && !thr_id )
applog( LOG_INFO, "Hash order %s, Ntime %08x",
x16r_hash_order, bswap_32( pdata[17] ) );
}
x16r_4way_prehash( vdata, pdata );
x16r_4way_prehash( vdata, pdata, x16r_hash_order );
*noncev = mm256_intrlv_blend_32(
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
do
@@ -137,12 +137,12 @@ int scanhash_x16rt_2x64( struct work *work, uint32_t max_nonce,
x16rt_getTimeHash( masked_ntime, &timeHash );
x16rt_getAlgoString( &timeHash[0], x16r_hash_order );
s_ntime = masked_ntime;
if ( !thr_id )
applog( LOG_INFO, "Hash order %s, Ntime %08x, time hash %08x",
x16r_hash_order, bswap_32( pdata[17] ), timeHash );
if ( !opt_quiet && !thr_id )
applog( LOG_INFO, "Hash order %s, Ntime %08x",
x16r_hash_order, bswap_32( pdata[17] ) );
}
x16r_2x64_prehash( vdata, pdata );
x16r_2x64_prehash( vdata, pdata, x16r_hash_order );
*noncev = v128_intrlv_blend_32( v128_set32( n+1, 0, n, 0 ), *noncev );
do
{

View File

@@ -31,7 +31,7 @@ int scanhash_x16rt( struct work *work, uint32_t max_nonce,
x16r_hash_order, swab32( pdata[17] ), timeHash );
}
x16r_prehash( edata, pdata );
x16r_prehash( edata, pdata, x16r_hash_order );
do
{

View File

@@ -593,7 +593,6 @@ int scanhash_x16rv2_8way( struct work *work, uint32_t max_nonce,
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
uint32_t vdata2[20*8] __attribute__ ((aligned (64)));
uint32_t edata[20] __attribute__ ((aligned (64)));
uint32_t bedata1[2] __attribute__((aligned(64)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
@@ -606,19 +605,15 @@ int scanhash_x16rv2_8way( struct work *work, uint32_t max_nonce,
if ( bench ) ptarget[7] = 0x0cff;
mm512_bswap32_intrlv80_8x64( vdata, pdata );
bedata1[0] = bswap_32( pdata[1] );
bedata1[1] = bswap_32( pdata[2] );
static __thread uint32_t s_ntime = UINT32_MAX;
const uint32_t ntime = bswap_32( pdata[17] );
if ( s_ntime != ntime )
static __thread uint32_t saved_height = UINT32_MAX;
if ( work->height != saved_height )
{
x16_r_s_getAlgoString( (const uint8_t*)bedata1, x16r_hash_order );
s_ntime = ntime;
vdata[1] = bswap_32( pdata[1] );
vdata[2] = bswap_32( pdata[2] );
saved_height = work->height;
x16_r_s_getAlgoString( (const uint8_t*)(&vdata[1]), x16r_hash_order );
if ( !opt_quiet && !thr_id )
applog( LOG_INFO, "hash order %s (%08x)", x16r_hash_order, ntime );
applog( LOG_INFO, "hash order %s", x16r_hash_order );
}
// Do midstate prehash on hash functions with block size <= 64 bytes.
@@ -1108,7 +1103,6 @@ int scanhash_x16rv2_4way( struct work *work, uint32_t max_nonce,
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
uint32_t vdata32[20*4] __attribute__ ((aligned (64)));
uint32_t edata[20];
uint32_t bedata1[2];
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
@@ -1121,17 +1115,15 @@ int scanhash_x16rv2_4way( struct work *work, uint32_t max_nonce,
if ( bench ) ptarget[7] = 0x0fff;
bedata1[0] = bswap_32( pdata[1] );
bedata1[1] = bswap_32( pdata[2] );
static __thread uint32_t s_ntime = UINT32_MAX;
const uint32_t ntime = bswap_32(pdata[17]);
if ( s_ntime != ntime )
static __thread uint32_t saved_height = UINT32_MAX;
if ( work->height != saved_height )
{
x16_r_s_getAlgoString( (const uint8_t*)bedata1, x16r_hash_order );
s_ntime = ntime;
vdata[1] = bswap_32( pdata[1] );
vdata[2] = bswap_32( pdata[2] );
saved_height = work->height;
x16_r_s_getAlgoString( (const uint8_t*)(&vdata[1]), x16r_hash_order );
if ( !opt_quiet && !thr_id )
applog( LOG_INFO, "hash order %s (%08x)", x16r_hash_order, ntime );
applog( LOG_INFO, "hash order %s", x16r_hash_order );
}
// Do midstate prehash on hash functions with block size <= 64 bytes.
@@ -1550,7 +1542,6 @@ int scanhash_x16rv2_2x64( struct work *work, uint32_t max_nonce,
uint32_t hash[2*16] __attribute__ ((aligned (64)));
uint32_t vdata[24*2] __attribute__ ((aligned (64)));
uint32_t edata[20];
uint32_t bedata1[2];
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
@@ -1563,17 +1554,15 @@ int scanhash_x16rv2_2x64( struct work *work, uint32_t max_nonce,
if ( bench ) ptarget[7] = 0x0fff;
bedata1[0] = bswap_32( pdata[1] );
bedata1[1] = bswap_32( pdata[2] );
static __thread uint32_t s_ntime = UINT32_MAX;
const uint32_t ntime = bswap_32(pdata[17]);
if ( s_ntime != ntime )
static __thread uint32_t saved_height = UINT32_MAX;
if ( work->height != saved_height )
{
x16_r_s_getAlgoString( (const uint8_t*)bedata1, x16r_hash_order );
s_ntime = ntime;
vdata[1] = bswap_32( pdata[1] );
vdata[2] = bswap_32( pdata[2] );
saved_height = work->height;
x16_r_s_getAlgoString( (const uint8_t*)(&vdata[1]), x16r_hash_order );
if ( !opt_quiet && !thr_id )
applog( LOG_INFO, "hash order %s (%08x)", x16r_hash_order, ntime );
applog( LOG_INFO, "hash order %s", x16r_hash_order );
}
// Do midstate prehash on hash functions with block size <= 64 bytes.

362
algo/x16/x20r.c Normal file
View File

@@ -0,0 +1,362 @@
#include "miner.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "algo/blake/sph_blake.h"
#include "algo/bmw/sph_bmw.h"
#include "algo/groestl/sph_groestl.h"
#include "algo/jh/sph_jh.h"
#include "algo/keccak/sph_keccak.h"
#include "algo/skein/sph_skein.h"
#include "algo/luffa/sph_luffa.h"
#include "algo/cubehash/sph_cubehash.h"
#include "algo/shavite/sph_shavite.h"
#include "algo/simd/sph_simd.h"
#include "algo/echo/sph_echo.h"
#include "algo/hamsi/sph_hamsi.h"
#include "algo/fugue/sph_fugue.h"
#include "algo/shabal/sph_shabal.h"
#include "algo/whirlpool/sph_whirlpool.h"
#include "algo/sha/sph_sha2.h"
#include "x16r-gate.h"
// Select which vectorized X20R implementation this file compiles, based on
// the instruction set macros predefined by the compiler for the target.
// At most one of X20R_8WAY / X20R_4WAY / X20R_2WAY is defined; when none is,
// the scalar x20r_hash / scanhash_x20r code in the final #else branch of this
// file is used instead.
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
#define X20R_8WAY 1
#elif defined(__AVX2__) && defined(__AES__)
#define X20R_4WAY 1
#elif defined(__SSE2__) || defined(__ARM_NEON)
#define X20R_2WAY 1
#endif
// X20R is not what it seems. It does not permute 20 functions over 20 rounds,
// it only permutes 16 of them. The last 4 functions are victims of trying to
// fit 20 elements in the space for only 16. Arithmetic overflow recycles the
// first 4 functions. Otherwise it's identical to X16R.
// Welcome to the real X20R.
// Number of hash rounds, and therefore the number of characters in the
// hash order string (excluding the terminating NUL).
#define X20R_HASH_FUNC_COUNT 20

/*
enum x20r_algo
{
   BLAKE = 0,
   BMW,
   GROESTL,
   JH,
   KECCAK,
   SKEIN,
   LUFFA,
   CUBEHASH,
   SHAVITE,
   SIMD,
   ECHO,
   HAMSI,
   FUGUE,
   SHABAL,
   WHIRLPOOL,
   SHA512,
   HAVAL,       // Last 4 names are meaningless and not used
   GOST,
   RADIOGATUN,
   PANAMA,
   X20R_HASH_FUNC_COUNT
};
*/

// Per-thread hash order string: X20R_HASH_FUNC_COUNT characters plus NUL.
static __thread char x20r_hash_order[ X20R_HASH_FUNC_COUNT + 1 ] = {0};

// Build the hash order string from the first 10 bytes of prevblock.
//
// prevblock: at least (X20R_HASH_FUNC_COUNT / 2) = 10 readable bytes.
// output:    receives X20R_HASH_FUNC_COUNT upper case hex characters
//            followed by a NUL, so it needs X20R_HASH_FUNC_COUNT + 1 bytes.
//
// Bytes are consumed in reverse order (prevblock[9] first, prevblock[0]
// last), the high nibble of each byte before the low nibble. Each nibble
// becomes one character '0'-'9' or 'A'-'F'.
static void x20r_getAlgoString(const uint8_t* prevblock, char *output)
{
   for ( int j = 0; j < X20R_HASH_FUNC_COUNT; j++ )
   {
      // 20 hex characters come from 10 bytes, taken in reverse byte order.
      const uint8_t b = ( X20R_HASH_FUNC_COUNT - 1 - j ) >> 1;
      const uint8_t algoDigit = ( j & 1 ) ? prevblock[b] & 0xF
                                          : prevblock[b] >> 4;
      // Write the character directly; no formatted output is needed for a
      // single hex digit.
      output[j] = (char)( algoDigit >= 10 ? 'A' + ( algoDigit - 10 )
                                          : '0' + algoDigit );
   }
   output[ X20R_HASH_FUNC_COUNT ] = '\0';
}
#if defined(X20R_8WAY)
// 8 lane X20R hash.
//
// Runs x16r_8x64_hash_generic over `input` using the per-thread
// x20r_hash_order and X20R_HASH_FUNC_COUNT rounds, then copies the first
// 32 bytes of each lane's 64 byte result slot to `output`, packed at
// 32 byte intervals (8 * 32 = 256 bytes written).
//
// Returns 1 on success. Returns 0, leaving `output` untouched, when the
// generic hash returns 0.
int x20r_8x64_hash( void* output, const void* input, int thrid )
{
   uint8_t hash[64*8] __attribute__ ((aligned (128)));
   // Byte pointer: arithmetic directly on void* is not standard C.
   uint8_t *out = (uint8_t*)output;

   if ( !x16r_8x64_hash_generic( hash, input, thrid, x20r_hash_order,
                                 X20R_HASH_FUNC_COUNT ) )
      return 0;

   for ( int lane = 0; lane < 8; lane++ )
      memcpy( out + 32*lane, hash + 64*lane, 32 );

   return 1;
}
// Scan nonces 8 at a time for X20R.
//
// Starts at work->data[19] and advances by 8 per iteration until n reaches
// max_nonce - 8 or the thread's work restart flag is set. Every lane whose
// hash passes valid_hash() against work->target is passed to
// submit_solution(), except in benchmark mode. On exit work->data[19] holds
// the next unscanned nonce and *hashes_done the number of nonces covered.
// Always returns 0.
int scanhash_x20r_8x64( struct work *work, uint32_t max_nonce,
                        uint64_t *hashes_done, struct thr_info *mythr)
{
   uint32_t hash[16*8] __attribute__ ((aligned (128)));
   uint32_t vdata[20*8] __attribute__ ((aligned (64)));
   uint32_t *pdata = work->data;
   uint32_t *ptarget = work->target;
   const uint32_t first_nonce = pdata[19];
   const uint32_t last_nonce = max_nonce - 8;
   uint32_t n = first_nonce;
   __m512i *noncev = (__m512i*)vdata + 9;   // aligned
   const int thr_id = mythr->id;
   volatile uint8_t *restart = &(work_restart[thr_id].restart);
   const bool bench = opt_benchmark;

   if ( bench ) ptarget[7] = 0x0cff;

   // The hash order is recomputed only when the block height changes.
   static __thread uint32_t saved_height = UINT32_MAX;
   if ( work->height != saved_height )
   {
      // vdata[1..3] is borrowed as scratch for the byte swapped words fed to
      // x20r_getAlgoString.
      // NOTE(review): presumably x16r_8x64_prehash rebuilds vdata afterwards
      // - confirm.
      vdata[1] = bswap_32( pdata[1] );
      vdata[2] = bswap_32( pdata[2] );
      vdata[3] = bswap_32( pdata[3] );
      saved_height = work->height;
      x20r_getAlgoString( (const uint8_t*)(&vdata[1]), x20r_hash_order );
      if ( !opt_quiet && !thr_id )
         applog( LOG_INFO, "hash order %s", x20r_hash_order );
   }

   x16r_8x64_prehash( vdata, pdata, x20r_hash_order );
   *noncev = mm512_intrlv_blend_32( _mm512_set_epi32(
                             n+7, 0, n+6, 0, n+5, 0, n+4, 0,
                             n+3, 0, n+2, 0, n+1, 0, n,   0 ), *noncev );
   do
   {
      // x20r_8x64_hash returns 0 without writing `hash` when the generic
      // hash returns 0, so lanes are only tested after a successful call.
      if ( x20r_8x64_hash( hash, vdata, thr_id ) )
      {
         for ( int i = 0; i < 8; i++ )
         {
            if ( unlikely( valid_hash( hash + (i<<3), ptarget ) && !bench ) )
            {
               pdata[19] = bswap_32( n+i );
               submit_solution( work, hash+(i<<3), mythr );
            }
         }
      }
      *noncev = _mm512_add_epi32( *noncev,
                                  _mm512_set1_epi64( 0x0000000800000000 ) );
      n += 8;
   } while ( likely( ( n < last_nonce ) && !(*restart) ) );
   pdata[19] = n;
   *hashes_done = n - first_nonce;
   return 0;
}
#elif defined(X20R_4WAY)
// 4 lane X20R hash.
//
// Runs x16r_4x64_hash_generic over `input` using the per-thread
// x20r_hash_order and X20R_HASH_FUNC_COUNT rounds, then copies the first
// 32 bytes of each lane's 64 byte result slot to `output`, packed at
// 32 byte intervals (4 * 32 = 128 bytes written).
//
// Returns 1 on success. Returns 0, leaving `output` untouched, when the
// generic hash returns 0.
int x20r_4x64_hash( void* output, const void* input, int thrid )
{
   uint8_t hash[64*4] __attribute__ ((aligned (64)));
   // Byte pointer: arithmetic directly on void* is not standard C.
   uint8_t *out = (uint8_t*)output;

   if ( !x16r_4x64_hash_generic( hash, input, thrid, x20r_hash_order,
                                 X20R_HASH_FUNC_COUNT ) )
      return 0;

   for ( int lane = 0; lane < 4; lane++ )
      memcpy( out + 32*lane, hash + 64*lane, 32 );

   return 1;
}
// Scan nonces 4 at a time for X20R.
//
// Starts at work->data[19] and advances by 4 per iteration until n reaches
// max_nonce - 4 or the thread's work restart flag is set. Every lane whose
// hash passes valid_hash() against work->target is passed to
// submit_solution(), except in benchmark mode. On exit work->data[19] holds
// the next unscanned nonce and *hashes_done the number of nonces covered.
// Always returns 0.
int scanhash_x20r_4x64( struct work *work, uint32_t max_nonce,
                        uint64_t *hashes_done, struct thr_info *mythr)
{
   uint32_t hash[16*4] __attribute__ ((aligned (64)));
   uint32_t vdata[20*4] __attribute__ ((aligned (64)));
   uint32_t *pdata = work->data;
   uint32_t *ptarget = work->target;
   const uint32_t first_nonce = pdata[19];
   const uint32_t last_nonce = max_nonce - 4;
   uint32_t n = first_nonce;
   __m256i *noncev = (__m256i*)vdata + 9;   // aligned
   const int thr_id = mythr->id;
   const bool bench = opt_benchmark;
   volatile uint8_t *restart = &(work_restart[thr_id].restart);

   if ( bench ) ptarget[7] = 0x0cff;

   // The hash order is recomputed only when the block height changes.
   static __thread uint32_t saved_height = UINT32_MAX;
   if ( work->height != saved_height )
   {
      // vdata[1..3] is borrowed as scratch for the byte swapped words fed to
      // x20r_getAlgoString.
      // NOTE(review): presumably x16r_4x64_prehash rebuilds vdata afterwards
      // - confirm.
      vdata[1] = bswap_32( pdata[1] );
      vdata[2] = bswap_32( pdata[2] );
      vdata[3] = bswap_32( pdata[3] );
      saved_height = work->height;
      x20r_getAlgoString( (const uint8_t*)(&vdata[1]), x20r_hash_order );
      if ( !opt_quiet && !thr_id )
         applog( LOG_INFO, "hash order %s", x20r_hash_order );
   }

   x16r_4x64_prehash( vdata, pdata, x20r_hash_order );
   *noncev = mm256_intrlv_blend_32(
                   _mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
   do
   {
      // x20r_4x64_hash returns 0 without writing `hash` when the generic
      // hash returns 0, so lanes are only tested after a successful call.
      if ( x20r_4x64_hash( hash, vdata, thr_id ) )
      {
         for ( int i = 0; i < 4; i++ )
         {
            if ( unlikely( valid_hash( hash + (i<<3), ptarget ) && !bench ) )
            {
               pdata[19] = bswap_32( n+i );
               submit_solution( work, hash+(i<<3), mythr );
            }
         }
      }
      *noncev = _mm256_add_epi32( *noncev,
                                  _mm256_set1_epi64x( 0x0000000400000000 ) );
      n += 4;
   } while ( likely( ( n < last_nonce ) && !(*restart) ) );
   pdata[19] = n;
   *hashes_done = n - first_nonce;
   return 0;
}
#elif defined(X20R_2WAY)
// 2 lane X20R hash.
//
// Runs x16r_2x64_hash_generic over `input` using the per-thread
// x20r_hash_order and X20R_HASH_FUNC_COUNT rounds, then copies the first
// 32 bytes of each lane's 64 byte result slot to `output`, packed at
// 32 byte intervals (2 * 32 = 64 bytes written).
//
// Returns 1 on success. Returns 0, leaving `output` untouched, when the
// generic hash returns 0.
int x20r_2x64_hash( void* output, const void* input, int thrid )
{
   uint8_t hash[64*2] __attribute__ ((aligned (64)));
   // Byte pointer: arithmetic directly on void* is not standard C.
   uint8_t *out = (uint8_t*)output;

   if ( !x16r_2x64_hash_generic( hash, input, thrid, x20r_hash_order,
                                 X20R_HASH_FUNC_COUNT ) )
      return 0;

   for ( int lane = 0; lane < 2; lane++ )
      memcpy( out + 32*lane, hash + 64*lane, 32 );

   return 1;
}
// Scan nonces 2 at a time for X20R.
//
// Starts at work->data[19] and advances by 2 per iteration until n reaches
// max_nonce - 2 or the thread's work restart flag is set. Every lane whose
// hash passes valid_hash() against work->target is passed to
// submit_solution(), except in benchmark mode. On exit work->data[19] holds
// the next unscanned nonce and *hashes_done the number of nonces covered.
// Always returns 0.
int scanhash_x20r_2x64( struct work *work, uint32_t max_nonce,
                        uint64_t *hashes_done, struct thr_info *mythr)
{
   uint32_t hash[16*2] __attribute__ ((aligned (64)));
   uint32_t vdata[20*2] __attribute__ ((aligned (64)));
   uint32_t *pdata = work->data;
   uint32_t *ptarget = work->target;
   const uint32_t first_nonce = pdata[19];
   const uint32_t last_nonce = max_nonce - 2;
   uint32_t n = first_nonce;
   v128_t *noncev = (v128_t*)vdata + 9;
   const int thr_id = mythr->id;
   const bool bench = opt_benchmark;
   volatile uint8_t *restart = &(work_restart[thr_id].restart);

   if ( bench ) ptarget[7] = 0x0cff;

   // The hash order is recomputed only when the block height changes.
   static __thread uint32_t saved_height = UINT32_MAX;
   if ( work->height != saved_height )
   {
      // vdata[1..3] is borrowed as scratch for the byte swapped words fed to
      // x20r_getAlgoString.
      // NOTE(review): presumably x16r_2x64_prehash rebuilds vdata afterwards
      // - confirm.
      vdata[1] = bswap_32( pdata[1] );
      vdata[2] = bswap_32( pdata[2] );
      vdata[3] = bswap_32( pdata[3] );
      saved_height = work->height;
      x20r_getAlgoString( (const uint8_t*)(&vdata[1]), x20r_hash_order );
      if ( !opt_quiet && !thr_id )
         applog( LOG_INFO, "hash order %s", x20r_hash_order );
   }

   x16r_2x64_prehash( vdata, pdata, x20r_hash_order );
   *noncev = v128_intrlv_blend_32( v128_set32( n+1, 0, n, 0 ), *noncev );
   do
   {
      // x20r_2x64_hash returns 0 without writing `hash` when the generic
      // hash returns 0, so lanes are only tested after a successful call.
      if ( x20r_2x64_hash( hash, vdata, thr_id ) )
      {
         for ( int i = 0; i < 2; i++ )
         {
            if ( unlikely( valid_hash( hash + (i<<3), ptarget ) && !bench ) )
            {
               pdata[19] = bswap_32( n+i );
               submit_solution( work, hash+(i<<3), mythr );
            }
         }
      }
      *noncev = v128_add32( *noncev, v128_64( 0x0000000200000000 ) );
      n += 2;
   } while ( likely( ( n < last_nonce ) && !(*restart) ) );
   pdata[19] = n;
   *hashes_done = n - first_nonce;
   return 0;
}
#else
// Scalar x20r hash wrapper.
//
// Runs the shared x16r hashing chain with the x20r hash order and function
// count into a 64 byte scratch buffer, then copies its first 32 bytes to
// output.
//
// Returns 1 when output was written, 0 (output untouched) when
// x16r_hash_generic returned 0.
int x20r_hash( void* output, const void* input, int thrid )
{
   uint8_t full_hash[64] __attribute__ ((aligned (64)));

   if ( x16r_hash_generic( full_hash, input, thrid, x20r_hash_order,
                           X20R_HASH_FUNC_COUNT ) )
   {
      memcpy( output, full_hash, 32 );
      return 1;
   }

   return 0;
}
int scanhash_x20r( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t _ALIGN(32) hash32[8];
uint32_t _ALIGN(32) edata[20];
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
const int thr_id = mythr->id;
uint32_t nonce = first_nonce;
volatile uint8_t *restart = &( work_restart[thr_id].restart );
const bool bench = opt_benchmark;
if ( bench ) ptarget[7] = 0x0cff;
static __thread uint32_t saved_height = UINT32_MAX;
if ( work->height != saved_height )
{
edata[1] = bswap_32( pdata[1] );
edata[2] = bswap_32( pdata[2] );
edata[3] = bswap_32( pdata[3] );
saved_height = work->height;
x20r_getAlgoString( (const uint8_t*)(&edata[1]), x20r_hash_order );
if ( !opt_quiet && !thr_id )
applog( LOG_INFO, "hash order %s", x20r_hash_order );
}
x16r_prehash( edata, pdata, x20r_hash_order );
do
{
edata[19] = nonce;
if ( x20r_hash( hash32, edata, thr_id ) )
if ( unlikely( valid_hash( hash32, ptarget ) && !bench ) )
{
pdata[19] = bswap_32( nonce );
submit_solution( work, hash32, mythr );
}
nonce++;
} while ( nonce < max_nonce && !(*restart) );
pdata[19] = nonce;
*hashes_done = pdata[19] - first_nonce;
return 0;
}
#endif
// Register the x20r algorithm with the algo gate.
//
// Installs the scanhash implementation selected at compile time by the
// X20R_8WAY / X20R_4WAY / X20R_2WAY macros (falling back to the scalar
// scanhash_x20r), advertises the supported optimization flags, and sets
// the global opt_target_factor to 256.0.
//
// Always returns true.
bool register_x20r_algo( algo_gate_t* gate )
{
#if defined (X20R_8WAY)
   gate->scanhash = (void*)&scanhash_x20r_8x64;
#elif defined (X20R_4WAY)
   gate->scanhash = (void*)&scanhash_x20r_4x64;
#elif defined (X20R_2WAY)
   gate->scanhash = (void*)&scanhash_x20r_2x64;
#else
   gate->scanhash = (void*)&scanhash_x20r;
#endif
   gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT
                       | NEON_OPT;
   opt_target_factor = 256.0;
   return true;
}

View File

@@ -43,7 +43,8 @@ int x21s_8way_hash( void* output, const void* input, int thrid )
uint32_t *hash7 = (uint32_t*)( shash+448 );
x21s_8way_context_overlay ctx;
if ( !x16r_8way_hash_generic( shash, input, thrid ) )
if ( !x16r_8way_hash_generic( shash, input, thrid, x16r_hash_order,
X16R_HASH_FUNC_COUNT ) )
return 0;
intrlv_8x32_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
@@ -135,7 +136,6 @@ int scanhash_x21s_8way( struct work *work, uint32_t max_nonce,
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
uint32_t *hash7 = &hash[7<<3];
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
uint32_t bedata1[2] __attribute__((aligned(64)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t Htarg = ptarget[7];
@@ -149,20 +149,18 @@ int scanhash_x21s_8way( struct work *work, uint32_t max_nonce,
if ( bench ) ptarget[7] = 0x0cff;
bedata1[0] = bswap_32( pdata[1] );
bedata1[1] = bswap_32( pdata[2] );
static __thread uint32_t s_ntime = UINT32_MAX;
uint32_t ntime = bswap_32( pdata[17] );
if ( s_ntime != ntime )
static __thread uint32_t saved_height = UINT32_MAX;
if ( work->height != saved_height )
{
x16_r_s_getAlgoString( (const uint8_t*)bedata1, x16r_hash_order );
s_ntime = ntime;
if ( opt_debug && !thr_id )
applog( LOG_INFO, "hash order %s (%08x)", x16r_hash_order, ntime );
vdata[1] = bswap_32( pdata[1] );
vdata[2] = bswap_32( pdata[2] );
saved_height = work->height;
x16_r_s_getAlgoString( (const uint8_t*)(&vdata[1]), x16r_hash_order );
if ( !opt_quiet && !thr_id )
applog( LOG_INFO, "hash order %s", x16r_hash_order );
}
x16r_8way_prehash( vdata, pdata );
x16r_8way_prehash( vdata, pdata, x16r_hash_order );
*noncev = mm512_intrlv_blend_32( _mm512_set_epi32(
n+7, 0, n+6, 0, n+5, 0, n+4, 0,
n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
@@ -224,7 +222,8 @@ int x21s_4way_hash( void* output, const void* input, int thrid )
uint32_t *hash2 = (uint32_t*)( shash+128 );
uint32_t *hash3 = (uint32_t*)( shash+192 );
if ( !x16r_4way_hash_generic( shash, input, thrid ) )
if ( !x16r_4way_hash_generic( shash, input, thrid, x16r_hash_order,
X16R_HASH_FUNC_COUNT ) )
return 0;
intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
@@ -308,20 +307,18 @@ int scanhash_x21s_4way( struct work *work, uint32_t max_nonce,
if ( bench ) ptarget[7] = 0x0cff;
bedata1[0] = bswap_32( pdata[1] );
bedata1[1] = bswap_32( pdata[2] );
static __thread uint32_t s_ntime = UINT32_MAX;
uint32_t ntime = bswap_32( pdata[17] );
if ( s_ntime != ntime )
static __thread uint32_t saved_height = UINT32_MAX;
if ( work->height != saved_height )
{
x16_r_s_getAlgoString( (const uint8_t*)bedata1, x16r_hash_order );
s_ntime = ntime;
if ( opt_debug && !thr_id )
applog( LOG_DEBUG, "hash order %s (%08x)", x16r_hash_order, ntime );
vdata[1] = bswap_32( pdata[1] );
vdata[2] = bswap_32( pdata[2] );
saved_height = work->height;
x16_r_s_getAlgoString( (const uint8_t*)(&vdata[1]), x16r_hash_order );
if ( !opt_quiet && !thr_id )
applog( LOG_INFO, "hash order %s", x16r_hash_order );
}
x16r_4way_prehash( vdata, pdata );
x16r_4way_prehash( vdata, pdata, x16r_hash_order );
*noncev = mm256_intrlv_blend_32(
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
do
@@ -372,7 +369,8 @@ int x21s_2x64_hash( void* output, const void* input, int thrid )
uint32_t *hash0 = (uint32_t*) shash;
uint32_t *hash1 = (uint32_t*)( shash+64 );
if ( !x16r_2x64_hash_generic( shash, input, thrid ) )
if ( !x16r_2x64_hash_generic( shash, input, thrid, x16r_hash_order,
X16R_HASH_FUNC_COUNT ) )
return 0;
sph_haval256_5_init( &ctx.haval );
@@ -412,7 +410,6 @@ int scanhash_x21s_2x64( struct work *work, uint32_t max_nonce,
{
uint32_t hash[16*2] __attribute__ ((aligned (64)));
uint32_t vdata[20*2] __attribute__ ((aligned (64)));
uint32_t bedata1[2] __attribute__((aligned(64)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
@@ -425,20 +422,18 @@ int scanhash_x21s_2x64( struct work *work, uint32_t max_nonce,
if ( bench ) ptarget[7] = 0x0cff;
bedata1[0] = bswap_32( pdata[1] );
bedata1[1] = bswap_32( pdata[2] );
static __thread uint32_t s_ntime = UINT32_MAX;
uint32_t ntime = bswap_32( pdata[17] );
if ( s_ntime != ntime )
static __thread uint32_t saved_height = UINT32_MAX;
if ( work->height != saved_height )
{
x16_r_s_getAlgoString( (const uint8_t*)bedata1, x16r_hash_order );
s_ntime = ntime;
if ( opt_debug && !thr_id )
applog( LOG_DEBUG, "hash order %s (%08x)", x16r_hash_order, ntime );
vdata[1] = bswap_32( pdata[1] );
vdata[2] = bswap_32( pdata[2] );
saved_height = work->height;
x16_r_s_getAlgoString( (const uint8_t*)(&vdata[1]), x16r_hash_order );
if ( !opt_quiet && !thr_id )
applog( LOG_INFO, "hash order %s", x16r_hash_order );
}
x16r_2x64_prehash( vdata, pdata );
x16r_2x64_prehash( vdata, pdata, x16r_hash_order );
*noncev = v128_intrlv_blend_32( v128_set32( n+1, 0, n, 0 ), *noncev );
do
{

View File

@@ -33,7 +33,8 @@ int x21s_hash( void* output, const void* input, int thrid )
uint32_t _ALIGN(128) hash[16];
x21s_context_overlay ctx;
if ( !x16r_hash_generic( hash, input, thrid ) )
if ( !x16r_hash_generic( hash, input, thrid, x16r_hash_order,
X16R_HASH_FUNC_COUNT ) )
return 0;
sph_haval256_5_init( &ctx.haval );
@@ -84,7 +85,7 @@ int scanhash_x21s( struct work *work, uint32_t max_nonce,
applog( LOG_INFO, "hash order %s (%08x)", x16r_hash_order, ntime );
}
x16r_prehash( edata, pdata );
x16r_prehash( edata, pdata, x16r_hash_order );
do
{

20
configure vendored
View File

@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.71 for cpuminer-opt 23.12.
# Generated by GNU Autoconf 2.71 for cpuminer-opt 23.13.
#
#
# Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation,
@@ -608,8 +608,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='cpuminer-opt'
PACKAGE_TARNAME='cpuminer-opt'
PACKAGE_VERSION='23.12'
PACKAGE_STRING='cpuminer-opt 23.12'
PACKAGE_VERSION='23.13'
PACKAGE_STRING='cpuminer-opt 23.13'
PACKAGE_BUGREPORT=''
PACKAGE_URL=''
@@ -1360,7 +1360,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
\`configure' configures cpuminer-opt 23.12 to adapt to many kinds of systems.
\`configure' configures cpuminer-opt 23.13 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1432,7 +1432,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
short | recursive ) echo "Configuration of cpuminer-opt 23.12:";;
short | recursive ) echo "Configuration of cpuminer-opt 23.13:";;
esac
cat <<\_ACEOF
@@ -1538,7 +1538,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
cpuminer-opt configure 23.12
cpuminer-opt configure 23.13
generated by GNU Autoconf 2.71
Copyright (C) 2021 Free Software Foundation, Inc.
@@ -1985,7 +1985,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
It was created by cpuminer-opt $as_me 23.12, which was
It was created by cpuminer-opt $as_me 23.13, which was
generated by GNU Autoconf 2.71. Invocation command line was
$ $0$ac_configure_args_raw
@@ -3593,7 +3593,7 @@ fi
# Define the identity of the package.
PACKAGE='cpuminer-opt'
VERSION='23.12'
VERSION='23.13'
printf "%s\n" "#define PACKAGE \"$PACKAGE\"" >>confdefs.h
@@ -7508,7 +7508,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
This file was extended by cpuminer-opt $as_me 23.12, which was
This file was extended by cpuminer-opt $as_me 23.13, which was
generated by GNU Autoconf 2.71. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -7576,7 +7576,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config='$ac_cs_config_escaped'
ac_cs_version="\\
cpuminer-opt config.status 23.12
cpuminer-opt config.status 23.13
configured by $0, generated by GNU Autoconf 2.71,
with options \\"\$ac_cs_config\\"

View File

@@ -1,4 +1,4 @@
AC_INIT([cpuminer-opt], [23.12])
AC_INIT([cpuminer-opt], [23.13])
AC_PREREQ([2.59c])
AC_CANONICAL_SYSTEM

View File

@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.69 for cpuminer-opt 23.12.
# Generated by GNU Autoconf 2.69 for cpuminer-opt 23.13.
#
#
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
@@ -577,8 +577,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='cpuminer-opt'
PACKAGE_TARNAME='cpuminer-opt'
PACKAGE_VERSION='23.12'
PACKAGE_STRING='cpuminer-opt 23.12'
PACKAGE_VERSION='23.13'
PACKAGE_STRING='cpuminer-opt 23.13'
PACKAGE_BUGREPORT=''
PACKAGE_URL=''
@@ -1332,7 +1332,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
\`configure' configures cpuminer-opt 23.12 to adapt to many kinds of systems.
\`configure' configures cpuminer-opt 23.13 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1404,7 +1404,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
short | recursive ) echo "Configuration of cpuminer-opt 23.12:";;
short | recursive ) echo "Configuration of cpuminer-opt 23.13:";;
esac
cat <<\_ACEOF
@@ -1509,7 +1509,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
cpuminer-opt configure 23.12
cpuminer-opt configure 23.13
generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2012,7 +2012,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
It was created by cpuminer-opt $as_me 23.12, which was
It was created by cpuminer-opt $as_me 23.13, which was
generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@
@@ -2993,7 +2993,7 @@ fi
# Define the identity of the package.
PACKAGE='cpuminer-opt'
VERSION='23.12'
VERSION='23.13'
cat >>confdefs.h <<_ACEOF
@@ -6718,7 +6718,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
This file was extended by cpuminer-opt $as_me 23.12, which was
This file was extended by cpuminer-opt $as_me 23.13, which was
generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -6784,7 +6784,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
cpuminer-opt config.status 23.12
cpuminer-opt config.status 23.13
configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\"

View File

@@ -2837,15 +2837,6 @@ static void show_credits()
#define check_cpu_capability() cpu_capability( false )
#define display_cpu_capability() cpu_capability( true )
#if defined(__aarch64__)
#define XSTR(x) STR(x)
#define STR(x) #x
//#pragma message "Building for armv" XSTR(__ARM_ARCH)
#endif
static bool cpu_capability( bool display_only )
{
char cpu_brand[0x40];

View File

@@ -672,6 +672,7 @@ enum algos {
ALGO_X16RT_VEIL,
ALGO_X16S,
ALGO_X17,
ALGO_X20R,
ALGO_X21S,
ALGO_X22I,
ALGO_X25X,
@@ -767,6 +768,7 @@ static const char* const algo_names[] = {
"x16rt-veil",
"x16s",
"x17",
"x20r",
"x21s",
"x22i",
"x25x",
@@ -930,6 +932,7 @@ Options:\n\
x16rt-veil Veil (VEIL)\n\
x16s\n\
x17\n\
x20r\n\
x21s\n\
x22i\n\
x25x\n\

View File

@@ -381,7 +381,7 @@ static inline void dintrlv_4x32_512( void *dst0, void *dst1, void *dst2,
d0[15] = s[ 60]; d1[15] = s[ 61]; d2[15] = s[ 62]; d3[15] = s[ 63];
}
#endif // SSE4_1 else SSE2 or NEON
#endif // SSE4_1 or NEON else SSE2
static inline void extr_lane_4x32( void *d, const void *s,
const int lane, const int bit_len )

View File

@@ -40,7 +40,7 @@
#define v128u8_load( p ) vld1q_u16( (uint8_t*)(p) )
#define v128u8_store( p, v ) vst1q_u16( (uint8_t*)(p), v )
// load & set1 combined
// load & set1 combined, doesn't work
#define v128_load1_64(p) vld1q_dup_u64( (uint64_t*)(p) )
#define v128_load1_32(p) vld1q_dup_u32( (uint32_t*)(p) )
#define v128_load1_16(p) vld1q_dup_u16( (uint16_t*)(p) )

View File

@@ -930,7 +930,9 @@ static inline void cpu_brand_string( char* s )
#elif defined(__arm__) || defined(__aarch64__)
sprintf( s, "ARM 64 bit CPU" );
unsigned int cpu_info[4] = { 0 };
cpuid( 0, 0, cpu_info );
sprintf( s, "ARM 64 bit CPU, HWCAP %08x", cpu_info[0] );
#else