mirror of
https://github.com/JayDDee/cpuminer-opt.git
synced 2025-09-17 23:44:27 +00:00
363 lines
11 KiB
C
363 lines
11 KiB
C
#include "miner.h"
|
|
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
#include "algo/blake/sph_blake.h"
|
|
#include "algo/bmw/sph_bmw.h"
|
|
#include "algo/groestl/sph_groestl.h"
|
|
#include "algo/jh/sph_jh.h"
|
|
#include "algo/keccak/sph_keccak.h"
|
|
#include "algo/skein/sph_skein.h"
|
|
#include "algo/luffa/sph_luffa.h"
|
|
#include "algo/cubehash/sph_cubehash.h"
|
|
#include "algo/shavite/sph_shavite.h"
|
|
#include "algo/simd/sph_simd.h"
|
|
#include "algo/echo/sph_echo.h"
|
|
#include "algo/hamsi/sph_hamsi.h"
|
|
#include "algo/fugue/sph_fugue.h"
|
|
#include "algo/shabal/sph_shabal.h"
|
|
#include "algo/whirlpool/sph_whirlpool.h"
|
|
#include "algo/sha/sph_sha2.h"
|
|
#include "x16r-gate.h"
|
|
|
|
#if defined(SIMD512)
|
|
#define X20R_8WAY 1
|
|
#elif defined(__AVX2__) && defined(__AES__)
|
|
#define X20R_4WAY 1
|
|
#elif defined(__SSE2__) || defined(__ARM_NEON)
|
|
#define X20R_2WAY 1
|
|
#endif
|
|
|
|
// X20R is not what it seems. It does not permute 20 functions over 20 rounds;
// it permutes only 16 of them. The last 4 functions are victims of trying to
// fit 20 elements into the space for only 16: arithmetic overflow recycles the
// first 4 functions. Otherwise it is identical to X16R.
// Welcome to the real X20R.
|
|
|
|
#define X20R_HASH_FUNC_COUNT 20
|
|
/*
|
|
enum x20r_algo
|
|
{
|
|
BLAKE = 0,
|
|
BMW,
|
|
GROESTL,
|
|
JH,
|
|
KECCAK,
|
|
SKEIN,
|
|
LUFFA,
|
|
CUBEHASH,
|
|
SHAVITE,
|
|
SIMD,
|
|
ECHO,
|
|
HAMSI,
|
|
FUGUE,
|
|
SHABAL,
|
|
WHIRLPOOL,
|
|
SHA512,
|
|
HAVAL, // Last 4 names are meaningless and not used
|
|
GOST,
|
|
RADIOGATUN,
|
|
PANAMA,
|
|
X20R_HASH_FUNC_COUNT
|
|
};
|
|
*/
|
|
// Per-thread hash-order string used by the hash and scanhash routines in this
// file: X20R_HASH_FUNC_COUNT characters plus a terminating NUL. It is filled
// in by x20r_getAlgoString().
static __thread char x20r_hash_order[ X20R_HASH_FUNC_COUNT + 1 ] = {0};
|
|
|
|
static void x20r_getAlgoString(const uint8_t* prevblock, char *output)
|
|
{
|
|
char *sptr = output;
|
|
|
|
for (int j = 0; j < X20R_HASH_FUNC_COUNT; j++) {
|
|
uint8_t b = (19 - j) >> 1; // 16 ascii hex chars, reversed
|
|
uint8_t algoDigit = (j & 1) ? prevblock[b] & 0xF : prevblock[b] >> 4;
|
|
if (algoDigit >= 10)
|
|
sprintf(sptr, "%c", 'A' + (algoDigit - 10));
|
|
else
|
|
sprintf(sptr, "%u", (uint32_t) algoDigit);
|
|
sptr++;
|
|
}
|
|
*sptr = '\0';
|
|
}
|
|
|
|
#if defined(X20R_8WAY)
|
|
|
|
int x20r_8x64_hash( void* output, const void* input, int thrid )
|
|
{
|
|
uint8_t hash[64*8] __attribute__ ((aligned (128)));
|
|
if ( !x16r_8x64_hash_generic( hash, input, thrid, x20r_hash_order,
|
|
X20R_HASH_FUNC_COUNT ) )
|
|
return 0;
|
|
|
|
memcpy( output, hash, 32 );
|
|
memcpy( output+32, hash+64, 32 );
|
|
memcpy( output+64, hash+128, 32 );
|
|
memcpy( output+96, hash+192, 32 );
|
|
memcpy( output+128, hash+256, 32 );
|
|
memcpy( output+160, hash+320, 32 );
|
|
memcpy( output+192, hash+384, 32 );
|
|
memcpy( output+224, hash+448, 32 );
|
|
|
|
return 1;
|
|
}
|
|
|
|
int scanhash_x20r_8x64( struct work *work, uint32_t max_nonce,
|
|
uint64_t *hashes_done, struct thr_info *mythr)
|
|
{
|
|
uint32_t hash[16*8] __attribute__ ((aligned (128)));
|
|
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
|
uint32_t *pdata = work->data;
|
|
uint32_t *ptarget = work->target;
|
|
const uint32_t first_nonce = pdata[19];
|
|
const uint32_t last_nonce = max_nonce - 8;
|
|
uint32_t n = first_nonce;
|
|
__m512i *noncev = (__m512i*)vdata + 9; // aligned
|
|
const int thr_id = mythr->id;
|
|
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
|
const bool bench = opt_benchmark;
|
|
|
|
if ( bench ) ptarget[7] = 0x0cff;
|
|
|
|
static __thread uint32_t saved_height = UINT32_MAX;
|
|
if ( work->height != saved_height )
|
|
{
|
|
vdata[1] = bswap_32( pdata[1] );
|
|
vdata[2] = bswap_32( pdata[2] );
|
|
vdata[3] = bswap_32( pdata[3] );
|
|
saved_height = work->height;
|
|
x20r_getAlgoString( (const uint8_t*)(&vdata[1]), x20r_hash_order );
|
|
if ( !opt_quiet && !thr_id )
|
|
applog( LOG_INFO, "hash order %s", x20r_hash_order );
|
|
}
|
|
|
|
x16r_8x64_prehash( vdata, pdata, x20r_hash_order );
|
|
*noncev = mm512_intrlv_blend_32( _mm512_set_epi32(
|
|
n+7, 0, n+6, 0, n+5, 0, n+4, 0,
|
|
n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
|
do
|
|
{
|
|
if ( x20r_8x64_hash( hash, vdata, thr_id ) )
|
|
for ( int i = 0; i < 8; i++ )
|
|
if ( unlikely( valid_hash( hash + (i<<3), ptarget ) && !bench ) )
|
|
{
|
|
pdata[19] = bswap_32( n+i );
|
|
submit_solution( work, hash+(i<<3), mythr );
|
|
}
|
|
*noncev = _mm512_add_epi32( *noncev,
|
|
_mm512_set1_epi64( 0x0000000800000000 ) );
|
|
n += 8;
|
|
} while ( likely( ( n < last_nonce ) && !(*restart) ) );
|
|
pdata[19] = n;
|
|
*hashes_done = n - first_nonce;
|
|
return 0;
|
|
}
|
|
|
|
|
|
#elif defined(X20R_4WAY)
|
|
|
|
int x20r_4x64_hash( void* output, const void* input, int thrid )
|
|
{
|
|
uint8_t hash[64*4] __attribute__ ((aligned (64)));
|
|
if ( !x16r_4x64_hash_generic( hash, input, thrid, x20r_hash_order,
|
|
X20R_HASH_FUNC_COUNT ) )
|
|
return 0;
|
|
|
|
memcpy( output, hash, 32 );
|
|
memcpy( output+32, hash+64, 32 );
|
|
memcpy( output+64, hash+128, 32 );
|
|
memcpy( output+96, hash+192, 32 );
|
|
|
|
return 1;
|
|
}
|
|
|
|
int scanhash_x20r_4x64( struct work *work, uint32_t max_nonce,
|
|
uint64_t *hashes_done, struct thr_info *mythr)
|
|
{
|
|
uint32_t hash[16*4] __attribute__ ((aligned (64)));
|
|
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
|
uint32_t *pdata = work->data;
|
|
uint32_t *ptarget = work->target;
|
|
const uint32_t first_nonce = pdata[19];
|
|
const uint32_t last_nonce = max_nonce - 4;
|
|
uint32_t n = first_nonce;
|
|
__m256i *noncev = (__m256i*)vdata + 9; // aligned
|
|
const int thr_id = mythr->id;
|
|
const bool bench = opt_benchmark;
|
|
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
|
|
|
if ( bench ) ptarget[7] = 0x0cff;
|
|
|
|
static __thread uint32_t saved_height = UINT32_MAX;
|
|
if ( work->height != saved_height )
|
|
{
|
|
vdata[1] = bswap_32( pdata[1] );
|
|
vdata[2] = bswap_32( pdata[2] );
|
|
vdata[3] = bswap_32( pdata[3] );
|
|
saved_height = work->height;
|
|
x20r_getAlgoString( (const uint8_t*)(&vdata[1]), x20r_hash_order );
|
|
if ( !opt_quiet && !thr_id )
|
|
applog( LOG_INFO, "hash order %s", x20r_hash_order );
|
|
}
|
|
|
|
x16r_4x64_prehash( vdata, pdata, x20r_hash_order );
|
|
*noncev = mm256_intrlv_blend_32(
|
|
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
|
do
|
|
{
|
|
if ( x20r_4x64_hash( hash, vdata, thr_id ) )
|
|
for ( int i = 0; i < 4; i++ )
|
|
if ( unlikely( valid_hash( hash + (i<<3), ptarget ) && !bench ) )
|
|
{
|
|
pdata[19] = bswap_32( n+i );
|
|
submit_solution( work, hash+(i<<3), mythr );
|
|
}
|
|
*noncev = _mm256_add_epi32( *noncev,
|
|
_mm256_set1_epi64x( 0x0000000400000000 ) );
|
|
n += 4;
|
|
} while ( likely( ( n < last_nonce ) && !(*restart) ) );
|
|
pdata[19] = n;
|
|
*hashes_done = n - first_nonce;
|
|
return 0;
|
|
}
|
|
|
|
#elif defined(X20R_2WAY)
|
|
|
|
int x20r_2x64_hash( void* output, const void* input, int thrid )
|
|
{
|
|
uint8_t hash[64*2] __attribute__ ((aligned (64)));
|
|
if ( !x16r_2x64_hash_generic( hash, input, thrid, x20r_hash_order,
|
|
X20R_HASH_FUNC_COUNT ) )
|
|
return 0;
|
|
|
|
memcpy( output, hash, 32 );
|
|
memcpy( output+32, hash+64, 32 );
|
|
|
|
return 1;
|
|
}
|
|
|
|
int scanhash_x20r_2x64( struct work *work, uint32_t max_nonce,
|
|
uint64_t *hashes_done, struct thr_info *mythr)
|
|
{
|
|
uint32_t hash[16*2] __attribute__ ((aligned (64)));
|
|
uint32_t vdata[20*2] __attribute__ ((aligned (64)));
|
|
uint32_t *pdata = work->data;
|
|
uint32_t *ptarget = work->target;
|
|
const uint32_t first_nonce = pdata[19];
|
|
const uint32_t last_nonce = max_nonce - 2;
|
|
uint32_t n = first_nonce;
|
|
v128_t *noncev = (v128_t*)vdata + 9;
|
|
const int thr_id = mythr->id;
|
|
const bool bench = opt_benchmark;
|
|
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
|
|
|
if ( bench ) ptarget[7] = 0x0cff;
|
|
|
|
static __thread uint32_t saved_height = UINT32_MAX;
|
|
if ( work->height != saved_height )
|
|
{
|
|
vdata[1] = bswap_32( pdata[1] );
|
|
vdata[2] = bswap_32( pdata[2] );
|
|
vdata[3] = bswap_32( pdata[3] );
|
|
saved_height = work->height;
|
|
x20r_getAlgoString( (const uint8_t*)(&vdata[1]), x20r_hash_order );
|
|
if ( !opt_quiet && !thr_id )
|
|
applog( LOG_INFO, "hash order %s", x20r_hash_order );
|
|
}
|
|
|
|
x16r_2x64_prehash( vdata, pdata, x20r_hash_order );
|
|
*noncev = v128_intrlv_blend_32( v128_set32( n+1, 0, n, 0 ), *noncev );
|
|
do
|
|
{
|
|
if ( x20r_2x64_hash( hash, vdata, thr_id ) )
|
|
for ( int i = 0; i < 2; i++ )
|
|
if ( unlikely( valid_hash( hash + (i<<3), ptarget ) && !bench ) )
|
|
{
|
|
pdata[19] = bswap_32( n+i );
|
|
submit_solution( work, hash+(i<<3), mythr );
|
|
}
|
|
*noncev = v128_add32( *noncev, v128_64( 0x0000000200000000 ) );
|
|
n += 2;
|
|
} while ( likely( ( n < last_nonce ) && !(*restart) ) );
|
|
pdata[19] = n;
|
|
*hashes_done = n - first_nonce;
|
|
return 0;
|
|
}
|
|
|
|
#else
|
|
|
|
int x20r_hash( void* output, const void* input, int thrid )
|
|
{
|
|
uint8_t hash[64] __attribute__ ((aligned (64)));
|
|
if ( !x16r_hash_generic( hash, input, thrid, x20r_hash_order,
|
|
X20R_HASH_FUNC_COUNT ) )
|
|
return 0;
|
|
|
|
memcpy( output, hash, 32 );
|
|
return 1;
|
|
}
|
|
|
|
int scanhash_x20r( struct work *work, uint32_t max_nonce,
|
|
uint64_t *hashes_done, struct thr_info *mythr )
|
|
{
|
|
uint32_t _ALIGN(32) hash32[8];
|
|
uint32_t _ALIGN(32) edata[20];
|
|
uint32_t *pdata = work->data;
|
|
uint32_t *ptarget = work->target;
|
|
const uint32_t first_nonce = pdata[19];
|
|
const int thr_id = mythr->id;
|
|
uint32_t nonce = first_nonce;
|
|
volatile uint8_t *restart = &( work_restart[thr_id].restart );
|
|
const bool bench = opt_benchmark;
|
|
if ( bench ) ptarget[7] = 0x0cff;
|
|
|
|
static __thread uint32_t saved_height = UINT32_MAX;
|
|
if ( work->height != saved_height )
|
|
{
|
|
edata[1] = bswap_32( pdata[1] );
|
|
edata[2] = bswap_32( pdata[2] );
|
|
edata[3] = bswap_32( pdata[3] );
|
|
saved_height = work->height;
|
|
x20r_getAlgoString( (const uint8_t*)(&edata[1]), x20r_hash_order );
|
|
if ( !opt_quiet && !thr_id )
|
|
applog( LOG_INFO, "hash order %s", x20r_hash_order );
|
|
}
|
|
|
|
x16r_prehash( edata, pdata, x20r_hash_order );
|
|
|
|
do
|
|
{
|
|
edata[19] = nonce;
|
|
if ( x20r_hash( hash32, edata, thr_id ) )
|
|
if ( unlikely( valid_hash( hash32, ptarget ) && !bench ) )
|
|
{
|
|
pdata[19] = bswap_32( nonce );
|
|
submit_solution( work, hash32, mythr );
|
|
}
|
|
nonce++;
|
|
} while ( nonce < max_nonce && !(*restart) );
|
|
pdata[19] = nonce;
|
|
*hashes_done = pdata[19] - first_nonce;
|
|
return 0;
|
|
}
|
|
|
|
#endif
|
|
|
|
// Register the X20R algorithm in the supplied gate.
//
// Installs the scanhash routine that matches the way-count selected at
// compile time (X20R_8WAY / X20R_4WAY / X20R_2WAY, otherwise the scalar
// version), sets the gate's optimization flags and sets the global
// opt_target_factor to 256.0. Always returns true.
bool register_x20r_algo( algo_gate_t* gate )
{
#if defined (X20R_8WAY)
   gate->scanhash  = (void*)&scanhash_x20r_8x64;
#elif defined (X20R_4WAY)
   gate->scanhash  = (void*)&scanhash_x20r_4x64;
#elif defined (X20R_2WAY)
   gate->scanhash  = (void*)&scanhash_x20r_2x64;
#else
   gate->scanhash  = (void*)&scanhash_x20r;
#endif
   gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT
                       | NEON_OPT;
   opt_target_factor = 256.0;
   return true;
}
|
|
|