mirror of
https://github.com/JayDDee/cpuminer-opt.git
synced 2025-09-17 23:44:27 +00:00
Compare commits
4 Commits
Author | SHA1 | Date | |
---|---|---|---|
![]() |
c24a4bdbc2 | ||
![]() |
59c7848d91 | ||
![]() |
3c02653dbe | ||
![]() |
502ed0b1fe |
@@ -45,7 +45,10 @@ cpuminer_SOURCES = \
|
||||
algo/blake/sph_blake2b.c \
|
||||
algo/blake/blake2b.c \
|
||||
algo/blake/sph-blake2s.c \
|
||||
algo/blake/blake2s-hash-4way.c \
|
||||
algo/blake/blake2s.c \
|
||||
algo/blake/blake2s-gate.c \
|
||||
algo/blake/blake2s-4way.c \
|
||||
algo/blake/blakecoin-gate.c \
|
||||
algo/blake/mod_blakecoin.c \
|
||||
algo/blake/blakecoin.c \
|
||||
|
@@ -34,11 +34,12 @@ Others may work but may require more effort.
|
||||
|
||||
MacOS, OSx is not supported.
|
||||
|
||||
3. Stratum pool. Some algos may work wallet mining using getwork.
|
||||
3. Stratum pool. Some algos may also work for wallet mining using getwork or GBT. YMMV.
|
||||
|
||||
Supported Algorithms
|
||||
--------------------
|
||||
|
||||
allium Garlicoin
|
||||
anime Animecoin
|
||||
argon2
|
||||
axiom Shabal-256 MemoHash
|
||||
@@ -107,7 +108,7 @@ Supported Algorithms
|
||||
x17
|
||||
xevan Bitsend
|
||||
yescrypt Globalboost-Y (BSTY)
|
||||
yescryptr8 BitZeny (ZNY)\n\
|
||||
yescryptr8 BitZeny (ZNY)
|
||||
yescryptr16 Yenten (YTN)
|
||||
zr5 Ziftr
|
||||
|
||||
|
@@ -134,7 +134,7 @@ cd /c/path/to/cpuminer-opt
|
||||
Run build.sh to build on Windows or execute the following commands.
|
||||
|
||||
./autogen.sh
|
||||
CFLAGS="-O3 -march=native -Wall" CXXFLAGS="$CFLAGS -std=gnu++11 -fpermissive" ./configure --with-curl
|
||||
CFLAGS="-O3 -march=native -Wall" ./configure --with-curl
|
||||
make
|
||||
|
||||
Start mining
|
||||
@@ -159,6 +159,31 @@ Support for even older x86_64 without AES_NI or SSE2 is not available.
|
||||
Change Log
|
||||
----------
|
||||
|
||||
v3.8.3.2
|
||||
|
||||
Reverted gbt changes from v3.8.0 that broke getwork.
|
||||
Reverted scaled hash rate for API, added HS term in addition to KHS.
|
||||
Added blocks solved to console display and API.
|
||||
|
||||
v3.8.3.1
|
||||
|
||||
Fixed regression in v3.8.3 that broke several algos.
|
||||
|
||||
v3.8.3
|
||||
|
||||
More restoration of lost lyra2 hash.
|
||||
8 way AVX2 and 4way AVX optimization for blakecoin, vanilla & blake2s.
|
||||
8 way AVX2 for lbry.
|
||||
Scaled hashrate for API output.
|
||||
A couple of GBT fixes.
|
||||
|
||||
v3.8.2.1
|
||||
|
||||
Fixed low difficulty rejects with allium.
|
||||
Fixed qubit AVX2.
|
||||
Restored lyra2z lost hash.
|
||||
Fixed build.sh
|
||||
|
||||
v3.8.2
|
||||
|
||||
Fixed and faster myr-gr.
|
||||
|
@@ -1,19 +1,18 @@
|
||||
#include "blake-gate.h"
|
||||
|
||||
#if defined (BLAKE_4WAY)
|
||||
|
||||
#include "blake-hash-4way.h"
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
#include <memory.h>
|
||||
|
||||
blake256r14_4way_context blake_ctx;
|
||||
#if defined (BLAKE_4WAY)
|
||||
|
||||
blake256r14_4way_context blake_4w_ctx;
|
||||
|
||||
void blakehash_4way(void *state, const void *input)
|
||||
{
|
||||
uint32_t vhash[8*4] __attribute__ ((aligned (64)));
|
||||
blake256r14_4way_context ctx;
|
||||
memcpy( &ctx, &blake_ctx, sizeof ctx );
|
||||
memcpy( &ctx, &blake_4w_ctx, sizeof ctx );
|
||||
blake256r14_4way( &ctx, input + (64<<2), 16 );
|
||||
blake256r14_4way_close( &ctx, vhash );
|
||||
mm_deinterleave_4x32( state, state+32, state+64, state+96, vhash, 256 );
|
||||
@@ -31,7 +30,6 @@ int scanhash_blake_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint32_t _ALIGN(32) edata[20];
|
||||
uint32_t n = first_nonce;
|
||||
uint32_t *nonces = work->nonces;
|
||||
bool *found = work->nfound;
|
||||
int num_found = 0;
|
||||
|
||||
if (opt_benchmark)
|
||||
@@ -39,15 +37,12 @@ int scanhash_blake_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
|
||||
// we need big endian data...
|
||||
swab32_array( edata, pdata, 20 );
|
||||
|
||||
mm_interleave_4x32( vdata, edata, edata, edata, edata, 640 );
|
||||
|
||||
blake256r14_4way_init( &blake_ctx );
|
||||
blake256r14_4way( &blake_ctx, vdata, 64 );
|
||||
blake256r14_4way_init( &blake_4w_ctx );
|
||||
blake256r14_4way( &blake_4w_ctx, vdata, 64 );
|
||||
|
||||
uint32_t *noncep = vdata + 76; // 19*4
|
||||
do {
|
||||
found[0] = found[1] = found[2] = found[3] = false;
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep +1, n+1 );
|
||||
be32enc( noncep +2, n+2 );
|
||||
@@ -55,34 +50,12 @@ int scanhash_blake_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
|
||||
blakehash_4way( hash, vdata );
|
||||
|
||||
if ( hash[7] <= HTarget && fulltest( hash, ptarget ) )
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( (hash+(i<<3))[7] <= HTarget && fulltest( hash+(i<<3), ptarget ) )
|
||||
{
|
||||
found[0] = true;
|
||||
num_found++;
|
||||
nonces[0] = n;
|
||||
pdata[19] = n;
|
||||
work_set_target_ratio( work, hash );
|
||||
}
|
||||
if ( (hash+8)[7] <= HTarget && fulltest( hash+8, ptarget ) )
|
||||
{
|
||||
found[1] = true;
|
||||
num_found++;
|
||||
nonces[1] = n+1;
|
||||
work_set_target_ratio( work, hash+8 );
|
||||
}
|
||||
if ( (hash+16)[7] <= HTarget && fulltest( hash+16, ptarget ) )
|
||||
{
|
||||
found[2] = true;
|
||||
num_found++;
|
||||
nonces[2] = n+2;
|
||||
work_set_target_ratio( work, hash+16 );
|
||||
}
|
||||
if ( (hash+24)[7] <= HTarget && fulltest( hash+24, ptarget ) )
|
||||
{
|
||||
found[3] = true;
|
||||
num_found++;
|
||||
nonces[3] = n+3;
|
||||
work_set_target_ratio( work, hash+24 );
|
||||
pdata[19] = n+i;
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
}
|
||||
n += 4;
|
||||
|
||||
@@ -95,3 +68,77 @@ int scanhash_blake_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(BLAKE_8WAY)
|
||||
|
||||
// Blake-256 (14 round) state saved by scanhash_blake_8way() after it has
// absorbed the first 64 bytes of every lane; blakehash_8way() resumes from it.
// NOTE(review): this is a plain global, whereas the blake2s midstates are
// thread-local. Confirm that concurrent miner threads cannot overwrite it.
blake256r14_8way_context blake_8w_ctx;

// Finish hashing 8 interleaved block headers, starting from the saved
// midstate.
//
// state: output, written at byte offsets 0, 32, 64, ... 224 (one 32 byte
//        digest per lane).
// input: 8x32 bit interleaved data; the first 64 bytes of each lane are
//        skipped and the following 16 bytes of each lane are hashed.
void blakehash_8way( void *state, const void *input )
{
   uint32_t lanes[8*8] __attribute__ ((aligned (64)));
   blake256r14_8way_context midstate;

   // Work on a private copy so the saved midstate can be reused.
   memcpy( &midstate, &blake_8w_ctx, sizeof midstate );

   // 64 bytes per lane times 8 lanes were already absorbed by the caller.
   blake256r14_8way( &midstate, input + ( 64 * 8 ), 16 );
   blake256r14_8way_close( &midstate, lanes );

   // Split the interleaved result into 8 separate 256 bit digests.
   mm256_deinterleave_8x32( state,
                            state +  32,
                            state +  64,
                            state +  96,
                            state + 128,
                            state + 160,
                            state + 192,
                            state + 224,
                            lanes, 256 );
}
|
||||
|
||||
int scanhash_blake_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
{
|
||||
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[8*8] __attribute__ ((aligned (32)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t HTarget = ptarget[7];
|
||||
uint32_t _ALIGN(32) edata[20];
|
||||
uint32_t n = first_nonce;
|
||||
uint32_t *nonces = work->nonces;
|
||||
int num_found = 0;
|
||||
|
||||
if (opt_benchmark)
|
||||
HTarget = 0x7f;
|
||||
|
||||
// we need big endian data...
|
||||
swab32_array( edata, pdata, 20 );
|
||||
|
||||
mm256_interleave_8x32( vdata, edata, edata, edata, edata,
|
||||
edata, edata, edata, edata, 640 );
|
||||
|
||||
blake256r14_8way_init( &blake_8w_ctx );
|
||||
blake256r14_8way( &blake_8w_ctx, vdata, 64 );
|
||||
|
||||
uint32_t *noncep = vdata + 152; // 19*8
|
||||
do {
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep +1, n+1 );
|
||||
be32enc( noncep +2, n+2 );
|
||||
be32enc( noncep +3, n+3 );
|
||||
be32enc( noncep +4, n+4 );
|
||||
be32enc( noncep +5, n+5 );
|
||||
be32enc( noncep +6, n+6 );
|
||||
be32enc( noncep +7, n+7 );
|
||||
pdata[19] = n;
|
||||
|
||||
blakehash_8way( hash, vdata );
|
||||
|
||||
for ( int i = 0; i < 8; i++ )
|
||||
if ( (hash+i)[7] <= HTarget && fulltest( hash+i, ptarget ) )
|
||||
{
|
||||
pdata[19] = n+i;
|
||||
num_found++;
|
||||
nonces[i] = n+i;
|
||||
work_set_target_ratio( work, hash+1 );
|
||||
}
|
||||
n += 8;
|
||||
|
||||
} while ( (num_found == 0) && (n < max_nonce)
|
||||
&& !work_restart[thr_id].restart );
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return num_found;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@@ -58,6 +58,8 @@ extern "C"{
|
||||
#pragma warning (disable: 4146)
|
||||
#endif
|
||||
|
||||
// Blake-256
|
||||
|
||||
static const sph_u32 IV256[8] = {
|
||||
SPH_C32(0x6A09E667), SPH_C32(0xBB67AE85),
|
||||
SPH_C32(0x3C6EF372), SPH_C32(0xA54FF53A),
|
||||
@@ -67,6 +69,8 @@ static const sph_u32 IV256[8] = {
|
||||
|
||||
#if defined (__AVX2__)
|
||||
|
||||
// Blake-512
|
||||
|
||||
static const sph_u64 IV512[8] = {
|
||||
SPH_C64(0x6A09E667F3BCC908), SPH_C64(0xBB67AE8584CAA73B),
|
||||
SPH_C64(0x3C6EF372FE94F82B), SPH_C64(0xA54FF53A5F1D36F1),
|
||||
@@ -78,7 +82,7 @@ static const sph_u64 IV512[8] = {
|
||||
|
||||
#if SPH_COMPACT_BLAKE_32 || SPH_COMPACT_BLAKE_64
|
||||
|
||||
// Blake-256 4 & 8 way, Blake-512 4way
|
||||
// Blake-256 4 & 8 way, Blake-512 4 way
|
||||
|
||||
static const unsigned sigma[16][16] = {
|
||||
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
|
||||
@@ -371,6 +375,8 @@ do { \
|
||||
|
||||
#if SPH_COMPACT_BLAKE_32
|
||||
|
||||
// Blake-256 4 way
|
||||
|
||||
#define ROUND_S_4WAY(r) do { \
|
||||
GS_4WAY(M[sigma[r][0x0]], M[sigma[r][0x1]], \
|
||||
CS[sigma[r][0x0]], CS[sigma[r][0x1]], V0, V4, V8, VC); \
|
||||
@@ -407,7 +413,7 @@ do { \
|
||||
|
||||
#if defined (__AVX2__)
|
||||
|
||||
// BLAKE256 8 WAY
|
||||
// Blake-256 8 way
|
||||
|
||||
#define GS_8WAY( m0, m1, c0, c1, a, b, c, d ) \
|
||||
do { \
|
||||
@@ -487,6 +493,8 @@ do { \
|
||||
|
||||
#endif
|
||||
|
||||
// Blake-256 4 way
|
||||
|
||||
#define DECL_STATE32_4WAY \
|
||||
__m128i H0, H1, H2, H3, H4, H5, H6, H7; \
|
||||
__m128i S0, S1, S2, S3; \
|
||||
@@ -527,6 +535,7 @@ do { \
|
||||
} while (0)
|
||||
|
||||
#if SPH_COMPACT_BLAKE_32
|
||||
// not used
|
||||
|
||||
#define COMPRESS32_4WAY( rounds ) do { \
|
||||
__m128i M[16]; \
|
||||
@@ -778,7 +787,6 @@ do { \
|
||||
S3 ), H7 ); \
|
||||
} while (0)
|
||||
|
||||
|
||||
// Blake-512 4 way
|
||||
|
||||
#define DECL_STATE64_4WAY \
|
||||
@@ -967,6 +975,8 @@ do { \
|
||||
|
||||
#endif
|
||||
|
||||
// Blake-256 4 way
|
||||
|
||||
static const sph_u32 salt_zero_4way_small[4] = { 0, 0, 0, 0 };
|
||||
|
||||
static void
|
||||
@@ -988,52 +998,51 @@ blake32_4way( blake_4way_small_context *sc, const void *data, size_t len )
|
||||
{
|
||||
__m128i *vdata = (__m128i*)data;
|
||||
__m128i *buf;
|
||||
size_t ptr;
|
||||
const int buf_size = 64; // number of elements, sizeof/4
|
||||
DECL_STATE32_4WAY
|
||||
size_t ptr;
|
||||
const int buf_size = 64; // number of elements, sizeof/4
|
||||
DECL_STATE32_4WAY
|
||||
buf = sc->buf;
|
||||
ptr = sc->ptr;
|
||||
if ( len < buf_size - ptr )
|
||||
{
|
||||
memcpy_128( buf + (ptr>>2), vdata, len>>2 );
|
||||
ptr += len;
|
||||
sc->ptr = ptr;
|
||||
return;
|
||||
}
|
||||
|
||||
buf = sc->buf;
|
||||
ptr = sc->ptr;
|
||||
if ( len < buf_size - ptr )
|
||||
{
|
||||
memcpy_128( buf + (ptr>>2), vdata, len>>2 );
|
||||
ptr += len;
|
||||
sc->ptr = ptr;
|
||||
return;
|
||||
}
|
||||
READ_STATE32_4WAY(sc);
|
||||
while ( len > 0 )
|
||||
{
|
||||
size_t clen;
|
||||
|
||||
READ_STATE32_4WAY(sc);
|
||||
while ( len > 0 )
|
||||
{
|
||||
size_t clen;
|
||||
|
||||
clen = buf_size - ptr;
|
||||
if (clen > len)
|
||||
clen = len;
|
||||
memcpy_128( buf + (ptr>>2), vdata, clen>>2 );
|
||||
ptr += clen;
|
||||
vdata += (clen>>2);
|
||||
len -= clen;
|
||||
if ( ptr == buf_size )
|
||||
{
|
||||
if ( ( T0 = SPH_T32(T0 + 512) ) < 512 )
|
||||
T1 = SPH_T32(T1 + 1);
|
||||
COMPRESS32_4WAY( sc->rounds );
|
||||
ptr = 0;
|
||||
}
|
||||
}
|
||||
WRITE_STATE32_4WAY(sc);
|
||||
sc->ptr = ptr;
|
||||
clen = buf_size - ptr;
|
||||
if ( clen > len )
|
||||
clen = len;
|
||||
memcpy_128( buf + (ptr>>2), vdata, clen>>2 );
|
||||
ptr += clen;
|
||||
vdata += (clen>>2);
|
||||
len -= clen;
|
||||
if ( ptr == buf_size )
|
||||
{
|
||||
if ( ( T0 = SPH_T32(T0 + 512) ) < 512 )
|
||||
T1 = SPH_T32(T1 + 1);
|
||||
COMPRESS32_4WAY( sc->rounds );
|
||||
ptr = 0;
|
||||
}
|
||||
}
|
||||
WRITE_STATE32_4WAY(sc);
|
||||
sc->ptr = ptr;
|
||||
}
|
||||
|
||||
static void
|
||||
blake32_4way_close( blake_4way_small_context *sc, unsigned ub, unsigned n,
|
||||
void *dst, size_t out_size_w32 )
|
||||
{
|
||||
union {
|
||||
// union {
|
||||
__m128i buf[16];
|
||||
sph_u32 dummy;
|
||||
} u;
|
||||
// sph_u32 dummy;
|
||||
// } u;
|
||||
size_t ptr, k;
|
||||
unsigned bit_len;
|
||||
sph_u32 th, tl;
|
||||
@@ -1041,7 +1050,7 @@ blake32_4way_close( blake_4way_small_context *sc, unsigned ub, unsigned n,
|
||||
|
||||
ptr = sc->ptr;
|
||||
bit_len = ((unsigned)ptr << 3);
|
||||
u.buf[ptr>>2] = _mm_set1_epi32( 0x80 );
|
||||
buf[ptr>>2] = _mm_set1_epi32( 0x80 );
|
||||
tl = sc->T0 + bit_len;
|
||||
th = sc->T1;
|
||||
|
||||
@@ -1060,26 +1069,26 @@ blake32_4way_close( blake_4way_small_context *sc, unsigned ub, unsigned n,
|
||||
|
||||
if ( ptr <= 52 )
|
||||
{
|
||||
memset_zero_128( u.buf + (ptr>>2) + 1, (52 - ptr) >> 2 );
|
||||
memset_zero_128( buf + (ptr>>2) + 1, (52 - ptr) >> 2 );
|
||||
if (out_size_w32 == 8)
|
||||
u.buf[52>>2] = _mm_or_si128( u.buf[52>>2],
|
||||
buf[52>>2] = _mm_or_si128( buf[52>>2],
|
||||
_mm_set1_epi32( 0x01000000UL ) );
|
||||
*(u.buf+(56>>2)) = mm_bswap_32( _mm_set1_epi32( th ) );
|
||||
*(u.buf+(60>>2)) = mm_bswap_32( _mm_set1_epi32( tl ) );
|
||||
blake32_4way( sc, u.buf + (ptr>>2), 64 - ptr );
|
||||
*(buf+(56>>2)) = mm_bswap_32( _mm_set1_epi32( th ) );
|
||||
*(buf+(60>>2)) = mm_bswap_32( _mm_set1_epi32( tl ) );
|
||||
blake32_4way( sc, buf + (ptr>>2), 64 - ptr );
|
||||
}
|
||||
else
|
||||
{
|
||||
memset_zero_128( u.buf + (ptr>>2) + 1, (60-ptr) >> 2 );
|
||||
blake32_4way( sc, u.buf + (ptr>>2), 64 - ptr );
|
||||
memset_zero_128( buf + (ptr>>2) + 1, (60-ptr) >> 2 );
|
||||
blake32_4way( sc, buf + (ptr>>2), 64 - ptr );
|
||||
sc->T0 = SPH_C32(0xFFFFFE00UL);
|
||||
sc->T1 = SPH_C32(0xFFFFFFFFUL);
|
||||
memset_zero_128( u.buf, 56>>2 );
|
||||
memset_zero_128( buf, 56>>2 );
|
||||
if (out_size_w32 == 8)
|
||||
u.buf[52>>2] = _mm_set1_epi32( 0x01000000UL );
|
||||
*(u.buf+(56>>2)) = mm_bswap_32( _mm_set1_epi32( th ) );
|
||||
*(u.buf+(60>>2)) = mm_bswap_32( _mm_set1_epi32( tl ) );
|
||||
blake32_4way( sc, u.buf, 64 );
|
||||
buf[52>>2] = _mm_set1_epi32( 0x01000000UL );
|
||||
*(buf+(56>>2)) = mm_bswap_32( _mm_set1_epi32( th ) );
|
||||
*(buf+(60>>2)) = mm_bswap_32( _mm_set1_epi32( tl ) );
|
||||
blake32_4way( sc, buf, 64 );
|
||||
}
|
||||
out = (__m128i*)dst;
|
||||
for ( k = 0; k < out_size_w32; k++ )
|
||||
@@ -1114,7 +1123,6 @@ blake32_8way( blake_8way_small_context *sc, const void *data, size_t len )
|
||||
size_t ptr;
|
||||
const int buf_size = 64; // number of elements, sizeof/4
|
||||
DECL_STATE32_8WAY
|
||||
|
||||
buf = sc->buf;
|
||||
ptr = sc->ptr;
|
||||
if ( len < buf_size - ptr )
|
||||
@@ -1153,10 +1161,10 @@ static void
|
||||
blake32_8way_close( blake_8way_small_context *sc, unsigned ub, unsigned n,
|
||||
void *dst, size_t out_size_w32 )
|
||||
{
|
||||
union {
|
||||
// union {
|
||||
__m256i buf[16];
|
||||
sph_u32 dummy;
|
||||
} u;
|
||||
// sph_u32 dummy;
|
||||
// } u;
|
||||
size_t ptr, k;
|
||||
unsigned bit_len;
|
||||
sph_u32 th, tl;
|
||||
@@ -1164,7 +1172,7 @@ blake32_8way_close( blake_8way_small_context *sc, unsigned ub, unsigned n,
|
||||
|
||||
ptr = sc->ptr;
|
||||
bit_len = ((unsigned)ptr << 3);
|
||||
u.buf[ptr>>2] = _mm256_set1_epi32( 0x80 );
|
||||
buf[ptr>>2] = _mm256_set1_epi32( 0x80 );
|
||||
tl = sc->T0 + bit_len;
|
||||
th = sc->T1;
|
||||
|
||||
@@ -1183,26 +1191,26 @@ blake32_8way_close( blake_8way_small_context *sc, unsigned ub, unsigned n,
|
||||
|
||||
if ( ptr <= 52 )
|
||||
{
|
||||
memset_zero_256( u.buf + (ptr>>2) + 1, (52 - ptr) >> 2 );
|
||||
if (out_size_w32 == 8)
|
||||
u.buf[52>>2] = _mm256_or_si256( u.buf[52>>2],
|
||||
memset_zero_256( buf + (ptr>>2) + 1, (52 - ptr) >> 2 );
|
||||
if ( out_size_w32 == 8 )
|
||||
buf[52>>2] = _mm256_or_si256( buf[52>>2],
|
||||
_mm256_set1_epi32( 0x01000000UL ) );
|
||||
*(u.buf+(56>>2)) = mm256_bswap_32( _mm256_set1_epi32( th ) );
|
||||
*(u.buf+(60>>2)) = mm256_bswap_32( _mm256_set1_epi32( tl ) );
|
||||
blake32_8way( sc, u.buf + (ptr>>2), 64 - ptr );
|
||||
*(buf+(56>>2)) = mm256_bswap_32( _mm256_set1_epi32( th ) );
|
||||
*(buf+(60>>2)) = mm256_bswap_32( _mm256_set1_epi32( tl ) );
|
||||
blake32_8way( sc, buf + (ptr>>2), 64 - ptr );
|
||||
}
|
||||
else
|
||||
{
|
||||
memset_zero_256( u.buf + (ptr>>2) + 1, (60-ptr) >> 2 );
|
||||
blake32_8way( sc, u.buf + (ptr>>2), 64 - ptr );
|
||||
memset_zero_256( buf + (ptr>>2) + 1, (60-ptr) >> 2 );
|
||||
blake32_8way( sc, buf + (ptr>>2), 64 - ptr );
|
||||
sc->T0 = SPH_C32(0xFFFFFE00UL);
|
||||
sc->T1 = SPH_C32(0xFFFFFFFFUL);
|
||||
memset_zero_256( u.buf, 56>>2 );
|
||||
if (out_size_w32 == 8)
|
||||
u.buf[52>>2] = _mm256_set1_epi32( 0x01000000UL );
|
||||
*(u.buf+(56>>2)) = mm256_bswap_32( _mm256_set1_epi32( th ) );
|
||||
*(u.buf+(60>>2)) = mm256_bswap_32( _mm256_set1_epi32( tl ) );
|
||||
blake32_8way( sc, u.buf, 64 );
|
||||
memset_zero_256( buf, 56>>2 );
|
||||
if ( out_size_w32 == 8 )
|
||||
buf[52>>2] = _mm256_set1_epi32( 0x01000000UL );
|
||||
*(buf+(56>>2)) = mm256_bswap_32( _mm256_set1_epi32( th ) );
|
||||
*(buf+(60>>2)) = mm256_bswap_32( _mm256_set1_epi32( tl ) );
|
||||
blake32_8way( sc, buf, 64 );
|
||||
}
|
||||
out = (__m256i*)dst;
|
||||
for ( k = 0; k < out_size_w32; k++ )
|
||||
@@ -1274,10 +1282,10 @@ static void
|
||||
blake64_4way_close( blake_4way_big_context *sc,
|
||||
unsigned ub, unsigned n, void *dst, size_t out_size_w64)
|
||||
{
|
||||
union {
|
||||
// union {
|
||||
__m256i buf[16];
|
||||
sph_u64 dummy;
|
||||
} u;
|
||||
// sph_u64 dummy;
|
||||
// } u;
|
||||
size_t ptr, k;
|
||||
unsigned bit_len;
|
||||
uint64_t z, zz;
|
||||
@@ -1288,7 +1296,7 @@ blake64_4way_close( blake_4way_big_context *sc,
|
||||
bit_len = ((unsigned)ptr << 3);
|
||||
z = 0x80 >> n;
|
||||
zz = ((ub & -z) | z) & 0xFF;
|
||||
u.buf[ptr>>3] = _mm256_set_epi64x( zz, zz, zz, zz );
|
||||
buf[ptr>>3] = _mm256_set_epi64x( zz, zz, zz, zz );
|
||||
tl = sc->T0 + bit_len;
|
||||
th = sc->T1;
|
||||
if (ptr == 0 )
|
||||
@@ -1307,33 +1315,33 @@ blake64_4way_close( blake_4way_big_context *sc,
|
||||
}
|
||||
if ( ptr <= 104 )
|
||||
{
|
||||
memset_zero_256( u.buf + (ptr>>3) + 1, (104-ptr) >> 3 );
|
||||
memset_zero_256( buf + (ptr>>3) + 1, (104-ptr) >> 3 );
|
||||
if ( out_size_w64 == 8 )
|
||||
u.buf[(104>>3)] = _mm256_or_si256( u.buf[(104>>3)],
|
||||
buf[(104>>3)] = _mm256_or_si256( buf[(104>>3)],
|
||||
_mm256_set1_epi64x( 0x0100000000000000ULL ) );
|
||||
*(u.buf+(112>>3)) = mm256_bswap_64(
|
||||
*(buf+(112>>3)) = mm256_bswap_64(
|
||||
_mm256_set_epi64x( th, th, th, th ) );
|
||||
*(u.buf+(120>>3)) = mm256_bswap_64(
|
||||
*(buf+(120>>3)) = mm256_bswap_64(
|
||||
_mm256_set_epi64x( tl, tl, tl, tl ) );
|
||||
|
||||
blake64_4way( sc, u.buf + (ptr>>3), 128 - ptr );
|
||||
blake64_4way( sc, buf + (ptr>>3), 128 - ptr );
|
||||
}
|
||||
else
|
||||
{
|
||||
memset_zero_256( u.buf + (ptr>>3) + 1, (120 - ptr) >> 3 );
|
||||
memset_zero_256( buf + (ptr>>3) + 1, (120 - ptr) >> 3 );
|
||||
|
||||
blake64_4way( sc, u.buf + (ptr>>3), 128 - ptr );
|
||||
blake64_4way( sc, buf + (ptr>>3), 128 - ptr );
|
||||
sc->T0 = SPH_C64(0xFFFFFFFFFFFFFC00ULL);
|
||||
sc->T1 = SPH_C64(0xFFFFFFFFFFFFFFFFULL);
|
||||
memset_zero_256( u.buf, 112>>3 );
|
||||
memset_zero_256( buf, 112>>3 );
|
||||
if ( out_size_w64 == 8 )
|
||||
u.buf[104>>3] = _mm256_set1_epi64x( 0x0100000000000000ULL );
|
||||
*(u.buf+(112>>3)) = mm256_bswap_64(
|
||||
buf[104>>3] = _mm256_set1_epi64x( 0x0100000000000000ULL );
|
||||
*(buf+(112>>3)) = mm256_bswap_64(
|
||||
_mm256_set_epi64x( th, th, th, th ) );
|
||||
*(u.buf+(120>>3)) = mm256_bswap_64(
|
||||
*(buf+(120>>3)) = mm256_bswap_64(
|
||||
_mm256_set_epi64x( tl, tl, tl, tl ) );
|
||||
|
||||
blake64_4way( sc, u.buf, 128 );
|
||||
blake64_4way( sc, buf, 128 );
|
||||
}
|
||||
out = (__m256i*)dst;
|
||||
for ( k = 0; k < out_size_w64; k++ )
|
||||
@@ -1342,7 +1350,7 @@ blake64_4way_close( blake_4way_big_context *sc,
|
||||
|
||||
#endif
|
||||
|
||||
// Blake-256 4 way & 8 way
|
||||
// Blake-256 4 way
|
||||
|
||||
// default 14 rounds, backward compatibility
|
||||
void
|
||||
@@ -1364,6 +1372,9 @@ blake256_4way_close(void *cc, void *dst)
|
||||
}
|
||||
|
||||
#if defined(__AVX2__)
|
||||
|
||||
// Blake-256 8way
|
||||
|
||||
void
|
||||
blake256_8way_init(void *cc)
|
||||
{
|
||||
|
@@ -35,7 +35,7 @@
|
||||
*/
|
||||
|
||||
#ifndef __BLAKE_HASH_4WAY__
|
||||
#define __BLAKE_HASH_4WAY__
|
||||
#define __BLAKE_HASH_4WAY__ 1
|
||||
|
||||
#ifdef __AVX__
|
||||
|
||||
@@ -117,11 +117,11 @@ void blake256r8_8way_close(void *cc, void *dst);
|
||||
// Blake-512 4 way
|
||||
|
||||
typedef struct {
|
||||
__m256i buf[16] __attribute__ ((aligned (64)));
|
||||
__m256i H[8];
|
||||
__m256i S[4];
|
||||
size_t ptr;
|
||||
sph_u64 T0, T1;
|
||||
__m256i buf[16] __attribute__ ((aligned (64)));
|
||||
__m256i H[8];
|
||||
__m256i S[4];
|
||||
size_t ptr;
|
||||
sph_u64 T0, T1;
|
||||
} blake_4way_big_context;
|
||||
|
||||
typedef blake_4way_big_context blake512_4way_context;
|
||||
|
136
algo/blake/blake2s-4way.c
Normal file
136
algo/blake/blake2s-4way.c
Normal file
@@ -0,0 +1,136 @@
|
||||
#include "blake2s-gate.h"
|
||||
#include "blake2s-hash-4way.h"
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#if defined(BLAKE2S_8WAY)
|
||||
|
||||
static __thread blake2s_8way_state blake2s_8w_ctx;
|
||||
|
||||
void blake2s_8way_hash( void *output, const void *input )
|
||||
{
|
||||
uint32_t vhash[8*8] __attribute__ ((aligned (64)));
|
||||
blake2s_8way_state ctx;
|
||||
memcpy( &ctx, &blake2s_8w_ctx, sizeof ctx );
|
||||
|
||||
blake2s_8way_update( &ctx, input + (64<<3), 16 );
|
||||
blake2s_8way_final( &ctx, vhash, BLAKE2S_OUTBYTES );
|
||||
|
||||
mm256_deinterleave_8x32( output, output+ 32, output+ 64, output+ 96,
|
||||
output+128, output+160, output+192, output+224,
|
||||
vhash, 256 );
|
||||
}
|
||||
|
||||
int scanhash_blake2s_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
{
|
||||
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[8*8] __attribute__ ((aligned (32)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t _ALIGN(64) edata[20];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
uint32_t *nonces = work->nonces;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep = vdata + 152; // 19*8
|
||||
|
||||
swab32_array( edata, pdata, 20 );
|
||||
mm256_interleave_8x32( vdata, edata, edata, edata, edata,
|
||||
edata, edata, edata, edata, 640 );
|
||||
blake2s_8way_init( &blake2s_8w_ctx, BLAKE2S_OUTBYTES );
|
||||
blake2s_8way_update( &blake2s_8w_ctx, vdata, 64 );
|
||||
|
||||
do {
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep +1, n+1 );
|
||||
be32enc( noncep +2, n+2 );
|
||||
be32enc( noncep +3, n+3 );
|
||||
be32enc( noncep +4, n+4 );
|
||||
be32enc( noncep +5, n+5 );
|
||||
be32enc( noncep +6, n+6 );
|
||||
be32enc( noncep +7, n+7 );
|
||||
pdata[19] = n;
|
||||
|
||||
blake2s_8way_hash( hash, vdata );
|
||||
|
||||
|
||||
for ( int i = 0; i < 8; i++ )
|
||||
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
|
||||
{
|
||||
pdata[19] = n+i;
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
}
|
||||
n += 8;
|
||||
|
||||
} while ( (num_found == 0) && (n < max_nonce)
|
||||
&& !work_restart[thr_id].restart );
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return num_found;
|
||||
}
|
||||
|
||||
#elif defined(BLAKE2S_4WAY)
|
||||
|
||||
static __thread blake2s_4way_state blake2s_4w_ctx;
|
||||
|
||||
void blake2s_4way_hash( void *output, const void *input )
|
||||
{
|
||||
uint32_t vhash[8*4] __attribute__ ((aligned (64)));
|
||||
blake2s_4way_state ctx;
|
||||
memcpy( &ctx, &blake2s_4w_ctx, sizeof ctx );
|
||||
|
||||
blake2s_4way_update( &ctx, input + (64<<2), 16 );
|
||||
blake2s_4way_final( &ctx, vhash, BLAKE2S_OUTBYTES );
|
||||
|
||||
mm_deinterleave_4x32( output, output+32, output+64, output+96, vhash, 256 );
|
||||
}
|
||||
|
||||
int scanhash_blake2s_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
{
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[8*4] __attribute__ ((aligned (32)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t _ALIGN(64) edata[20];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
uint32_t *nonces = work->nonces;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep = vdata + 76; // 19*4
|
||||
|
||||
swab32_array( edata, pdata, 20 );
|
||||
mm_interleave_4x32( vdata, edata, edata, edata, edata, 640 );
|
||||
blake2s_4way_init( &blake2s_4w_ctx, BLAKE2S_OUTBYTES );
|
||||
blake2s_4way_update( &blake2s_4w_ctx, vdata, 64 );
|
||||
|
||||
do {
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep +1, n+1 );
|
||||
be32enc( noncep +2, n+2 );
|
||||
be32enc( noncep +3, n+3 );
|
||||
pdata[19] = n;
|
||||
|
||||
blake2s_4way_hash( hash, vdata );
|
||||
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
|
||||
{
|
||||
pdata[19] = n+i;
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
}
|
||||
n += 4;
|
||||
|
||||
} while ( (num_found == 0) && (n < max_nonce)
|
||||
&& !work_restart[thr_id].restart );
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return num_found;
|
||||
}
|
||||
|
||||
#endif
|
27
algo/blake/blake2s-gate.c
Normal file
27
algo/blake/blake2s-gate.c
Normal file
@@ -0,0 +1,27 @@
|
||||
#include "blake2s-gate.h"
|
||||
|
||||
|
||||
// Value handed out through the algo gate's get_max64 hook for blake2s.
// NOTE(review): presumably the per-call scan range limit; confirm against
// algo-gate-api.
// changed to get_max64_0x3fffffLL in cpuminer-multi-decred
//
// Declared with (void) so the definition is a real prototype rather than an
// old-style unspecified parameter list.
int64_t blake2s_get_max64( void )
{
   return 0x7ffffLL;
}
|
||||
|
||||
// Install the BLAKE2s entry points into the algo gate.
//
// The scanhash/hash pair is chosen at compile time: the 8 way version when
// BLAKE2S_8WAY is defined, else the 4 way version when BLAKE2S_4WAY is
// defined, else the scalar functions. Always returns true.
bool register_blake2s_algo( algo_gate_t* gate )
{
#if defined(BLAKE2S_8WAY)
   gate->scanhash = (void*)&scanhash_blake2s_8way;
   gate->hash     = (void*)&blake2s_8way_hash;
#elif defined(BLAKE2S_4WAY)
   gate->scanhash = (void*)&scanhash_blake2s_4way;
   gate->hash     = (void*)&blake2s_4way_hash;
#else
   gate->scanhash = (void*)&scanhash_blake2s;
   gate->hash     = (void*)&blake2s_hash;
#endif
   gate->get_max64     = (void*)&blake2s_get_max64;
   gate->optimizations = AVX_OPT | AVX2_OPT;
   return true;
}
|
||||
|
||||
|
35
algo/blake/blake2s-gate.h
Normal file
35
algo/blake/blake2s-gate.h
Normal file
@@ -0,0 +1,35 @@
|
||||
#ifndef __BLAKE2S_GATE_H__
#define __BLAKE2S_GATE_H__ 1

#include <stdint.h>
#include "algo-gate-api.h"

// Vector width selection from the compiler's target feature macros.
// Where both macros are defined the 8 way branch is tested first below,
// so it takes precedence.
#ifdef __AVX__
  #define BLAKE2S_4WAY
#endif
#ifdef __AVX2__
  #define BLAKE2S_8WAY
#endif

// Installs the BLAKE2s entry points into the algo gate.
bool register_blake2s_algo( algo_gate_t* gate );

// Only the hash/scanhash pair of the selected implementation is declared.
#if defined(BLAKE2S_8WAY)

// 8 lanes per call
void blake2s_8way_hash( void *state, const void *input );
int  scanhash_blake2s_8way( int thr_id, struct work *work,
                            uint32_t max_nonce, uint64_t *hashes_done );

#elif defined (BLAKE2S_4WAY)

// 4 lanes per call
void blake2s_4way_hash( void *state, const void *input );
int  scanhash_blake2s_4way( int thr_id, struct work *work,
                            uint32_t max_nonce, uint64_t *hashes_done );

#else

// scalar fallback
void blake2s_hash( void *state, const void *input );
int  scanhash_blake2s( int thr_id, struct work *work,
                       uint32_t max_nonce, uint64_t *hashes_done );

#endif

#endif
|
362
algo/blake/blake2s-hash-4way.c
Normal file
362
algo/blake/blake2s-hash-4way.c
Normal file
@@ -0,0 +1,362 @@
|
||||
/**
|
||||
* BLAKE2 reference source code package - reference C implementations
|
||||
*
|
||||
* Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
|
||||
*
|
||||
* To the extent possible under law, the author(s) have dedicated all copyright
|
||||
* and related and neighboring rights to this software to the public domain
|
||||
* worldwide. This software is distributed without any warranty.
|
||||
*
|
||||
* You should have received a copy of the CC0 Public Domain Dedication along with
|
||||
* this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
|
||||
*/
|
||||
|
||||
#include "blake2s-hash-4way.h"
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#if defined(__AVX__)
|
||||
|
||||
// BLAKE2s initialization vector, broadcast to every lane by the n-way code.
static const uint32_t blake2s_IV[8] =
{
   0x6A09E667u,
   0xBB67AE85u,
   0x3C6EF372u,
   0xA54FF53Au,
   0x510E527Fu,
   0x9B05688Cu,
   0x1F83D9ABu,
   0x5BE0CD19u
};

// Message word schedule: row r lists, in order, the 16 message word indices
// consumed by round r.
static const uint8_t blake2s_sigma[10][16] =
{
   /* round 0 */ {  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 },
   /* round 1 */ { 14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3 },
   /* round 2 */ { 11,  8, 12,  0,  5,  2, 15, 13, 10, 14,  3,  6,  7,  1,  9,  4 },
   /* round 3 */ {  7,  9,  3,  1, 13, 12, 11, 14,  2,  6,  5, 10,  4,  0, 15,  8 },
   /* round 4 */ {  9,  0,  5,  7,  2,  4, 10, 15, 14,  1, 11, 12,  6,  8,  3, 13 },
   /* round 5 */ {  2, 12,  6, 10,  0, 11,  8,  3,  4, 13,  7,  5, 15, 14,  1,  9 },
   /* round 6 */ { 12,  5,  1, 15, 14, 13,  4, 10,  0,  7,  6,  3,  9,  2,  8, 11 },
   /* round 7 */ { 13, 11,  7, 14, 12,  1,  3,  9,  5,  0, 15,  4,  8,  6,  2, 10 },
   /* round 8 */ {  6, 15, 14,  9, 11,  3,  0,  8, 12,  2, 13,  7,  1,  4, 10,  5 },
   /* round 9 */ { 10,  2,  8,  4,  7,  6,  1,  5, 15, 11,  9, 14,  3, 12, 13,  0 },
};
|
||||
|
||||
// define a constant for initial param.
|
||||
|
||||
// Initialise a 4 way BLAKE2s state for unkeyed, sequential hashing.
//
// S:      state to initialise; it is zeroed, then h[] is set to the IV
//         XORed with the parameter block, identically in every lane.
// outlen: digest length stored in the parameter block.
// Always returns 0.
int blake2s_4way_init( blake2s_4way_state *S, const uint8_t outlen )
{
   blake2s_nway_param P[1];

   P->digest_length = outlen;
   P->key_length    = 0;
   P->fanout        = 1;
   P->depth         = 1;
   P->leaf_length   = 0;
   // Cleared bytewise: a 64 bit store through a cast pointer is a type-punned
   // (and possibly misaligned) access, and would spill past the field if it
   // is the 6 byte node offset of the BLAKE2s parameter block.
   // NOTE(review): assumes node_offset is a byte array member; confirm in
   // blake2s-hash-4way.h.
   memset( P->node_offset, 0, sizeof( P->node_offset ) );
   P->node_depth    = 0;
   P->inner_length  = 0;
   memset( P->salt, 0, sizeof( P->salt ) );
   memset( P->personal, 0, sizeof( P->personal ) );

   memset( S, 0, sizeof( blake2s_4way_state ) );

   /* IV XOR ParamBlock */
   for ( size_t i = 0; i < 8; ++i )
   {
      // Fetch parameter word i with memcpy instead of reading the struct
      // through a uint32_t pointer.
      uint32_t word;
      memcpy( &word, (const uint8_t*)P + 4 * i, sizeof word );
      S->h[i] = _mm_xor_si128( _mm_set1_epi32( blake2s_IV[i] ),
                               _mm_set1_epi32( word ) );
   }
   return 0;
}
|
||||
|
||||
// One BLAKE2s compression (10 rounds) of a message block for 4 lanes.
//
// S:     state; h[] is updated in place, t[] and f[] are only read.
// block: 16 vectors, one per message word.
// Always returns 0.
int blake2s_4way_compress( blake2s_4way_state *S, const __m128i* block )
{
   __m128i msg[16];
   __m128i st[16];

   memcpy_128( msg, block, 16 );

   // Working state: rows 0-7 are the chaining value, rows 8-11 the first
   // half of the IV, rows 12-15 the second half of the IV XORed with the
   // counter words and the finalisation flags.
   memcpy_128( st, S->h, 8 );
   for ( int k = 0; k < 4; k++ )
      st[ 8 + k ] = _mm_set1_epi32( blake2s_IV[k] );
   st[12] = _mm_xor_si128( _mm_set1_epi32( S->t[0] ),
                           _mm_set1_epi32( blake2s_IV[4] ) );
   st[13] = _mm_xor_si128( _mm_set1_epi32( S->t[1] ),
                           _mm_set1_epi32( blake2s_IV[5] ) );
   st[14] = _mm_xor_si128( _mm_set1_epi32( S->f[0] ),
                           _mm_set1_epi32( blake2s_IV[6] ) );
   st[15] = _mm_xor_si128( _mm_set1_epi32( S->f[1] ),
                           _mm_set1_epi32( blake2s_IV[7] ) );

// Quarter round i of round r on state words a, b, c, d.
#define G4W(r,i,a,b,c,d) \
do { \
   a = _mm_add_epi32( _mm_add_epi32( a, b ), \
                      msg[ blake2s_sigma[r][2*i+0] ] ); \
   d = mm_rotr_32( _mm_xor_si128( d, a ), 16 ); \
   c = _mm_add_epi32( c, d ); \
   b = mm_rotr_32( _mm_xor_si128( b, c ), 12 ); \
   a = _mm_add_epi32( _mm_add_epi32( a, b ), \
                      msg[ blake2s_sigma[r][2*i+1] ] ); \
   d = mm_rotr_32( _mm_xor_si128( d, a ), 8 ); \
   c = _mm_add_epi32( c, d ); \
   b = mm_rotr_32( _mm_xor_si128( b, c ), 7 ); \
} while(0)

// Full round: four column mixes followed by four diagonal mixes.
#define ROUND4W(r) \
do { \
   G4W( r, 0, st[ 0], st[ 4], st[ 8], st[12] ); \
   G4W( r, 1, st[ 1], st[ 5], st[ 9], st[13] ); \
   G4W( r, 2, st[ 2], st[ 6], st[10], st[14] ); \
   G4W( r, 3, st[ 3], st[ 7], st[11], st[15] ); \
   G4W( r, 4, st[ 0], st[ 5], st[10], st[15] ); \
   G4W( r, 5, st[ 1], st[ 6], st[11], st[12] ); \
   G4W( r, 6, st[ 2], st[ 7], st[ 8], st[13] ); \
   G4W( r, 7, st[ 3], st[ 4], st[ 9], st[14] ); \
} while(0)

   // Rounds are written out so every sigma index is a compile time constant.
   ROUND4W( 0 );
   ROUND4W( 1 );
   ROUND4W( 2 );
   ROUND4W( 3 );
   ROUND4W( 4 );
   ROUND4W( 5 );
   ROUND4W( 6 );
   ROUND4W( 7 );
   ROUND4W( 8 );
   ROUND4W( 9 );

   // Feed forward: fold both halves of the working state into h[].
   for ( size_t k = 0; k < 8; ++k )
   {
      const __m128i folded = _mm_xor_si128( S->h[k], st[k] );
      S->h[k] = _mm_xor_si128( folded, st[k + 8] );
   }

#undef G4W
#undef ROUND4W
   return 0;
}
|
||||
|
||||
// Absorb interleaved input for 4 lanes.
//
// S      hashing state; S->buf holds up to one block per lane.
// in     interleaved input: each __m128i carries 4 bytes for each lane.
// inlen  number of bytes PER LANE. Lengths are converted to vector counts
//        with >>2, so inlen (and S->buflen) are assumed to be multiples
//        of 4; any remainder is silently dropped.
//
// A block is compressed as soon as the buffer is full, including when it is
// the last data supplied before blake2s_4way_final.
// NOTE(review): the reference BLAKE2s keeps the last full block back for
// finalization; confirm callers never end a message exactly on a block
// boundary.
//
// Always returns 0.
int blake2s_4way_update( blake2s_4way_state *S, const void *in,
                         uint64_t inlen )
{
   __m128i *input = (__m128i*)in;
   __m128i *buf = (__m128i*)S->buf;
   const size_t bsize = BLAKE2S_BLOCKBYTES;

   while( inlen > 0 )
   {
      const size_t left = S->buflen;
      // Bytes per lane still needed to complete the buffered block.
      const size_t fill = bsize - left;

      if( inlen >= fill )
      {
         memcpy_128( buf + ( left>>2 ), input, fill >> 2 );
         S->buflen += fill;
         S->t[0] += BLAKE2S_BLOCKBYTES;
         S->t[1] += ( S->t[0] < BLAKE2S_BLOCKBYTES );   // carry
         blake2s_4way_compress( S, buf );
         S->buflen = 0;
         // Advance by what was actually consumed. Advancing by a whole
         // block, as before, skipped input and could wrap inlen whenever
         // the buffer was already partly filled.
         input += ( fill >> 2 );
         inlen -= fill;
      }
      else
      {
         // Not enough to fill the block: stash it and stop.
         memcpy_128( buf + ( left>>2 ), input, inlen>>2 );
         S->buflen += (size_t) inlen;
         input += ( inlen>>2 );
         inlen = 0;
      }
   }
   return 0;
}
|
||||
|
||||
// Finish the 4 lane hash and write the interleaved digests.
//
// The bytes still buffered are added to the counter, the block is zero
// padded, the last-block flag is set and one final compression is run.
// out receives the 8 chaining vectors (8 x 16 bytes); outlen is accepted
// but not consulted. Always returns 0.
int blake2s_4way_final( blake2s_4way_state *S, void *out, uint8_t outlen )
{
   __m128i *block = (__m128i*)S->buf;
   const size_t used = S->buflen;                      // bytes per lane
   const size_t pad  = BLAKE2S_BLOCKBYTES - used;      // bytes per lane

   // Account for the tail bytes, carrying into the high counter word.
   S->t[0] += used;
   S->t[1] += ( S->t[0] < used );

   // Finalization flags: f[0] always, f[1] only for a last node.
   S->f[0] = ~0U;
   if ( S->last_node )
      S->f[1] = ~0U;

   // Zero the unused remainder of the block, then compress it.
   memset_zero_128( block + ( used>>2 ), pad >> 2 );
   blake2s_4way_compress( S, block );

   for ( int k = 0; k < 8; ++k )
      casti_m128i( out, k ) = S->h[ k ];

   return 0;
}
|
||||
|
||||
#if defined(__AVX2__)
|
||||
|
||||
// Compress one 64 byte block for each of 8 lanes and fold the result into
// the chaining value S->h.
//
// block points at 16 vectors; vector k supplies message word k, one 32 bit
// element per lane. The counter S->t and the flags S->f are broadcast, so
// every lane is compressed with the same counter and finalization words.
// Always returns 0.
int blake2s_8way_compress( blake2s_8way_state *S, const __m256i *block )
{
   __m256i msg[16];
   __m256i w[16];
   const uint32_t ctl[4] = { S->t[0], S->t[1], S->f[0], S->f[1] };

   memcpy_256( msg, block, 16 );
   memcpy_256( w, S->h, 8 );

   // Upper half of the work vector: IV words 0..3 unchanged, IV words 4..7
   // XORed with the two counter words and the two finalization words.
   for ( int k = 0; k < 4; k++ )
   {
      w[  8 + k ] = _mm256_set1_epi32( blake2s_IV[k] );
      w[ 12 + k ] = _mm256_xor_si256( _mm256_set1_epi32( ctl[k] ),
                                      _mm256_set1_epi32( blake2s_IV[ 4 + k ] ) );
   }

// Half of the G function: absorb one message vector x, then rotate d right
// by rd bits and b right by rb bits.
#define HALF_G8W( x, a, b, c, d, rd, rb ) \
do { \
   a = _mm256_add_epi32( _mm256_add_epi32( a, b ), x ); \
   d = mm256_rotr_32( _mm256_xor_si256( d, a ), rd ); \
   c = _mm256_add_epi32( c, d ); \
   b = mm256_rotr_32( _mm256_xor_si256( b, c ), rb ); \
} while(0)

// Full G function for round r, position i: two half steps using the message
// words selected by blake2s_sigma, with rotations 16/12 and then 8/7.
#define G8W( r, i, a, b, c, d ) \
do { \
   HALF_G8W( msg[ blake2s_sigma[r][2*i+0] ], a, b, c, d, 16, 12 ); \
   HALF_G8W( msg[ blake2s_sigma[r][2*i+1] ], a, b, c, d,  8,  7 ); \
} while(0)

// One round: four column mixes followed by four diagonal mixes.
#define ROUND8W( r ) \
do { \
   G8W( r, 0, w[ 0], w[ 4], w[ 8], w[12] ); \
   G8W( r, 1, w[ 1], w[ 5], w[ 9], w[13] ); \
   G8W( r, 2, w[ 2], w[ 6], w[10], w[14] ); \
   G8W( r, 3, w[ 3], w[ 7], w[11], w[15] ); \
   G8W( r, 4, w[ 0], w[ 5], w[10], w[15] ); \
   G8W( r, 5, w[ 1], w[ 6], w[11], w[12] ); \
   G8W( r, 6, w[ 2], w[ 7], w[ 8], w[13] ); \
   G8W( r, 7, w[ 3], w[ 4], w[ 9], w[14] ); \
} while(0)

   // Ten rounds, kept unrolled so the sigma indices are compile time
   // constants.
   ROUND8W( 0 );   ROUND8W( 1 );
   ROUND8W( 2 );   ROUND8W( 3 );
   ROUND8W( 4 );   ROUND8W( 5 );
   ROUND8W( 6 );   ROUND8W( 7 );
   ROUND8W( 8 );   ROUND8W( 9 );

#undef ROUND8W
#undef G8W
#undef HALF_G8W

   // Feed forward: h[k] ^= w[k] ^ w[k+8].
   for ( size_t k = 0; k < 8; ++k )
      S->h[k] = _mm256_xor_si256( S->h[k],
                                  _mm256_xor_si256( w[k], w[k + 8] ) );

   return 0;
}
|
||||
|
||||
// Initialise an 8 lane BLAKE2s state for unkeyed, sequential hashing.
//
// S       state to initialise; it is zeroed first.
// outlen  digest length stored in the parameter block.
//
// Every lane starts from the same chaining value: IV XOR parameter block.
// Always returns 0.
int blake2s_8way_init( blake2s_8way_state *S, const uint8_t outlen )
{
   blake2s_nway_param P[1];
   uint32_t words[8];

   // Every parameter except digest_length, fanout and depth is zero. Zero
   // the block byte-wise rather than storing a uint64_t through a cast of
   // the 6 byte node_offset array, which wrote past that member and relied
   // on a misaligned, type-punned access.
   memset( P, 0, sizeof P );
   P->digest_length = outlen;
   P->fanout = 1;
   P->depth = 1;

   // The packed parameter block is 32 bytes and has alignment 1, so it is
   // copied into properly aligned words instead of being read through a
   // uint32_t pointer. The words are taken in host byte order, as before.
   memcpy( words, P, sizeof words );

   memset( S, 0, sizeof( blake2s_8way_state ) );

   /* IV XOR ParamBlock, broadcast to all lanes */
   for ( int i = 0; i < 8; ++i )
      S->h[i] = _mm256_xor_si256( _mm256_set1_epi32( blake2s_IV[i] ),
                                  _mm256_set1_epi32( words[i] ) );
   return 0;
}
|
||||
|
||||
// Absorb interleaved input for 8 lanes.
//
// S      hashing state; S->buf holds up to one block per lane.
// in     interleaved input: each __m256i carries 4 bytes for each lane.
// inlen  number of bytes PER LANE. Lengths are converted to vector counts
//        with >>2, so inlen (and S->buflen) are assumed to be multiples
//        of 4; any remainder is silently dropped.
//
// A block is compressed as soon as the buffer is full, including when it is
// the last data supplied before blake2s_8way_final.
// NOTE(review): the reference BLAKE2s keeps the last full block back for
// finalization; confirm callers never end a message exactly on a block
// boundary.
//
// Always returns 0.
int blake2s_8way_update( blake2s_8way_state *S, const void *in,
                         uint64_t inlen )
{
   __m256i *input = (__m256i*)in;
   __m256i *buf = (__m256i*)S->buf;
   const size_t bsize = BLAKE2S_BLOCKBYTES;

   while( inlen > 0 )
   {
      const size_t left = S->buflen;
      // Bytes per lane still needed to complete the buffered block.
      const size_t fill = bsize - left;

      if( inlen >= fill )
      {
         memcpy_256( buf + ( left>>2 ), input, fill >> 2 );
         S->buflen += fill;
         S->t[0] += BLAKE2S_BLOCKBYTES;
         S->t[1] += ( S->t[0] < BLAKE2S_BLOCKBYTES );   // carry
         blake2s_8way_compress( S, buf );
         S->buflen = 0;
         // Advance by what was actually consumed. Advancing by a whole
         // block, as before, skipped input and could wrap inlen whenever
         // the buffer was already partly filled.
         input += ( fill >> 2 );
         inlen -= fill;
      }
      else
      {
         // Not enough to fill the block: stash it and stop.
         memcpy_256( buf + ( left>>2 ), input, inlen>>2 );
         S->buflen += (size_t) inlen;
         input += ( inlen>>2 );
         inlen = 0;
      }
   }
   return 0;
}
|
||||
|
||||
// Finish the 8 lane hash and write the interleaved digests.
//
// The bytes still buffered are added to the counter, the block is zero
// padded, the last-block flag is set and one final compression is run.
// out receives the 8 chaining vectors (8 x 32 bytes); outlen is accepted
// but not consulted. Always returns 0.
int blake2s_8way_final( blake2s_8way_state *S, void *out, uint8_t outlen )
{
   __m256i *block = (__m256i*)S->buf;
   const size_t used = S->buflen;                      // bytes per lane
   const size_t pad  = BLAKE2S_BLOCKBYTES - used;      // bytes per lane

   // Account for the tail bytes, carrying into the high counter word.
   S->t[0] += used;
   S->t[1] += ( S->t[0] < used );

   // Finalization flags: f[0] always, f[1] only for a last node.
   S->f[0] = ~0U;
   if ( S->last_node )
      S->f[1] = ~0U;

   // Zero the unused remainder of the block, then compress it.
   memset_zero_256( block + ( used>>2 ), pad >> 2 );
   blake2s_8way_compress( S, block );

   for ( int k = 0; k < 8; ++k )
      casti_m256i( out, k ) = S->h[ k ];

   return 0;
}
|
||||
|
||||
|
||||
#endif // __AVX2__
|
||||
|
||||
#if 0
|
||||
int blake2s( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen )
|
||||
{
|
||||
blake2s_state S[1];
|
||||
|
||||
/* Verify parameters */
|
||||
if ( NULL == in ) return -1;
|
||||
|
||||
if ( NULL == out ) return -1;
|
||||
|
||||
if ( NULL == key ) keylen = 0; /* Fail here instead if keylen != 0 and key == NULL? */
|
||||
|
||||
if( keylen > 0 )
|
||||
{
|
||||
if( blake2s_init_key( S, outlen, key, keylen ) < 0 ) return -1;
|
||||
}
|
||||
else
|
||||
{
|
||||
if( blake2s_init( S, outlen ) < 0 ) return -1;
|
||||
}
|
||||
|
||||
blake2s_update( S, ( uint8_t * )in, inlen );
|
||||
blake2s_final( S, out, outlen );
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // __AVX__
|
112
algo/blake/blake2s-hash-4way.h
Normal file
112
algo/blake/blake2s-hash-4way.h
Normal file
@@ -0,0 +1,112 @@
|
||||
/**
|
||||
* BLAKE2 reference source code package - reference C implementations
|
||||
*
|
||||
* Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
|
||||
*
|
||||
* To the extent possible under law, the author(s) have dedicated all copyright
|
||||
* and related and neighboring rights to this software to the public domain
|
||||
* worldwide. This software is distributed without any warranty.
|
||||
*
|
||||
* You should have received a copy of the CC0 Public Domain Dedication along with
|
||||
* this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
|
||||
*/
|
||||
//#pragma once
|
||||
#ifndef __BLAKE2S_HASH_4WAY_H__
|
||||
#define __BLAKE2S_HASH_4WAY_H__ 1
|
||||
|
||||
#if defined(__AVX__)
|
||||
|
||||
#include "avxdefs.h"
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#include <inttypes.h>
|
||||
#define inline __inline
|
||||
#define ALIGN(x) __declspec(align(x))
|
||||
#else
|
||||
#define ALIGN(x) __attribute__((aligned(x)))
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// Sizes, in bytes, used by the n-way BLAKE2s code.
enum blake2s_constant
{
   BLAKE2S_BLOCKBYTES    = 64,   // input consumed per compression, per lane
   BLAKE2S_OUTBYTES      = 32,   // digest size
   BLAKE2S_KEYBYTES      = 32,   // key size
   BLAKE2S_SALTBYTES     = 8,    // size of the salt field of the param block
   BLAKE2S_PERSONALBYTES = 8     // size of the personal field of the param block
};
|
||||
|
||||
#pragma pack(push, 1)
|
||||
typedef struct __blake2s_nway_param
|
||||
{
|
||||
uint8_t digest_length; // 1
|
||||
uint8_t key_length; // 2
|
||||
uint8_t fanout; // 3
|
||||
uint8_t depth; // 4
|
||||
uint32_t leaf_length; // 8
|
||||
uint8_t node_offset[6];// 14
|
||||
uint8_t node_depth; // 15
|
||||
uint8_t inner_length; // 16
|
||||
// uint8_t reserved[0];
|
||||
uint8_t salt[BLAKE2S_SALTBYTES]; // 24
|
||||
uint8_t personal[BLAKE2S_PERSONALBYTES]; // 32
|
||||
} blake2s_nway_param;
|
||||
#pragma pack(pop)
|
||||
|
||||
// Hashing state for 4 interleaved BLAKE2s lanes.
ALIGN( 64 ) typedef struct __blake2s_4way_state
{
   __m128i  h[8];                           // chaining value, one vector per word
   uint8_t  buf[ BLAKE2S_BLOCKBYTES * 4 ];  // one buffered block per lane, interleaved
   uint32_t t[2];                           // byte counter: low word, high word
   uint32_t f[2];                           // finalization flags
   size_t   buflen;                         // bytes buffered, counted per lane
   uint8_t  last_node;                      // when non-zero, final also sets f[1]
} blake2s_4way_state ;
|
||||
|
||||
int blake2s_4way_init( blake2s_4way_state *S, const uint8_t outlen );
|
||||
int blake2s_4way_update( blake2s_4way_state *S, const void *in,
|
||||
uint64_t inlen );
|
||||
int blake2s_4way_final( blake2s_4way_state *S, void *out, uint8_t outlen );
|
||||
|
||||
#if defined(__AVX2__)
|
||||
|
||||
// Hashing state for 8 interleaved BLAKE2s lanes.
ALIGN( 64 ) typedef struct __blake2s_8way_state
{
   __m256i  h[8];                           // chaining value, one vector per word
   uint8_t  buf[ BLAKE2S_BLOCKBYTES * 8 ];  // one buffered block per lane, interleaved
   uint32_t t[2];                           // byte counter: low word, high word
   uint32_t f[2];                           // finalization flags
   size_t   buflen;                         // bytes buffered, counted per lane
   uint8_t  last_node;                      // when non-zero, final also sets f[1]
} blake2s_8way_state ;
|
||||
|
||||
int blake2s_8way_init( blake2s_8way_state *S, const uint8_t outlen );
|
||||
int blake2s_8way_update( blake2s_8way_state *S, const void *in,
|
||||
uint64_t inlen );
|
||||
int blake2s_8way_final( blake2s_8way_state *S, void *out, uint8_t outlen );
|
||||
|
||||
#endif
|
||||
|
||||
#if 0
|
||||
// Simple API
|
||||
// int blake2s( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen );
|
||||
|
||||
// Direct Hash Mining Helpers
|
||||
#define blake2s_salt32(out, in, inlen, key32) blake2s(out, in, key32, 32, inlen, 32) /* neoscrypt */
|
||||
#define blake2s_simple(out, in, inlen) blake2s(out, in, NULL, 32, inlen, 0)
|
||||
#endif
|
||||
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // __AVX__
|
||||
|
||||
#endif
|
@@ -1,26 +1,29 @@
|
||||
#include "algo-gate-api.h"
|
||||
#include "blake2s-gate.h"
|
||||
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "sph-blake2s.h"
|
||||
|
||||
static __thread blake2s_state s_midstate;
|
||||
static __thread blake2s_state s_ctx;
|
||||
static __thread blake2s_state blake2s_ctx;
|
||||
//static __thread blake2s_state s_ctx;
|
||||
#define MIDLEN 76
|
||||
|
||||
void blake2s_hash(void *output, const void *input)
|
||||
void blake2s_hash( void *output, const void *input )
|
||||
{
|
||||
unsigned char _ALIGN(64) hash[BLAKE2S_OUTBYTES];
|
||||
blake2s_state blake2_ctx __attribute__ ((aligned (64)));
|
||||
|
||||
blake2s_init(&blake2_ctx, BLAKE2S_OUTBYTES);
|
||||
blake2s_update(&blake2_ctx, input, 80);
|
||||
blake2s_final(&blake2_ctx, hash, BLAKE2S_OUTBYTES);
|
||||
unsigned char _ALIGN(64) hash[BLAKE2S_OUTBYTES];
|
||||
blake2s_state ctx __attribute__ ((aligned (64)));
|
||||
|
||||
memcpy( &ctx, &blake2s_ctx, sizeof ctx );
|
||||
blake2s_update( &ctx, input+64, 16 );
|
||||
|
||||
// blake2s_init(&ctx, BLAKE2S_OUTBYTES);
|
||||
// blake2s_update(&ctx, input, 80);
|
||||
blake2s_final( &ctx, hash, BLAKE2S_OUTBYTES );
|
||||
|
||||
memcpy(output, hash, 32);
|
||||
}
|
||||
|
||||
/*
|
||||
static void blake2s_hash_end(uint32_t *output, const uint32_t *input)
|
||||
{
|
||||
s_ctx.buflen = MIDLEN;
|
||||
@@ -28,7 +31,7 @@ static void blake2s_hash_end(uint32_t *output, const uint32_t *input)
|
||||
blake2s_update(&s_ctx, (uint8_t*) &input[MIDLEN/4], 80 - MIDLEN);
|
||||
blake2s_final(&s_ctx, (uint8_t*) output, BLAKE2S_OUTBYTES);
|
||||
}
|
||||
|
||||
*/
|
||||
int scanhash_blake2s(int thr_id, struct work *work,
|
||||
uint32_t max_nonce, uint64_t *hashes_done)
|
||||
{
|
||||
@@ -46,13 +49,12 @@ int scanhash_blake2s(int thr_id, struct work *work,
|
||||
swab32_array( endiandata, pdata, 20 );
|
||||
|
||||
// midstate
|
||||
blake2s_init(&s_midstate, BLAKE2S_OUTBYTES);
|
||||
blake2s_update(&s_midstate, (uint8_t*) endiandata, MIDLEN);
|
||||
memcpy(&s_ctx, &s_midstate, sizeof(blake2s_state));
|
||||
blake2s_init( &blake2s_ctx, BLAKE2S_OUTBYTES );
|
||||
blake2s_update( &blake2s_ctx, (uint8_t*) endiandata, 64 );
|
||||
|
||||
do {
|
||||
be32enc(&endiandata[19], n);
|
||||
blake2s_hash_end(hash64, endiandata);
|
||||
blake2s_hash( hash64, endiandata );
|
||||
if (hash64[7] < Htarg && fulltest(hash64, ptarget)) {
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
pdata[19] = n;
|
||||
@@ -67,7 +69,7 @@ int scanhash_blake2s(int thr_id, struct work *work,
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
// changed to get_max64_0x3fffffLL in cpuminer-multi-decred
|
||||
int64_t blake2s_get_max64 ()
|
||||
{
|
||||
@@ -81,4 +83,4 @@ bool register_blake2s_algo( algo_gate_t* gate )
|
||||
gate->get_max64 = (void*)&blake2s_get_max64;
|
||||
return true;
|
||||
};
|
||||
|
||||
*/
|
||||
|
@@ -1,21 +1,22 @@
|
||||
#include "blakecoin-gate.h"
|
||||
|
||||
#if defined (BLAKECOIN_4WAY)
|
||||
|
||||
#include "blake-hash-4way.h"
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
#include <memory.h>
|
||||
|
||||
blake256r8_4way_context blakecoin_ctx;
|
||||
#if defined (BLAKECOIN_4WAY)
|
||||
|
||||
blake256r8_4way_context blakecoin_4w_ctx;
|
||||
|
||||
void blakecoin_4way_hash(void *state, const void *input)
|
||||
{
|
||||
uint32_t vhash[8*4] __attribute__ ((aligned (64)));
|
||||
blake256r8_4way_context ctx;
|
||||
memcpy( &ctx, &blakecoin_ctx, sizeof ctx );
|
||||
|
||||
memcpy( &ctx, &blakecoin_4w_ctx, sizeof ctx );
|
||||
blake256r8_4way( &ctx, input + (64<<2), 16 );
|
||||
blake256r8_4way_close( &ctx, vhash );
|
||||
|
||||
mm_deinterleave_4x32( state, state+32, state+64, state+96, vhash, 256 );
|
||||
}
|
||||
|
||||
@@ -31,58 +32,30 @@ int scanhash_blakecoin_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint32_t _ALIGN(32) edata[20];
|
||||
uint32_t n = first_nonce;
|
||||
uint32_t *nonces = work->nonces;
|
||||
bool *found = work->nfound;
|
||||
int num_found = 0;
|
||||
|
||||
if (opt_benchmark)
|
||||
if ( opt_benchmark )
|
||||
HTarget = 0x7f;
|
||||
|
||||
// we need big endian data...
|
||||
swab32_array( edata, pdata, 20 );
|
||||
|
||||
mm_interleave_4x32( vdata, edata, edata, edata, edata, 640 );
|
||||
|
||||
blake256r8_4way_init( &blakecoin_ctx );
|
||||
blake256r8_4way( &blakecoin_ctx, vdata, 64 );
|
||||
blake256r8_4way_init( &blakecoin_4w_ctx );
|
||||
blake256r8_4way( &blakecoin_4w_ctx, vdata, 64 );
|
||||
|
||||
uint32_t *noncep = vdata + 76; // 19*4
|
||||
do {
|
||||
found[0] = found[1] = found[2] = found[3] = false;
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep +1, n+1 );
|
||||
be32enc( noncep +2, n+2 );
|
||||
be32enc( noncep +3, n+3 );
|
||||
|
||||
blakecoin_4way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
blakecoin_4way_hash( hash, vdata );
|
||||
|
||||
if ( hash[7] <= HTarget && fulltest( hash, ptarget ) )
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( (hash+(i<<3))[7] <= HTarget && fulltest( hash+(i<<3), ptarget ) )
|
||||
{
|
||||
found[0] = true;
|
||||
num_found++;
|
||||
nonces[0] = n;
|
||||
work_set_target_ratio( work, hash );
|
||||
}
|
||||
if ( (hash+8)[7] <= HTarget && fulltest( hash+8, ptarget ) )
|
||||
{
|
||||
found[1] = true;
|
||||
num_found++;
|
||||
nonces[1] = n+1;
|
||||
work_set_target_ratio( work, hash+8 );
|
||||
}
|
||||
if ( (hash+16)[7] <= HTarget && fulltest( hash+16, ptarget ) )
|
||||
{
|
||||
found[2] = true;
|
||||
num_found++;
|
||||
nonces[2] = n+2;
|
||||
work_set_target_ratio( work, hash+16 );
|
||||
}
|
||||
if ( (hash+24)[7] <= HTarget && fulltest( hash+24, ptarget ) )
|
||||
{
|
||||
found[3] = true;
|
||||
num_found++;
|
||||
nonces[3] = n+3;
|
||||
work_set_target_ratio( work, hash+24 );
|
||||
pdata[19] = n+i;
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
}
|
||||
n += 4;
|
||||
|
||||
@@ -90,15 +63,77 @@ int scanhash_blakecoin_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
&& !work_restart[thr_id].restart );
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
|
||||
// workaround to prevent flood of hash reports when nonce range exhausted
|
||||
// and thread is spinning waiting for new work
|
||||
if ( ( n >= max_nonce ) && ( *hashes_done < 10 ) )
|
||||
{
|
||||
*hashes_done = 0;
|
||||
// sleep(1);
|
||||
}
|
||||
|
||||
return num_found;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(BLAKECOIN_8WAY)
|
||||
|
||||
blake256r8_8way_context blakecoin_8w_ctx;
|
||||
|
||||
void blakecoin_8way_hash( void *state, const void *input )
|
||||
{
|
||||
uint32_t vhash[8*8] __attribute__ ((aligned (64)));
|
||||
blake256r8_8way_context ctx;
|
||||
|
||||
memcpy( &ctx, &blakecoin_8w_ctx, sizeof ctx );
|
||||
blake256r8_8way( &ctx, input + (64<<3), 16 );
|
||||
blake256r8_8way_close( &ctx, vhash );
|
||||
|
||||
mm256_deinterleave_8x32( state, state+ 32, state+ 64, state+ 96,
|
||||
state+128, state+160, state+192, state+224,
|
||||
vhash, 256 );
|
||||
}
|
||||
|
||||
int scanhash_blakecoin_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
{
|
||||
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[8*8] __attribute__ ((aligned (32)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t HTarget = ptarget[7];
|
||||
uint32_t _ALIGN(32) edata[20];
|
||||
uint32_t n = first_nonce;
|
||||
uint32_t *nonces = work->nonces;
|
||||
uint32_t *noncep = vdata + 152; // 19*8
|
||||
int num_found = 0;
|
||||
if ( opt_benchmark )
|
||||
HTarget = 0x7f;
|
||||
|
||||
// we need big endian data...
|
||||
swab32_array( edata, pdata, 20 );
|
||||
mm256_interleave_8x32( vdata, edata, edata, edata, edata,
|
||||
edata, edata, edata, edata, 640 );
|
||||
blake256r8_8way_init( &blakecoin_8w_ctx );
|
||||
blake256r8_8way( &blakecoin_8w_ctx, vdata, 64 );
|
||||
|
||||
do {
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep +1, n+1 );
|
||||
be32enc( noncep +2, n+2 );
|
||||
be32enc( noncep +3, n+3 );
|
||||
be32enc( noncep +4, n+4 );
|
||||
be32enc( noncep +5, n+5 );
|
||||
be32enc( noncep +6, n+6 );
|
||||
be32enc( noncep +7, n+7 );
|
||||
pdata[19] = n;
|
||||
blakecoin_8way_hash( hash, vdata );
|
||||
|
||||
for ( int i = 0; i < 8; i++ )
|
||||
if ( (hash+(i<<3))[7] <= HTarget && fulltest( hash+(i<<3), ptarget ) )
|
||||
{
|
||||
pdata[19] = n+i;
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
}
|
||||
n += 8;
|
||||
} while ( (num_found == 0) && (n < max_nonce)
|
||||
&& !work_restart[thr_id].restart );
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return num_found;
|
||||
}
|
||||
|
||||
|
@@ -8,55 +8,21 @@ int64_t blakecoin_get_max64 ()
|
||||
// return 0x3fffffLL;
|
||||
}
|
||||
|
||||
// Blakecoin 4 way hashes so fast it runs out of nonces.
|
||||
// This is an attempt to solve this but the result may be
|
||||
// to rehash old nonces until new work is received.
|
||||
void bc4w_get_new_work( struct work* work, struct work* g_work, int thr_id,
|
||||
uint32_t *end_nonce_ptr, bool clean_job )
|
||||
{
|
||||
uint32_t *nonceptr = algo_gate.get_nonceptr( work->data );
|
||||
|
||||
// if ( have_stratum && ( *nonceptr >= *end_nonce_ptr ) )
|
||||
// algo_gate.stratum_gen_work( &stratum, g_work );
|
||||
|
||||
if ( memcmp( work->data, g_work->data, algo_gate.work_cmp_size )
|
||||
|| ( *nonceptr >= *end_nonce_ptr )
|
||||
|| ( ( work->job_id != g_work->job_id ) && clean_job ) )
|
||||
/*
|
||||
if ( memcmp( work->data, g_work->data, algo_gate.work_cmp_size )
|
||||
&& ( clean_job || ( *nonceptr >= *end_nonce_ptr )
|
||||
|| ( work->job_id != g_work->job_id ) ) )
|
||||
*/
|
||||
{
|
||||
work_free( work );
|
||||
work_copy( work, g_work );
|
||||
*nonceptr = 0xffffffffU / opt_n_threads * thr_id;
|
||||
if ( opt_randomize )
|
||||
*nonceptr += ( (rand() *4 ) & UINT32_MAX ) / opt_n_threads;
|
||||
*end_nonce_ptr = ( 0xffffffffU / opt_n_threads ) * (thr_id+1) - 0x20;
|
||||
// try incrementing the xnonce to change the data
|
||||
// for ( int i = 0; i < work->xnonce2_size && !( ++work->xnonce2[i] ); i++ );
|
||||
}
|
||||
else
|
||||
++(*nonceptr);
|
||||
}
|
||||
|
||||
|
||||
// vanilla uses default gen merkle root, otherwise identical to blakecoin
|
||||
bool register_vanilla_algo( algo_gate_t* gate )
|
||||
{
|
||||
#if defined(BLAKECOIN_4WAY)
|
||||
// four_way_not_tested();
|
||||
#if defined(BLAKECOIN_8WAY)
|
||||
gate->scanhash = (void*)&scanhash_blakecoin_8way;
|
||||
gate->hash = (void*)&blakecoin_8way_hash;
|
||||
|
||||
#elif defined(BLAKECOIN_4WAY)
|
||||
gate->scanhash = (void*)&scanhash_blakecoin_4way;
|
||||
gate->hash = (void*)&blakecoin_4way_hash;
|
||||
// gate->get_new_work = (void*)&bc4w_get_new_work;
|
||||
// blakecoin_4way_init( &blake_4way_init_ctx );
|
||||
#else
|
||||
gate->scanhash = (void*)&scanhash_blakecoin;
|
||||
gate->hash = (void*)&blakecoinhash;
|
||||
// blakecoin_init( &blake_init_ctx );
|
||||
#endif
|
||||
gate->optimizations = AVX2_OPT;
|
||||
gate->optimizations = AVX_OPT | AVX2_OPT;
|
||||
gate->get_max64 = (void*)&blakecoin_get_max64;
|
||||
return true;
|
||||
}
|
||||
|
@@ -1,12 +1,21 @@
|
||||
#ifndef __BLAKECOIN_GATE_H__
|
||||
#define __BLAKECOIN_GATE_H__
|
||||
#define __BLAKECOIN_GATE_H__ 1
|
||||
|
||||
#include "algo-gate-api.h"
|
||||
#include <stdint.h>
|
||||
|
||||
#if defined(__AVX2__)
|
||||
#if defined(__AVX__)
|
||||
#define BLAKECOIN_4WAY
|
||||
#endif
|
||||
#if defined(__AVX2__)
|
||||
#define BLAKECOIN_8WAY
|
||||
#endif
|
||||
|
||||
#if defined (BLAKECOIN_8WAY)
|
||||
void blakecoin_8way_hash(void *state, const void *input);
|
||||
int scanhash_blakecoin_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
#endif
|
||||
|
||||
#if defined (BLAKECOIN_4WAY)
|
||||
void blakecoin_4way_hash(void *state, const void *input);
|
||||
|
@@ -38,7 +38,6 @@ int scanhash_decred_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint32_t n = first_nonce;
|
||||
const uint32_t HTarget = opt_benchmark ? 0x7f : ptarget[7];
|
||||
uint32_t *nonces = work->nonces;
|
||||
bool *found = work->nfound;
|
||||
int num_found = 0;
|
||||
|
||||
// copy to buffer guaranteed to be aligned.
|
||||
@@ -52,7 +51,6 @@ int scanhash_decred_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
|
||||
uint32_t *noncep = vdata + DECRED_NONCE_INDEX * 4;
|
||||
do {
|
||||
found[0] = found[1] = found[2] = found[3] = false;
|
||||
* noncep = n;
|
||||
*(noncep+1) = n+1;
|
||||
*(noncep+2) = n+2;
|
||||
@@ -60,35 +58,12 @@ int scanhash_decred_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
|
||||
decred_hash_4way( hash, vdata );
|
||||
|
||||
if ( hash[7] <= HTarget && fulltest( hash, ptarget ) )
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( (hash+(i<<3))[7] <= HTarget && fulltest( hash+(i<<3), ptarget ) )
|
||||
{
|
||||
work_set_target_ratio( work, hash );
|
||||
found[0] = true;
|
||||
num_found++;
|
||||
nonces[0] = n;
|
||||
pdata[DECRED_NONCE_INDEX] = n;
|
||||
}
|
||||
if ( (hash+8)[7] <= HTarget && fulltest( hash+8, ptarget ) )
|
||||
{
|
||||
work_set_target_ratio( work, hash+8 );
|
||||
found[1] = true;
|
||||
num_found++;
|
||||
nonces[1] = n+1;
|
||||
}
|
||||
if ( (hash+16)[7] <= HTarget && fulltest( hash+16, ptarget ) )
|
||||
{
|
||||
work_set_target_ratio( work, hash+16 );
|
||||
found[2] = true;
|
||||
num_found++;
|
||||
nonces[2] = n+2;
|
||||
}
|
||||
|
||||
if ( (hash+24)[7] <= HTarget && fulltest( hash+24, ptarget ) )
|
||||
{
|
||||
work_set_target_ratio( work, hash+24 );
|
||||
found[3] = true;
|
||||
num_found++;
|
||||
nonces[3] = n+3;
|
||||
pdata[DECRED_NONCE_INDEX] = n+i;
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
}
|
||||
n += 4;
|
||||
} while ( (num_found == 0) && (n < max_nonce)
|
||||
|
@@ -111,12 +111,8 @@ int scanhash_pentablake_4way( int thr_id, struct work *work,
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint32_t *nonces = work->nonces;
|
||||
bool *found = work->nfound;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep0 = vdata + 73; // 9*8 + 1
|
||||
uint32_t *noncep1 = vdata + 75;
|
||||
uint32_t *noncep2 = vdata + 77;
|
||||
uint32_t *noncep3 = vdata + 79;
|
||||
uint32_t *noncep = vdata + 73; // 9*8 + 1
|
||||
|
||||
// uint32_t _ALIGN(32) hash64[8];
|
||||
// uint32_t _ALIGN(32) endiandata[32];
|
||||
@@ -150,47 +146,19 @@ int scanhash_pentablake_4way( int thr_id, struct work *work,
|
||||
{
|
||||
uint32_t mask = masks[m];
|
||||
do {
|
||||
found[0] = found[1] = found[2] = found[3] = false;
|
||||
be32enc( noncep0, n );
|
||||
be32enc( noncep1, n+1 );
|
||||
be32enc( noncep2, n+2 );
|
||||
be32enc( noncep3, n+3 );
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep+2, n+1 );
|
||||
be32enc( noncep+4, n+2 );
|
||||
be32enc( noncep+6, n+3 );
|
||||
|
||||
pentablakehash_4way( hash, vdata );
|
||||
|
||||
// return immediately on nonce found, only one submit
|
||||
if ( ( !(hash[7] & mask) ) && fulltest( hash, ptarget ) )
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( !( (hash+(i<<3))[7] & mask )
|
||||
&& fulltest( hash+(i<<3), ptarget ) )
|
||||
{
|
||||
found[0] = true;
|
||||
num_found++;
|
||||
nonces[0] = n;
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return 1;
|
||||
}
|
||||
if ( (! ((hash+8)[7] & mask) ) && fulltest( hash+8, ptarget ) )
|
||||
{
|
||||
found[1] = true;
|
||||
num_found++;
|
||||
nonces[1] = n;
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return 1;
|
||||
}
|
||||
if ( ( !((hash+16)[7] & mask) ) && fulltest( hash+16, ptarget ) )
|
||||
{
|
||||
found[2] = true;
|
||||
num_found++;
|
||||
nonces[2] = n;
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return 1;
|
||||
}
|
||||
if ( ( !((hash+24)[7] & mask) ) && fulltest( hash+24, ptarget ) )
|
||||
{
|
||||
found[3] = true;
|
||||
num_found++;
|
||||
nonces[3] = n;
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return 1;
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
}
|
||||
n += 4;
|
||||
|
||||
|
@@ -64,12 +64,8 @@ int scanhash_myriad_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
uint32_t *nonces = work->nonces;
|
||||
bool *found = work->nfound;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep0 = vdata + 76; // 19*4
|
||||
uint32_t *noncep1 = vdata + 77;
|
||||
uint32_t *noncep2 = vdata + 78;
|
||||
uint32_t *noncep3 = vdata + 79;
|
||||
uint32_t *noncep = vdata + 76; // 19*4
|
||||
|
||||
/*
|
||||
uint32_t *pdata = work->data;
|
||||
@@ -86,42 +82,20 @@ int scanhash_myriad_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
mm_interleave_4x32( vdata, edata, edata, edata, edata, 640 );
|
||||
|
||||
do {
|
||||
found[0] = found[1] = found[2] = found[3] = false;
|
||||
be32enc( noncep0, n );
|
||||
be32enc( noncep1, n+1 );
|
||||
be32enc( noncep2, n+2 );
|
||||
be32enc( noncep3, n+3 );
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep+1, n+1 );
|
||||
be32enc( noncep+2, n+2 );
|
||||
be32enc( noncep+3, n+3 );
|
||||
|
||||
myriad_4way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
if ( hash[7] <= Htarg && fulltest( hash, ptarget ) )
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
|
||||
{
|
||||
found[0] = true;
|
||||
num_found++;
|
||||
nonces[0] = pdata[19] = n;
|
||||
work_set_target_ratio( work, hash );
|
||||
}
|
||||
if ( (hash+8)[7] <= Htarg && fulltest( hash+8, ptarget ) )
|
||||
{
|
||||
found[1] = true;
|
||||
num_found++;
|
||||
nonces[1] = n+1;
|
||||
work_set_target_ratio( work, hash+8 );
|
||||
}
|
||||
if ( (hash+16)[7] <= Htarg && fulltest( hash+16, ptarget ) )
|
||||
{
|
||||
found[2] = true;
|
||||
num_found++;
|
||||
nonces[2] = n+2;
|
||||
work_set_target_ratio( work, hash+16 );
|
||||
}
|
||||
if ( (hash+24)[7] <= Htarg && fulltest( hash+24, ptarget ) )
|
||||
{
|
||||
found[3] = true;
|
||||
num_found++;
|
||||
nonces[3] = n+3;
|
||||
work_set_target_ratio( work, hash+24 );
|
||||
pdata[19] = n+i;
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
}
|
||||
n += 4;
|
||||
} while ( (num_found == 0) && (n < max_nonce-4)
|
||||
|
@@ -150,6 +150,9 @@ int scanhash_hodl_wolf( int threadNumber, struct work* work, uint32_t max_nonce,
|
||||
int searchNumber = COMPARE_SIZE / opt_n_threads;
|
||||
int startLoc = threadNumber * searchNumber;
|
||||
|
||||
if ( opt_debug )
|
||||
applog( LOG_DEBUG,"Hash target= %08lx", ptarget[7] );
|
||||
|
||||
for(int32_t k = startLoc; k < startLoc + searchNumber && !work_restart[threadNumber].restart; k++)
|
||||
{
|
||||
// copy data to first l2 cache
|
||||
|
@@ -95,12 +95,8 @@ int scanhash_jha_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint32_t n = pdata[19];
|
||||
uint32_t *nonces = work->nonces;
|
||||
bool *found = work->nfound;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep0 = vdata + 73; // 9*8 + 1
|
||||
uint32_t *noncep1 = vdata + 75;
|
||||
uint32_t *noncep2 = vdata + 77;
|
||||
uint32_t *noncep3 = vdata + 79;
|
||||
uint32_t *noncep = vdata + 73; // 9*8 + 1
|
||||
|
||||
uint64_t htmax[] = {
|
||||
0,
|
||||
@@ -131,46 +127,21 @@ int scanhash_jha_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
{
|
||||
uint32_t mask = masks[m];
|
||||
do {
|
||||
found[0] = found[1] = found[2] = found[3] = false;
|
||||
be32enc( noncep0, n );
|
||||
be32enc( noncep1, n+1 );
|
||||
be32enc( noncep2, n+2 );
|
||||
be32enc( noncep3, n+3 );
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep+2, n+1 );
|
||||
be32enc( noncep+4, n+2 );
|
||||
be32enc( noncep+6, n+3 );
|
||||
|
||||
jha_hash_4way( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
if ( ( !(hash[7] & mask) )
|
||||
&& fulltest( hash, ptarget ) )
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( ( !( (hash+(i<<3))[7] & mask ) == 0 )
|
||||
&& fulltest( hash+(i<<3), ptarget ) )
|
||||
{
|
||||
found[0] = true;
|
||||
num_found++;
|
||||
nonces[0] = n;
|
||||
work_set_target_ratio( work, hash );
|
||||
}
|
||||
if ( ( !((hash+8)[7] & mask) )
|
||||
&& fulltest( hash+8, ptarget ) )
|
||||
{
|
||||
found[1] = true;
|
||||
num_found++;
|
||||
nonces[1] = n+1;
|
||||
work_set_target_ratio( work, hash+8 );
|
||||
}
|
||||
if ( ( !((hash+16)[7] & mask) )
|
||||
&& fulltest( hash+16, ptarget ) )
|
||||
{
|
||||
found[2] = true;
|
||||
num_found++;
|
||||
nonces[2] = n+2;
|
||||
work_set_target_ratio( work, hash+16 );
|
||||
}
|
||||
if ( ( !((hash+24)[7] & mask) )
|
||||
&& fulltest( hash+24, ptarget ) )
|
||||
{
|
||||
found[3] = true;
|
||||
num_found++;
|
||||
nonces[3] = n+3;
|
||||
work_set_target_ratio( work, hash+24 );
|
||||
pdata[19] = n;
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
}
|
||||
n += 4;
|
||||
} while ( ( num_found == 0 ) && ( n < max_nonce )
|
||||
|
@@ -32,12 +32,8 @@ int scanhash_keccak_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
// const uint32_t Htarg = ptarget[7];
|
||||
uint32_t endiandata[20];
|
||||
uint32_t *nonces = work->nonces;
|
||||
bool *found = work->nfound;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep0 = vdata + 73; // 9*8 + 1
|
||||
uint32_t *noncep1 = vdata + 75;
|
||||
uint32_t *noncep2 = vdata + 77;
|
||||
uint32_t *noncep3 = vdata + 79;
|
||||
uint32_t *noncep = vdata + 73; // 9*8 + 1
|
||||
|
||||
for ( int i=0; i < 19; i++ )
|
||||
be32enc( &endiandata[i], pdata[i] );
|
||||
@@ -46,42 +42,20 @@ int scanhash_keccak_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
mm256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||
|
||||
do {
|
||||
found[0] = found[1] = found[2] = found[3] = false;
|
||||
be32enc( noncep0, n );
|
||||
be32enc( noncep1, n+1 );
|
||||
be32enc( noncep2, n+2 );
|
||||
be32enc( noncep3, n+3 );
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep+2, n+1 );
|
||||
be32enc( noncep+4, n+2 );
|
||||
be32enc( noncep+6, n+3 );
|
||||
|
||||
keccakhash_4way( hash, vdata );
|
||||
|
||||
if ( ( ( hash[7] & 0xFFFFFF00 ) == 0 )
|
||||
&& fulltest( hash, ptarget) )
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( ( ( (hash+(i<<3))[7] & 0xFFFFFF00 ) == 0 )
|
||||
&& fulltest( hash+(i<<3), ptarget ) )
|
||||
{
|
||||
found[0] = true;
|
||||
num_found++;
|
||||
nonces[0] = n;
|
||||
pdata[19] = n;
|
||||
}
|
||||
if ( ( ( (hash+8)[7] & 0xFFFFFF00 ) == 0 )
|
||||
&& fulltest( hash+8, ptarget) )
|
||||
{
|
||||
found[1] = true;
|
||||
num_found++;
|
||||
nonces[1] = n+1;
|
||||
}
|
||||
if ( ( ( (hash+16) [7] & 0xFFFFFF00 ) == 0 )
|
||||
&& fulltest( hash+16, ptarget) )
|
||||
{
|
||||
found[2] = true;
|
||||
num_found++;
|
||||
nonces[2] = n+2;
|
||||
}
|
||||
if ( ( ( (hash+24)[7] & 0xFFFFFF00 ) == 0 )
|
||||
&& fulltest( hash+24, ptarget) )
|
||||
{
|
||||
found[3] = true;
|
||||
num_found++;
|
||||
nonces[3] = n+3;
|
||||
pdata[19] = n+i;
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
}
|
||||
n += 4;
|
||||
|
||||
|
@@ -491,14 +491,14 @@ int luffa_2way_update( luffa_2way_context *state, const void *data,
|
||||
__m256i *buffer = (__m256i*)state->buffer;
|
||||
__m256i msg[2];
|
||||
int i;
|
||||
int blocks = (int)len / 32;
|
||||
state-> rembytes = (int)len % 32;
|
||||
int blocks = (int)len >> 5;
|
||||
state-> rembytes = (int)len & 0x1F;
|
||||
|
||||
// full blocks
|
||||
for ( i = 0; i < blocks; i++, vdata+=2 )
|
||||
{
|
||||
msg[0] = mm256_bswap_32( vdata[ i ] );
|
||||
msg[1] = mm256_bswap_32( vdata[ i+1 ] );
|
||||
msg[0] = mm256_bswap_32( vdata[ 0] );
|
||||
msg[1] = mm256_bswap_32( vdata[ 1 ] );
|
||||
rnd512_2way( state, msg );
|
||||
}
|
||||
|
||||
@@ -533,7 +533,7 @@ int luffa_2way_close( luffa_2way_context *state, void *hashval )
|
||||
finalization512_2way( state, (uint32*)hashval );
|
||||
|
||||
if ( state->hashbitlen > 512 )
|
||||
finalization512_2way( state, (uint32*)( hashval+128 ) );
|
||||
finalization512_2way( state, (uint32*)( hashval+32 ) );
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -575,7 +575,7 @@ int luffa_2way_update_close( luffa_2way_context *state,
|
||||
|
||||
finalization512_2way( state, (uint32*)output );
|
||||
if ( state->hashbitlen > 512 )
|
||||
finalization512_2way( state, (uint32*)( output+128 ) );
|
||||
finalization512_2way( state, (uint32*)( output+32 ) );
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@@ -1,5 +1,6 @@
|
||||
#include "allium-gate.h"
|
||||
#include <memory.h>
|
||||
#include <mm_malloc.h>
|
||||
|
||||
#if defined (ALLIUM_4WAY)
|
||||
|
||||
@@ -18,14 +19,15 @@ typedef struct {
|
||||
|
||||
} allium_4way_ctx_holder;
|
||||
|
||||
static allium_4way_ctx_holder allium_4way_ctx;
|
||||
static __thread allium_4way_ctx_holder allium_4way_ctx;
|
||||
|
||||
void init_allium_4way_ctx()
|
||||
bool init_allium_4way_ctx()
|
||||
{
|
||||
keccak256_4way_init( &allium_4way_ctx.keccak );
|
||||
cubehashInit( &allium_4way_ctx.cube, 256, 16, 32 );
|
||||
skein256_4way_init( &allium_4way_ctx.skein );
|
||||
init_groestl256( &allium_4way_ctx.groestl, 32 );
|
||||
return true;
|
||||
}
|
||||
|
||||
void allium_4way_hash( void *state, const void *input )
|
||||
@@ -99,12 +101,8 @@ int scanhash_allium_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint32_t n = first_nonce;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint32_t *nonces = work->nonces;
|
||||
bool *found = work->nfound;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep0 = vdata + 76; // 19*4
|
||||
uint32_t *noncep1 = vdata + 77;
|
||||
uint32_t *noncep2 = vdata + 78;
|
||||
uint32_t *noncep3 = vdata + 79;
|
||||
uint32_t *noncep = vdata + 76; // 19*4
|
||||
|
||||
if ( opt_benchmark )
|
||||
( (uint32_t*)ptarget )[7] = 0x0000ff;
|
||||
@@ -115,44 +113,22 @@ int scanhash_allium_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
blake256_4way( &allium_4way_ctx.blake, vdata, 64 );
|
||||
|
||||
do {
|
||||
found[0] = found[1] = found[2] = found[3] = false;
|
||||
be32enc( noncep0, n );
|
||||
be32enc( noncep1, n+1 );
|
||||
be32enc( noncep2, n+2 );
|
||||
be32enc( noncep3, n+3 );
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep+1, n+1 );
|
||||
be32enc( noncep+2, n+2 );
|
||||
be32enc( noncep+3, n+3 );
|
||||
|
||||
allium_4way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
allium_4way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
if ( hash[7] <= Htarg && fulltest( hash, ptarget ) )
|
||||
{
|
||||
found[0] = true;
|
||||
num_found++;
|
||||
nonces[0] = pdata[19] = n;
|
||||
work_set_target_ratio( work, hash );
|
||||
}
|
||||
if ( (hash+8)[7] <= Htarg && fulltest( hash+8, ptarget ) )
|
||||
{
|
||||
found[1] = true;
|
||||
num_found++;
|
||||
nonces[1] = n+1;
|
||||
work_set_target_ratio( work, hash+8 );
|
||||
}
|
||||
if ( (hash+16)[7] <= Htarg && fulltest( hash+16, ptarget ) )
|
||||
{
|
||||
found[2] = true;
|
||||
num_found++;
|
||||
nonces[2] = n+2;
|
||||
work_set_target_ratio( work, hash+16 );
|
||||
}
|
||||
if ( (hash+24)[7] <= Htarg && fulltest( hash+24, ptarget ) )
|
||||
{
|
||||
found[3] = true;
|
||||
num_found++;
|
||||
nonces[3] = n+3;
|
||||
work_set_target_ratio( work, hash+24 );
|
||||
}
|
||||
n += 4;
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
|
||||
{
|
||||
pdata[19] = n+i;
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
}
|
||||
n += 4;
|
||||
} while ( (num_found == 0) && (n < max_nonce-4)
|
||||
&& !work_restart[thr_id].restart);
|
||||
|
||||
|
@@ -5,11 +5,11 @@ int64_t get_max64_0xFFFFLL() { return 0xFFFFLL; }
|
||||
bool register_allium_algo( algo_gate_t* gate )
|
||||
{
|
||||
#if defined (ALLIUM_4WAY)
|
||||
init_allium_4way_ctx();
|
||||
gate->miner_thread_init = (void*)&init_allium_4way_ctx;
|
||||
gate->scanhash = (void*)&scanhash_allium_4way;
|
||||
gate->hash = (void*)&allium_4way_hash;
|
||||
#else
|
||||
init_allium_ctx();
|
||||
gate->miner_thread_init = (void*)&init_allium_ctx;
|
||||
gate->scanhash = (void*)&scanhash_allium;
|
||||
gate->hash = (void*)&allium_hash;
|
||||
#endif
|
||||
|
@@ -16,14 +16,14 @@ bool register_allium_algo( algo_gate_t* gate );
|
||||
void allium_4way_hash( void *state, const void *input );
|
||||
int scanhash_allium_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
void init_allium_4way_ctx();
|
||||
bool init_allium_4way_ctx();
|
||||
|
||||
#endif
|
||||
|
||||
void allium_hash( void *state, const void *input );
|
||||
int scanhash_allium( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
void init_allium_ctx();
|
||||
bool init_allium_ctx();
|
||||
|
||||
#endif
|
||||
|
||||
|
@@ -12,9 +12,9 @@
|
||||
#include "lyra2.h"
|
||||
|
||||
typedef struct {
|
||||
cubehashParam cube;
|
||||
sph_blake256_context blake;
|
||||
sph_keccak256_context keccak;
|
||||
cubehashParam cube;
|
||||
sph_skein256_context skein;
|
||||
#if defined (__AES__)
|
||||
hashState_groestl256 groestl;
|
||||
@@ -23,9 +23,9 @@ typedef struct {
|
||||
#endif
|
||||
} allium_ctx_holder;
|
||||
|
||||
static allium_ctx_holder allium_ctx;
|
||||
static __thread allium_ctx_holder allium_ctx;
|
||||
|
||||
void init_allium_ctx()
|
||||
bool init_allium_ctx()
|
||||
{
|
||||
sph_keccak256_init( &allium_ctx.keccak );
|
||||
cubehashInit( &allium_ctx.cube, 256, 16, 32 );
|
||||
@@ -35,6 +35,7 @@ void init_allium_ctx()
|
||||
#else
|
||||
sph_groestl256_init( &allium_ctx.groestl );
|
||||
#endif
|
||||
return true;
|
||||
}
|
||||
|
||||
void allium_hash(void *state, const void *input)
|
||||
|
@@ -1,123 +0,0 @@
|
||||
#include "allium-gate.h"
|
||||
#include <memory.h>
|
||||
#include "algo/blake/sph_blake.h"
|
||||
#include "algo/keccak/sph_keccak.h"
|
||||
#include "algo/skein/sph_skein.h"
|
||||
#include "algo/cubehash/sse2/cubehash_sse2.h"
|
||||
#if defined(__AES__)
|
||||
#include "algo/groestl/aes_ni/hash-groestl256.h"
|
||||
#else
|
||||
#include "algo/groestl/sph_groestl.h"
|
||||
#endif
|
||||
|
||||
typedef struct {
|
||||
cubehashParam cube;
|
||||
sph_blake256_context blake;
|
||||
sph_keccak256_context keccak;
|
||||
sph_skein256_context skein;
|
||||
#if defined (__AES__)
|
||||
hashState_groestl256 groestl;
|
||||
#else
|
||||
sph_groestl256_context groestl;
|
||||
#endif
|
||||
} allium_ctx_holder;
|
||||
|
||||
static allium_ctx_holder allium_ctx;
|
||||
static __thread sph_blake256_context allium_blake_mid;
|
||||
|
||||
void init_allium_ctx()
|
||||
{
|
||||
cubehashInit( &allium_ctx.cube, 256, 16, 32 );
|
||||
sph_blake256_init( &allium_ctx.blake );
|
||||
sph_keccak256_init( &allium_ctx.keccak );
|
||||
sph_skein256_init( &allium_ctx.skein );
|
||||
#if defined (__AES__)
|
||||
init_groestl256( &allium_ctx.groestl, 32 );
|
||||
#else
|
||||
sph_groestl256_init( &allium_ctx.groestl );
|
||||
#endif
|
||||
}
|
||||
|
||||
void allium_blake256_midstate( const void* input )
|
||||
{
|
||||
memcpy( &allium_blake_mid, &allium_ctx.blake, sizeof allium_blake_mid );
|
||||
sph_blake256( &allium_blake_mid, input, 64 );
|
||||
}
|
||||
|
||||
void allium_hash( void *state, const void *input )
|
||||
{
|
||||
allium_ctx_holder ctx __attribute__ ((aligned (64)));
|
||||
memcpy( &ctx, &allium_ctx, sizeof(allium_ctx) );
|
||||
uint8_t hash[128] __attribute__ ((aligned (64)));
|
||||
const int midlen = 64; // bytes
|
||||
const int tail = 80 - midlen; // 16
|
||||
|
||||
memcpy( &ctx.blake, &allium_blake_mid, sizeof allium_blake_mid );
|
||||
sph_blake256( &ctx.blake, (uint8_t*)input + midlen, tail );
|
||||
sph_blake256_close( &ctx.blake, hash );
|
||||
|
||||
sph_keccak256( &ctx.keccak, hash, 32 );
|
||||
sph_keccak256_close(&ctx.keccak, hash);
|
||||
|
||||
LYRA2RE( hash, 32, hash, 32, hash, 32, 1, 8, 8 );
|
||||
// LYRA2REV2( allium_wholeMatrix, hash, 32, hash, 32, hash, 32, 1, 8, 8 );
|
||||
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash, (const byte*)hash, 32 );
|
||||
|
||||
LYRA2RE( hash, 32, hash, 32, hash, 32, 1, 8, 8 );
|
||||
// LYRA2REV2( allium_wholeMatrix, hash, 32, hash, 32, hash, 32, 1, 8, 8 );
|
||||
|
||||
sph_skein256( &ctx.skein, hash, 32 );
|
||||
sph_skein256_close( &ctx.skein, hash );
|
||||
|
||||
#if defined (__AES__)
|
||||
update_and_final_groestl256( &ctx.groestl, hash, hash, 256 );
|
||||
#else
|
||||
sph_groestl256( &ctx.skein, hash, 32 );
|
||||
sph_groestl256_close( &ctx.skein, hash );
|
||||
#endif
|
||||
|
||||
memcpy( state, hash, 32 );
|
||||
}
|
||||
|
||||
int scanhash_allium( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
{
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t endiandata[20] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[8] __attribute__((aligned(64)));
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t nonce = first_nonce;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
|
||||
if (opt_benchmark)
|
||||
((uint32_t*)ptarget)[7] = 0x0000ff;
|
||||
|
||||
swab32_array( endiandata, pdata, 20 );
|
||||
|
||||
allium_blake256_midstate( endiandata );
|
||||
|
||||
do {
|
||||
be32enc(&endiandata[19], nonce);
|
||||
allium_hash(hash, endiandata);
|
||||
|
||||
if (hash[7] <= Htarg )
|
||||
{
|
||||
if( fulltest(hash, ptarget) )
|
||||
{
|
||||
pdata[19] = nonce;
|
||||
work_set_target_ratio( work, hash );
|
||||
*hashes_done = pdata[19] - first_nonce;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
nonce++;
|
||||
|
||||
} while (nonce < max_nonce && !work_restart[thr_id].restart);
|
||||
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce + 1;
|
||||
return 0;
|
||||
}
|
||||
|
@@ -68,13 +68,13 @@ int LYRA2REV2( uint64_t* wholeMatrix, void *K, uint64_t kLen, const void *pwd,
|
||||
//Tries to allocate enough space for the whole memory matrix
|
||||
|
||||
const int64_t ROW_LEN_INT64 = BLOCK_LEN_INT64 * nCols;
|
||||
const int64_t ROW_LEN_BYTES = ROW_LEN_INT64 * 8;
|
||||
// const int64_t ROW_LEN_BYTES = ROW_LEN_INT64 * 8;
|
||||
// for Lyra2REv2, nCols = 4, v1 was using 8
|
||||
const int64_t BLOCK_LEN = (nCols == 4) ? BLOCK_LEN_BLAKE2_SAFE_INT64
|
||||
: BLOCK_LEN_BLAKE2_SAFE_BYTES;
|
||||
uint64_t *ptrWord = wholeMatrix;
|
||||
|
||||
memset( wholeMatrix, 0, ROW_LEN_BYTES * nRows );
|
||||
// memset( wholeMatrix, 0, ROW_LEN_BYTES * nRows );
|
||||
|
||||
//=== Getting the password + salt + basil padded with 10*1 ==========//
|
||||
//OBS.:The memory matrix will temporarily hold the password: not for saving memory,
|
||||
@@ -232,9 +232,9 @@ int LYRA2Z( uint64_t* wholeMatrix, void *K, uint64_t kLen, const void *pwd,
|
||||
//Tries to allocate enough space for the whole memory matrix
|
||||
|
||||
const int64_t ROW_LEN_INT64 = BLOCK_LEN_INT64 * nCols;
|
||||
const int64_t ROW_LEN_BYTES = ROW_LEN_INT64 * 8;
|
||||
// const int64_t ROW_LEN_BYTES = ROW_LEN_INT64 * 8;
|
||||
|
||||
memset( wholeMatrix, 0, ROW_LEN_BYTES * nRows );
|
||||
// memset( wholeMatrix, 0, ROW_LEN_BYTES * nRows );
|
||||
|
||||
//==== Getting the password + salt + basil padded with 10*1 ============//
|
||||
//OBS.:The memory matrix will temporarily hold the password: not for saving memory,
|
||||
@@ -380,18 +380,17 @@ int LYRA2RE( void *K, uint64_t kLen, const void *pwd, const uint64_t pwdlen,
|
||||
: BLOCK_LEN_BLAKE2_SAFE_BYTES;
|
||||
|
||||
i = (int64_t)ROW_LEN_BYTES * nRows;
|
||||
uint64_t *wholeMatrix = _mm_malloc( i, 32 );
|
||||
// uint64_t *wholeMatrix = _mm_malloc( i, 64 );
|
||||
uint64_t *wholeMatrix = _mm_malloc( i, 64 );
|
||||
if (wholeMatrix == NULL)
|
||||
return -1;
|
||||
|
||||
//#if defined (__AVX2__)
|
||||
// memset_zero_m256i( (__m256i*)wholeMatrix, i<<5 );
|
||||
//#elif defined(__AVX__)
|
||||
// memset_zero_m128i( (__m128i*)wholeMatrix, i<<4 );
|
||||
//#else
|
||||
memset(wholeMatrix, 0, i);
|
||||
//#endif
|
||||
#if defined(__AVX2__)
|
||||
memset_zero_256( (__m256i*)wholeMatrix, i>>5 );
|
||||
#elif defined(__AVX__)
|
||||
memset_zero_128( (__m128i*)wholeMatrix, i>>4 );
|
||||
#else
|
||||
memset( wholeMatrix, 0, i );
|
||||
#endif
|
||||
|
||||
uint64_t *ptrWord = wholeMatrix;
|
||||
|
||||
@@ -413,8 +412,8 @@ int LYRA2RE( void *K, uint64_t kLen, const void *pwd, const uint64_t pwdlen,
|
||||
memcpy(ptrByte, salt, saltlen);
|
||||
ptrByte += saltlen;
|
||||
|
||||
memset( ptrByte, 0, nBlocksInput * BLOCK_LEN_BLAKE2_SAFE_BYTES
|
||||
- (saltlen + pwdlen) );
|
||||
// memset( ptrByte, 0, nBlocksInput * BLOCK_LEN_BLAKE2_SAFE_BYTES
|
||||
// - (saltlen + pwdlen) );
|
||||
|
||||
//Concatenates the basil: every integer passed as parameter, in the order they are provided by the interface
|
||||
memcpy(ptrByte, &kLen, sizeof(int64_t));
|
||||
|
@@ -61,12 +61,8 @@ int scanhash_lyra2h_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
uint32_t *nonces = work->nonces;
|
||||
bool *found = work->nfound;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep0 = vdata + 76; // 19*4
|
||||
uint32_t *noncep1 = vdata + 77;
|
||||
uint32_t *noncep2 = vdata + 78;
|
||||
uint32_t *noncep3 = vdata + 79;
|
||||
uint32_t *noncep= vdata + 76; // 19*4
|
||||
|
||||
if ( opt_benchmark )
|
||||
ptarget[7] = 0x0000ff;
|
||||
@@ -79,42 +75,20 @@ int scanhash_lyra2h_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
lyra2h_4way_midstate( vdata );
|
||||
|
||||
do {
|
||||
found[0] = found[1] = found[2] = found[3] = false;
|
||||
be32enc( noncep0, n );
|
||||
be32enc( noncep1, n+1 );
|
||||
be32enc( noncep2, n+2 );
|
||||
be32enc( noncep3, n+3 );
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep+1, n+1 );
|
||||
be32enc( noncep+2, n+2 );
|
||||
be32enc( noncep+3, n+3 );
|
||||
|
||||
be32enc( &edata[19], n );
|
||||
lyra2h_4way_hash( hash, vdata );
|
||||
|
||||
if ( hash[7] <= Htarg && fulltest( hash, ptarget ) )
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
|
||||
{
|
||||
found[0] = true;
|
||||
num_found++;
|
||||
nonces[0] = pdata[19] = n;
|
||||
work_set_target_ratio( work, hash );
|
||||
}
|
||||
if ( (hash+8)[7] <= Htarg && fulltest( hash+8, ptarget ) )
|
||||
{
|
||||
found[1] = true;
|
||||
num_found++;
|
||||
nonces[1] = n+1;
|
||||
work_set_target_ratio( work, hash+8 );
|
||||
}
|
||||
if ( (hash+16)[7] <= Htarg && fulltest( hash+16, ptarget ) )
|
||||
{
|
||||
found[2] = true;
|
||||
num_found++;
|
||||
nonces[2] = n+2;
|
||||
work_set_target_ratio( work, hash+16 );
|
||||
}
|
||||
if ( (hash+24)[7] <= Htarg && fulltest( hash+24, ptarget ) )
|
||||
{
|
||||
found[3] = true;
|
||||
num_found++;
|
||||
nonces[3] = n+3;
|
||||
work_set_target_ratio( work, hash+24 );
|
||||
pdata[19] = n+i;
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
}
|
||||
n += 4;
|
||||
} while ( (num_found == 0) && (n < max_nonce-4)
|
||||
|
@@ -19,12 +19,13 @@ typedef struct {
|
||||
|
||||
static lyra2v2_4way_ctx_holder l2v2_4way_ctx;
|
||||
|
||||
void init_lyra2rev2_4way_ctx()
|
||||
bool init_lyra2rev2_4way_ctx()
|
||||
{
|
||||
keccak256_4way_init( &l2v2_4way_ctx.keccak );
|
||||
cubehashInit( &l2v2_4way_ctx.cube, 256, 16, 32 );
|
||||
skein256_4way_init( &l2v2_4way_ctx.skein );
|
||||
bmw256_4way_init( &l2v2_4way_ctx.bmw );
|
||||
return true;
|
||||
}
|
||||
|
||||
void lyra2rev2_4way_hash( void *state, const void *input )
|
||||
@@ -92,12 +93,8 @@ int scanhash_lyra2rev2_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint32_t n = first_nonce;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint32_t *nonces = work->nonces;
|
||||
bool *found = work->nfound;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep0 = vdata + 76; // 19*4
|
||||
uint32_t *noncep1 = vdata + 77;
|
||||
uint32_t *noncep2 = vdata + 78;
|
||||
uint32_t *noncep3 = vdata + 79;
|
||||
uint32_t *noncep = vdata + 76; // 19*4
|
||||
|
||||
if ( opt_benchmark )
|
||||
( (uint32_t*)ptarget )[7] = 0x0000ff;
|
||||
@@ -110,42 +107,20 @@ int scanhash_lyra2rev2_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
blake256_4way( &l2v2_4way_ctx.blake, vdata, 64 );
|
||||
|
||||
do {
|
||||
found[0] = found[1] = found[2] = found[3] = false;
|
||||
be32enc( noncep0, n );
|
||||
be32enc( noncep1, n+1 );
|
||||
be32enc( noncep2, n+2 );
|
||||
be32enc( noncep3, n+3 );
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep+1, n+1 );
|
||||
be32enc( noncep+2, n+2 );
|
||||
be32enc( noncep+3, n+3 );
|
||||
|
||||
lyra2rev2_4way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
if ( hash[7] <= Htarg && fulltest( hash, ptarget ) )
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
|
||||
{
|
||||
found[0] = true;
|
||||
num_found++;
|
||||
nonces[0] = pdata[19] = n;
|
||||
work_set_target_ratio( work, hash );
|
||||
}
|
||||
if ( (hash+8)[7] <= Htarg && fulltest( hash+8, ptarget ) )
|
||||
{
|
||||
found[1] = true;
|
||||
num_found++;
|
||||
nonces[1] = n+1;
|
||||
work_set_target_ratio( work, hash+8 );
|
||||
}
|
||||
if ( (hash+16)[7] <= Htarg && fulltest( hash+16, ptarget ) )
|
||||
{
|
||||
found[2] = true;
|
||||
num_found++;
|
||||
nonces[2] = n+2;
|
||||
work_set_target_ratio( work, hash+16 );
|
||||
}
|
||||
if ( (hash+24)[7] <= Htarg && fulltest( hash+24, ptarget ) )
|
||||
{
|
||||
found[3] = true;
|
||||
num_found++;
|
||||
nonces[3] = n+3;
|
||||
work_set_target_ratio( work, hash+24 );
|
||||
pdata[19] = n+i;
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
}
|
||||
n += 4;
|
||||
} while ( (num_found == 0) && (n < max_nonce-4)
|
||||
|
@@ -14,18 +14,20 @@ bool lyra2rev2_thread_init()
|
||||
|
||||
int i = (int64_t)ROW_LEN_BYTES * 4; // nRows;
|
||||
l2v2_wholeMatrix = _mm_malloc( i, 64 );
|
||||
|
||||
#if defined (LYRA2REV2_4WAY)
|
||||
init_lyra2rev2_4way_ctx();;
|
||||
#else
|
||||
init_lyra2rev2_ctx();
|
||||
#endif
|
||||
return l2v2_wholeMatrix;
|
||||
}
|
||||
|
||||
bool register_lyra2rev2_algo( algo_gate_t* gate )
|
||||
{
|
||||
#if defined (LYRA2REV2_4WAY)
|
||||
init_lyra2rev2_4way_ctx();
|
||||
gate->scanhash = (void*)&scanhash_lyra2rev2_4way;
|
||||
gate->hash = (void*)&lyra2rev2_4way_hash;
|
||||
#else
|
||||
init_lyra2rev2_ctx();
|
||||
gate->scanhash = (void*)&scanhash_lyra2rev2;
|
||||
gate->hash = (void*)&lyra2rev2_hash;
|
||||
#endif
|
||||
|
@@ -20,7 +20,7 @@ void lyra2rev2_4way_hash( void *state, const void *input );
|
||||
int scanhash_lyra2rev2_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
|
||||
void init_lyra2rev2_4way_ctx();
|
||||
bool init_lyra2rev2_4way_ctx();
|
||||
|
||||
#endif
|
||||
|
||||
@@ -29,7 +29,7 @@ void lyra2rev2_hash( void *state, const void *input );
|
||||
int scanhash_lyra2rev2( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
|
||||
void init_lyra2rev2_ctx();
|
||||
bool init_lyra2rev2_ctx();
|
||||
|
||||
#endif
|
||||
|
||||
|
@@ -21,7 +21,7 @@ typedef struct {
|
||||
static lyra2v2_ctx_holder lyra2v2_ctx;
|
||||
static __thread sph_blake256_context l2v2_blake_mid;
|
||||
|
||||
void init_lyra2rev2_ctx()
|
||||
bool init_lyra2rev2_ctx()
|
||||
{
|
||||
cubehashInit( &lyra2v2_ctx.cube1, 256, 16, 32 );
|
||||
cubehashInit( &lyra2v2_ctx.cube2, 256, 16, 32 );
|
||||
@@ -29,6 +29,7 @@ void init_lyra2rev2_ctx()
|
||||
sph_keccak256_init( &lyra2v2_ctx.keccak );
|
||||
sph_skein256_init( &lyra2v2_ctx.skein );
|
||||
sph_bmw256_init( &lyra2v2_ctx.bmw );
|
||||
return true;
|
||||
}
|
||||
|
||||
void l2v2_blake256_midstate( const void* input )
|
||||
|
@@ -61,12 +61,8 @@ int scanhash_lyra2z_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
uint32_t *nonces = work->nonces;
|
||||
bool *found = work->nfound;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep0 = vdata + 76; // 19*4
|
||||
uint32_t *noncep1 = vdata + 77;
|
||||
uint32_t *noncep2 = vdata + 78;
|
||||
uint32_t *noncep3 = vdata + 79;
|
||||
uint32_t *noncep = vdata + 76; // 19*4
|
||||
|
||||
if ( opt_benchmark )
|
||||
ptarget[7] = 0x0000ff;
|
||||
@@ -79,42 +75,20 @@ int scanhash_lyra2z_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
lyra2z_4way_midstate( vdata );
|
||||
|
||||
do {
|
||||
found[0] = found[1] = found[2] = found[3] = false;
|
||||
be32enc( noncep0, n );
|
||||
be32enc( noncep1, n+1 );
|
||||
be32enc( noncep2, n+2 );
|
||||
be32enc( noncep3, n+3 );
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep+1, n+1 );
|
||||
be32enc( noncep+2, n+2 );
|
||||
be32enc( noncep+3, n+3 );
|
||||
|
||||
lyra2z_4way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
if ( hash[7] <= Htarg && fulltest( hash, ptarget ) )
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
|
||||
{
|
||||
found[0] = true;
|
||||
num_found++;
|
||||
nonces[0] = pdata[19] = n;
|
||||
work_set_target_ratio( work, hash );
|
||||
}
|
||||
if ( (hash+8)[7] <= Htarg && fulltest( hash+8, ptarget ) )
|
||||
{
|
||||
found[1] = true;
|
||||
num_found++;
|
||||
nonces[1] = n+1;
|
||||
work_set_target_ratio( work, hash+8 );
|
||||
}
|
||||
if ( (hash+16)[7] <= Htarg && fulltest( hash+16, ptarget ) )
|
||||
{
|
||||
found[2] = true;
|
||||
num_found++;
|
||||
nonces[2] = n+2;
|
||||
work_set_target_ratio( work, hash+16 );
|
||||
}
|
||||
if ( (hash+24)[7] <= Htarg && fulltest( hash+24, ptarget ) )
|
||||
{
|
||||
found[3] = true;
|
||||
num_found++;
|
||||
nonces[3] = n+3;
|
||||
work_set_target_ratio( work, hash+24 );
|
||||
pdata[19] = n+i;
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
}
|
||||
n += 4;
|
||||
} while ( (num_found == 0) && (n < max_nonce-4)
|
||||
@@ -126,3 +100,115 @@ int scanhash_lyra2z_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(LYRA2Z_8WAY)
|
||||
|
||||
__thread uint64_t* lyra2z_8way_matrix;
|
||||
|
||||
bool lyra2z_8way_thread_init()
|
||||
{
|
||||
return ( lyra2z_8way_matrix = _mm_malloc( LYRA2Z_MATRIX_SIZE, 64 ) );
|
||||
}
|
||||
|
||||
/* Per-thread blake256 8-way state, written by lyra2z_8way_midstate() and
 * copied by lyra2z_8way_hash(). */
static __thread blake256_8way_context l2z_8way_blake_mid;

/* Reinitialise the per-thread midstate and absorb 64 bytes (length as
 * passed to blake256_8way) of the interleaved input. */
void lyra2z_8way_midstate( const void* input )
{
   blake256_8way_context *const mid = &l2z_8way_blake_mid;

   blake256_8way_init( mid );
   blake256_8way( mid, input, 64 );
}
|
||||
|
||||
void lyra2z_8way_hash( void *state, const void *input )
|
||||
{
|
||||
uint32_t hash0[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash1[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash2[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash3[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash4[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash5[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash6[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash7[8] __attribute__ ((aligned (64)));
|
||||
uint32_t vhash[8*8] __attribute__ ((aligned (64)));
|
||||
blake256_8way_context ctx_blake __attribute__ ((aligned (64)));
|
||||
|
||||
memcpy( &ctx_blake, &l2z_8way_blake_mid, sizeof l2z_8way_blake_mid );
|
||||
blake256_8way( &ctx_blake, input + (64*8), 16 );
|
||||
blake256_8way_close( &ctx_blake, vhash );
|
||||
|
||||
mm256_deinterleave_8x32( hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7, vhash, 256 );
|
||||
|
||||
LYRA2Z( lyra2z_8way_matrix, hash0, 32, hash0, 32, hash0, 32, 8, 8, 8 );
|
||||
LYRA2Z( lyra2z_8way_matrix, hash1, 32, hash1, 32, hash1, 32, 8, 8, 8 );
|
||||
LYRA2Z( lyra2z_8way_matrix, hash2, 32, hash2, 32, hash2, 32, 8, 8, 8 );
|
||||
LYRA2Z( lyra2z_8way_matrix, hash3, 32, hash3, 32, hash3, 32, 8, 8, 8 );
|
||||
LYRA2Z( lyra2z_8way_matrix, hash4, 32, hash4, 32, hash4, 32, 8, 8, 8 );
|
||||
LYRA2Z( lyra2z_8way_matrix, hash5, 32, hash5, 32, hash5, 32, 8, 8, 8 );
|
||||
LYRA2Z( lyra2z_8way_matrix, hash6, 32, hash6, 32, hash6, 32, 8, 8, 8 );
|
||||
LYRA2Z( lyra2z_8way_matrix, hash7, 32, hash7, 32, hash7, 32, 8, 8, 8 );
|
||||
|
||||
memcpy( state, hash0, 32 );
|
||||
memcpy( state+ 32, hash1, 32 );
|
||||
memcpy( state+ 64, hash2, 32 );
|
||||
memcpy( state+ 96, hash3, 32 );
|
||||
memcpy( state+128, hash1, 32 );
|
||||
memcpy( state+160, hash2, 32 );
|
||||
memcpy( state+192, hash3, 32 );
|
||||
memcpy( state+224, hash1, 32 );
|
||||
}
|
||||
|
||||
int scanhash_lyra2z_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
{
|
||||
uint32_t hash[8*8] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
||||
uint32_t _ALIGN(64) edata[20];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
uint32_t *nonces = work->nonces;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep = vdata + 152; // 19*8
|
||||
|
||||
if ( opt_benchmark )
|
||||
ptarget[7] = 0x0000ff;
|
||||
|
||||
for ( int i=0; i < 19; i++ )
|
||||
be32enc( &edata[i], pdata[i] );
|
||||
|
||||
mm256_interleave_8x32( vdata, edata, edata, edata, edata,
|
||||
edata, edata, edata, edata, 640 );
|
||||
|
||||
lyra2z_8way_midstate( vdata );
|
||||
|
||||
do {
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep+1, n+1 );
|
||||
be32enc( noncep+2, n+2 );
|
||||
be32enc( noncep+3, n+3 );
|
||||
be32enc( noncep+4, n+4 );
|
||||
be32enc( noncep+5, n+5 );
|
||||
be32enc( noncep+6, n+6 );
|
||||
be32enc( noncep+7, n+7 );
|
||||
|
||||
lyra2z_8way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
for ( int i = 0; i < 8; i++ )
|
||||
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
|
||||
{
|
||||
pdata[19] = n+i;
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
}
|
||||
n += 8;
|
||||
} while ( (num_found == 0) && (n < max_nonce-4)
|
||||
&& !work_restart[thr_id].restart);
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return num_found;
|
||||
}
|
||||
|
||||
|
||||
#endif
|
||||
|
@@ -8,7 +8,11 @@ void lyra2z_set_target( struct work* work, double job_diff )
|
||||
|
||||
bool register_lyra2z_algo( algo_gate_t* gate )
|
||||
{
|
||||
#ifdef LYRA2Z_4WAY
|
||||
#if defined(LYRA2Z_8WAY)
|
||||
gate->miner_thread_init = (void*)&lyra2z_8way_thread_init;
|
||||
gate->scanhash = (void*)&scanhash_lyra2z_8way;
|
||||
gate->hash = (void*)&lyra2z_8way_hash;
|
||||
#elif defined(LYRA2Z_4WAY)
|
||||
gate->miner_thread_init = (void*)&lyra2z_4way_thread_init;
|
||||
gate->scanhash = (void*)&scanhash_lyra2z_4way;
|
||||
gate->hash = (void*)&lyra2z_4way_hash;
|
||||
|
@@ -1,17 +1,29 @@
|
||||
#ifndef LYRA2Z_GATE_H__
|
||||
#define LYRA2Z_GATE_H__
|
||||
#define LYRA2Z_GATE_H__ 1
|
||||
|
||||
#include "algo-gate-api.h"
|
||||
#include <stdint.h>
|
||||
|
||||
#if defined(__AVX2__)
|
||||
#if defined(__AVX__)
|
||||
#define LYRA2Z_4WAY
|
||||
#endif
|
||||
#if defined(__AVX2__)
|
||||
// #define LYRA2Z_8WAY
|
||||
#endif
|
||||
|
||||
|
||||
// Size of the Lyra2Z working matrix.  Parenthesized so the product is
// evaluated as a unit wherever the macro is expanded.
#define LYRA2Z_MATRIX_SIZE ( BLOCK_LEN_INT64 * 8 * 8 * 8 )
|
||||
|
||||
#if defined(LYRA2Z_4WAY)
|
||||
#if defined(LYRA2Z_8WAY)
|
||||
|
||||
void lyra2z_8way_hash( void *state, const void *input );
|
||||
|
||||
int scanhash_lyra2z_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
|
||||
bool lyra2z_8way_thread_init();
|
||||
|
||||
#elif defined(LYRA2Z_4WAY)
|
||||
|
||||
void lyra2z_4way_hash( void *state, const void *input );
|
||||
|
||||
@@ -20,7 +32,7 @@ int scanhash_lyra2z_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
|
||||
bool lyra2z_4way_thread_init();
|
||||
|
||||
#endif
|
||||
#else
|
||||
|
||||
void lyra2z_hash( void *state, const void *input );
|
||||
|
||||
@@ -31,3 +43,4 @@ bool lyra2z_thread_init();
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
@@ -79,12 +79,8 @@ int scanhash_nist5_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint32_t *nonces = work->nonces;
|
||||
bool *found = work->nfound;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep0 = vdata + 73; // 9*8 + 1
|
||||
uint32_t *noncep1 = vdata + 75;
|
||||
uint32_t *noncep2 = vdata + 77;
|
||||
uint32_t *noncep3 = vdata + 79;
|
||||
uint32_t *noncep = vdata + 73; // 9*8 + 1
|
||||
|
||||
uint64_t htmax[] = { 0,
|
||||
0xF,
|
||||
@@ -117,47 +113,22 @@ int scanhash_nist5_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint32_t mask = masks[m];
|
||||
|
||||
do {
|
||||
found[0] = found[1] = found[2] = found[3] = false;
|
||||
be32enc( noncep0, n );
|
||||
be32enc( noncep1, n+1 );
|
||||
be32enc( noncep2, n+2 );
|
||||
be32enc( noncep3, n+3 );
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep+2, n+1 );
|
||||
be32enc( noncep+4, n+2 );
|
||||
be32enc( noncep+6, n+3 );
|
||||
|
||||
nist5hash_4way( hash, vdata );
|
||||
|
||||
pdata[19] = n;
|
||||
|
||||
if ( ( !(hash[7] & mask) )
|
||||
&& fulltest( hash, ptarget ) )
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( ( !( (hash+(i<<3))[7] & mask ) == 0 )
|
||||
&& fulltest( hash+(i<<3), ptarget ) )
|
||||
{
|
||||
found[0] = true;
|
||||
num_found++;
|
||||
nonces[0] = n;
|
||||
work_set_target_ratio( work, hash );
|
||||
}
|
||||
if ( ( !((hash+8)[7] & mask) )
|
||||
&& fulltest( hash+8, ptarget ) )
|
||||
{
|
||||
found[1] = true;
|
||||
num_found++;
|
||||
nonces[1] = n+1;
|
||||
work_set_target_ratio( work, hash+8 );
|
||||
}
|
||||
if ( ( !((hash+16)[7] & mask) )
|
||||
&& fulltest( hash+16, ptarget ) )
|
||||
{
|
||||
found[2] = true;
|
||||
num_found++;
|
||||
nonces[2] = n+2;
|
||||
work_set_target_ratio( work, hash+16 );
|
||||
}
|
||||
if ( ( !((hash+24)[7] & mask) )
|
||||
&& fulltest( hash+24, ptarget ) )
|
||||
{
|
||||
found[3] = true;
|
||||
num_found++;
|
||||
nonces[3] = n+3;
|
||||
work_set_target_ratio( work, hash+24 );
|
||||
pdata[19] = n+i;
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
}
|
||||
n += 4;
|
||||
} while ( ( num_found == 0 ) && ( n < max_nonce )
|
||||
|
@@ -145,12 +145,8 @@ int scanhash_anime_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint32_t n = pdata[19];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t *nonces = work->nonces;
|
||||
bool *found = work->nfound;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep0 = vdata + 73; // 9*8 + 1
|
||||
uint32_t *noncep1 = vdata + 75;
|
||||
uint32_t *noncep2 = vdata + 77;
|
||||
uint32_t *noncep3 = vdata + 79;
|
||||
uint32_t *noncep = vdata + 73; // 9*8 + 1
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint64_t htmax[] = {
|
||||
0,
|
||||
@@ -181,42 +177,21 @@ int scanhash_anime_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
|
||||
do
|
||||
{
|
||||
found[0] = found[1] = found[2] = found[3] = false;
|
||||
be32enc( noncep0, n );
|
||||
be32enc( noncep1, n+1 );
|
||||
be32enc( noncep2, n+2 );
|
||||
be32enc( noncep3, n+3 );
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep+2, n+1 );
|
||||
be32enc( noncep+4, n+2 );
|
||||
be32enc( noncep+6, n+3 );
|
||||
|
||||
anime_4way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
anime_4way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
if ( ( hash[7] & mask ) == 0 && fulltest( hash, ptarget ) )
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( ( ( (hash+(i<<3))[7] & mask ) == 0 )
|
||||
&& fulltest( hash+(i<<3), ptarget ) )
|
||||
{
|
||||
found[0] = true;
|
||||
num_found++;
|
||||
nonces[0] = n;
|
||||
work_set_target_ratio( work, hash );
|
||||
}
|
||||
if ( ( (hash+8)[7] & mask ) == 0 && fulltest( hash+8, ptarget ) )
|
||||
{
|
||||
found[1] = true;
|
||||
num_found++;
|
||||
nonces[1] = n+1;
|
||||
work_set_target_ratio( work, hash );
|
||||
}
|
||||
if ( ( (hash+16)[7] & mask ) == 0 && fulltest( hash+16, ptarget ) )
|
||||
{
|
||||
found[2] = true;
|
||||
num_found++;
|
||||
nonces[2] = n+2;
|
||||
work_set_target_ratio( work, hash );
|
||||
}
|
||||
if ( ( (hash+24)[7] & mask ) == 0 && fulltest( hash+24, ptarget ) )
|
||||
{
|
||||
found[3] = true;
|
||||
num_found++;
|
||||
nonces[3] = n+3;
|
||||
work_set_target_ratio( work, hash );
|
||||
pdata[19] = n+i;
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
}
|
||||
n += 4;
|
||||
} while ( ( num_found == 0 ) && ( n < max_nonce )
|
||||
|
@@ -145,12 +145,8 @@ int scanhash_quark_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint32_t n = pdata[19];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t *nonces = work->nonces;
|
||||
bool *found = work->nfound;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep0 = vdata + 73; // 9*8 + 1
|
||||
uint32_t *noncep1 = vdata + 75;
|
||||
uint32_t *noncep2 = vdata + 77;
|
||||
uint32_t *noncep3 = vdata + 79;
|
||||
uint32_t *noncep = vdata + 73; // 9*8 + 1
|
||||
|
||||
swab32_array( endiandata, pdata, 20 );
|
||||
|
||||
@@ -159,42 +155,21 @@ int scanhash_quark_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
|
||||
do
|
||||
{
|
||||
found[0] = found[1] = found[2] = found[3] = false;
|
||||
be32enc( noncep0, n );
|
||||
be32enc( noncep1, n+1 );
|
||||
be32enc( noncep2, n+2 );
|
||||
be32enc( noncep3, n+3 );
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep+2, n+1 );
|
||||
be32enc( noncep+4, n+2 );
|
||||
be32enc( noncep+6, n+3 );
|
||||
|
||||
quark_4way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
if ( ( hash[7] & 0xFFFFFF00 ) == 0 && fulltest( hash, ptarget ) )
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( ( ( (hash+(i<<3))[7] & 0xFFFFFF00 ) == 0 )
|
||||
&& fulltest( hash+(i<<3), ptarget ) )
|
||||
{
|
||||
found[0] = true;
|
||||
num_found++;
|
||||
nonces[0] = n;
|
||||
work_set_target_ratio( work, hash );
|
||||
}
|
||||
if ( ( (hash+8)[7] & 0xFFFFFF00 ) == 0 && fulltest( hash+8, ptarget ) )
|
||||
{
|
||||
found[1] = true;
|
||||
num_found++;
|
||||
nonces[1] = n+1;
|
||||
work_set_target_ratio( work, hash );
|
||||
}
|
||||
if ( ( (hash+16)[7] & 0xFFFFFF00 ) == 0 && fulltest( hash+16, ptarget ) )
|
||||
{
|
||||
found[2] = true;
|
||||
num_found++;
|
||||
nonces[2] = n+2;
|
||||
work_set_target_ratio( work, hash );
|
||||
}
|
||||
if ( ( (hash+24)[7] & 0xFFFFFF00 ) == 0 && fulltest( hash+24, ptarget ) )
|
||||
{
|
||||
found[3] = true;
|
||||
num_found++;
|
||||
nonces[3] = n+3;
|
||||
work_set_target_ratio( work, hash );
|
||||
pdata[19] = n+i;
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
}
|
||||
n += 4;
|
||||
} while ( ( num_found == 0 ) && ( n < max_nonce )
|
||||
|
@@ -74,10 +74,8 @@ int scanhash_deep_2way( int thr_id, struct work *work,uint32_t max_nonce,
|
||||
uint32_t n = pdata[19];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t *nonces = work->nonces;
|
||||
bool *found = work->nfound;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep0 = vdata + 32+3; // 4*8 + 3
|
||||
uint32_t *noncep1 = vdata + 32+7;
|
||||
uint32_t *noncep = vdata + 32+3; // 4*8 + 3
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint64_t htmax[] = { 0, 0xF, 0xFF,
|
||||
0xFFF, 0xFFFF, 0x10000000 };
|
||||
@@ -98,24 +96,20 @@ int scanhash_deep_2way( int thr_id, struct work *work,uint32_t max_nonce,
|
||||
uint32_t mask = masks[m];
|
||||
do
|
||||
{
|
||||
found[0] = found[1] = false;
|
||||
be32enc( noncep0, n );
|
||||
be32enc( noncep1, n+1 );
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep+4, n+1 );
|
||||
|
||||
deep_2way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
if ( !( hash[7] & mask ) && fulltest( hash, ptarget) )
|
||||
{
|
||||
found[0] = true;
|
||||
num_found++;
|
||||
nonces[0] = n;
|
||||
nonces[ num_found++ ] = n;
|
||||
work_set_target_ratio( work, hash );
|
||||
}
|
||||
if ( !( (hash+8)[7] & mask ) && fulltest( hash+8, ptarget) )
|
||||
{
|
||||
found[1] = true;
|
||||
num_found++;
|
||||
nonces[1] = n+1;
|
||||
nonces[ num_found++ ] = n+1;
|
||||
work_set_target_ratio( work, hash+8 );
|
||||
}
|
||||
n += 2;
|
||||
|
@@ -25,7 +25,6 @@ qubit_2way_ctx_holder qubit_2way_ctx;
|
||||
|
||||
void init_qubit_2way_ctx()
|
||||
{
|
||||
luffa_2way_init( &qubit_2way_ctx.luffa, 512 );
|
||||
cubehashInit(&qubit_2way_ctx.cube,512,16,32);
|
||||
sph_shavite512_init(&qubit_2way_ctx.shavite);
|
||||
simd_2way_init( &qubit_2way_ctx.simd, 512 );
|
||||
@@ -81,10 +80,8 @@ int scanhash_qubit_2way( int thr_id, struct work *work,uint32_t max_nonce,
|
||||
uint32_t n = pdata[19];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t *nonces = work->nonces;
|
||||
bool *found = work->nfound;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep0 = vdata + 32+3; // 4*8 + 3
|
||||
uint32_t *noncep1 = vdata + 32+7;
|
||||
uint32_t *noncep = vdata + 32+3; // 4*8 + 3
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint64_t htmax[] = { 0, 0xF, 0xFF,
|
||||
0xFFF, 0xFFFF, 0x10000000 };
|
||||
@@ -94,7 +91,6 @@ int scanhash_qubit_2way( int thr_id, struct work *work,uint32_t max_nonce,
|
||||
// big endian encode 0..18 uint32_t, 64 bits at a time
|
||||
swab32_array( endiandata, pdata, 20 );
|
||||
|
||||
|
||||
uint64_t *edata = (uint64_t*)endiandata;
|
||||
mm256_interleave_2x128( (uint64_t*)vdata, edata, edata, 640 );
|
||||
|
||||
@@ -106,24 +102,21 @@ int scanhash_qubit_2way( int thr_id, struct work *work,uint32_t max_nonce,
|
||||
uint32_t mask = masks[m];
|
||||
do
|
||||
{
|
||||
found[0] = found[1] = false;
|
||||
be32enc( noncep0, n );
|
||||
be32enc( noncep1, n+1 );
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep+4, n+1 );
|
||||
qubit_2way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
|
||||
if ( !( hash[7] & mask ) && fulltest( hash, ptarget) )
|
||||
{
|
||||
found[0] = true;
|
||||
num_found++;
|
||||
nonces[0] = n;
|
||||
nonces[ num_found++ ] = n;
|
||||
work_set_target_ratio( work, hash );
|
||||
}
|
||||
if ( !( (hash+8)[7] & mask ) && fulltest( hash+8, ptarget) )
|
||||
{
|
||||
found[1] = true;
|
||||
num_found++;
|
||||
nonces[1] = n+1;
|
||||
pdata[19] = n+1;
|
||||
nonces[ num_found++ ] = n+1;
|
||||
work_set_target_ratio( work, hash+8 );
|
||||
}
|
||||
n += 2;
|
||||
|
@@ -1,7 +1,4 @@
|
||||
#include "lbry-gate.h"
|
||||
|
||||
#if defined(LBRY_4WAY)
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
@@ -9,6 +6,141 @@
|
||||
#include "algo/sha/sha2-hash-4way.h"
|
||||
#include "ripemd-hash-4way.h"
|
||||
|
||||
// Layout of the LBRY input as consumed by the first SHA-256 pass:
// the leading LBRY_MIDSTATE bytes are hashed once into a midstate by the
// scanhash routine, the remaining LBRY_TAIL bytes are hashed on every call
// of the hash function.
#define LBRY_INPUT_SIZE 112
#define LBRY_MIDSTATE   64
// Fully parenthesized so the difference expands as one value in any
// surrounding expression (e.g. 2 * LBRY_TAIL).
#define LBRY_TAIL ( (LBRY_INPUT_SIZE) - (LBRY_MIDSTATE) )
|
||||
|
||||
#if defined(LBRY_8WAY)
|
||||
|
||||
static __thread sha256_8way_context sha256_8w_mid;
|
||||
|
||||
void lbry_8way_hash( void* output, const void* input )
|
||||
{
|
||||
uint32_t _ALIGN(64) vhashA[16<<3];
|
||||
uint32_t _ALIGN(64) vhashB[16<<3];
|
||||
uint32_t _ALIGN(64) vhashC[16<<3];
|
||||
uint32_t _ALIGN(32) h0[32];
|
||||
uint32_t _ALIGN(32) h1[32];
|
||||
uint32_t _ALIGN(32) h2[32];
|
||||
uint32_t _ALIGN(32) h3[32];
|
||||
uint32_t _ALIGN(32) h4[32];
|
||||
uint32_t _ALIGN(32) h5[32];
|
||||
uint32_t _ALIGN(32) h6[32];
|
||||
uint32_t _ALIGN(32) h7[32];
|
||||
sha256_8way_context ctx_sha256 __attribute__ ((aligned (64)));
|
||||
sha512_4way_context ctx_sha512;
|
||||
ripemd160_8way_context ctx_ripemd;
|
||||
|
||||
memcpy( &ctx_sha256, &sha256_8w_mid, sizeof(ctx_sha256) );
|
||||
sha256_8way( &ctx_sha256, input + (LBRY_MIDSTATE<<3), LBRY_TAIL );
|
||||
sha256_8way_close( &ctx_sha256, vhashA );
|
||||
|
||||
sha256_8way_init( &ctx_sha256 );
|
||||
sha256_8way( &ctx_sha256, vhashA, 32 );
|
||||
sha256_8way_close( &ctx_sha256, vhashA );
|
||||
|
||||
// reinterleave to do sha512 4-way 64 bit twice.
|
||||
mm256_deinterleave_8x32( h0, h1, h2, h3, h4, h5, h6, h7, vhashA, 256 );
|
||||
mm256_interleave_4x64( vhashA, h0, h1, h2, h3, 256 );
|
||||
mm256_interleave_4x64( vhashB, h4, h5, h6, h7, 256 );
|
||||
|
||||
sha512_4way_init( &ctx_sha512 );
|
||||
sha512_4way( &ctx_sha512, vhashA, 32 );
|
||||
sha512_4way_close( &ctx_sha512, vhashA );
|
||||
|
||||
sha512_4way_init( &ctx_sha512 );
|
||||
sha512_4way( &ctx_sha512, vhashB, 32 );
|
||||
sha512_4way_close( &ctx_sha512, vhashB );
|
||||
|
||||
// back to 8-way 32 bit
|
||||
mm256_deinterleave_4x64( h0, h1, h2, h3, vhashA, 512 );
|
||||
mm256_deinterleave_4x64( h4, h5, h6, h7, vhashB, 512 );
|
||||
mm256_interleave_8x32( vhashA, h0, h1, h2, h3, h4, h5, h6, h7, 512 );
|
||||
|
||||
ripemd160_8way_init( &ctx_ripemd );
|
||||
ripemd160_8way( &ctx_ripemd, vhashA, 32 );
|
||||
ripemd160_8way_close( &ctx_ripemd, vhashB );
|
||||
|
||||
ripemd160_8way_init( &ctx_ripemd );
|
||||
ripemd160_8way( &ctx_ripemd, vhashA+(8<<3), 32 );
|
||||
ripemd160_8way_close( &ctx_ripemd, vhashC );
|
||||
|
||||
sha256_8way_init( &ctx_sha256 );
|
||||
sha256_8way( &ctx_sha256, vhashB, 20 );
|
||||
sha256_8way( &ctx_sha256, vhashC, 20 );
|
||||
sha256_8way_close( &ctx_sha256, vhashA );
|
||||
|
||||
sha256_8way_init( &ctx_sha256 );
|
||||
sha256_8way( &ctx_sha256, vhashA, 32 );
|
||||
sha256_8way_close( &ctx_sha256, vhashA );
|
||||
|
||||
mm256_deinterleave_8x32( output, output+ 32, output+ 64, output+ 96,
|
||||
output+128, output+160, output+192, output+224,
|
||||
vhashA, 256 );
|
||||
}
|
||||
|
||||
int scanhash_lbry_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done)
|
||||
{
|
||||
uint32_t hash[8*8] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[32*8] __attribute__ ((aligned (64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[27];
|
||||
const uint32_t first_nonce = pdata[27];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint32_t edata[32] __attribute__ ((aligned (64)));
|
||||
uint32_t *nonces = work->nonces;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep = vdata + 216; // 27*8
|
||||
|
||||
uint64_t htmax[] = { 0, 0xF, 0xFF,
|
||||
0xFFF, 0xFFFF, 0x10000000 };
|
||||
uint32_t masks[] = { 0xFFFFFFFF, 0xFFFFFFF0, 0xFFFFFF00,
|
||||
0xFFFFF000, 0xFFFF0000, 0 };
|
||||
|
||||
// we need bigendian data...
|
||||
swab32_array( edata, pdata, 32 );
|
||||
mm256_interleave_8x32( vdata, edata, edata, edata, edata,
|
||||
edata, edata, edata, edata, 1024 );
|
||||
sha256_8way_init( &sha256_8w_mid );
|
||||
sha256_8way( &sha256_8w_mid, vdata, LBRY_MIDSTATE );
|
||||
|
||||
for ( int m = 0; m < sizeof(masks); m++ ) if ( Htarg <= htmax[m] )
|
||||
{
|
||||
uint32_t mask = masks[m];
|
||||
do
|
||||
{
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep+1, n+1 );
|
||||
be32enc( noncep+2, n+2 );
|
||||
be32enc( noncep+3, n+3 );
|
||||
be32enc( noncep+4, n+4 );
|
||||
be32enc( noncep+5, n+5 );
|
||||
be32enc( noncep+6, n+6 );
|
||||
be32enc( noncep+7, n+7 );
|
||||
|
||||
lbry_8way_hash( hash, vdata );
|
||||
|
||||
for ( int i = 0; i < 8; i++ )
|
||||
if ( !( (hash+(i<<3))[7] & mask ) && fulltest( hash+(i<<3), ptarget ) )
|
||||
{
|
||||
pdata[27] = n+i;
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
}
|
||||
n+=8;
|
||||
} while ( ( num_found == 0 ) && ( n < max_nonce )
|
||||
&& !work_restart[thr_id].restart );
|
||||
break;
|
||||
}
|
||||
|
||||
*hashes_done = n - first_nonce;
|
||||
return num_found;
|
||||
}
|
||||
|
||||
#elif defined(LBRY_4WAY)
|
||||
|
||||
static __thread sha256_4way_context sha256_mid;
|
||||
|
||||
void lbry_4way_hash( void* output, const void* input )
|
||||
@@ -21,7 +153,7 @@ void lbry_4way_hash( void* output, const void* input )
|
||||
uint32_t _ALIGN(64) vhashC[16<<2];
|
||||
|
||||
memcpy( &ctx_sha256, &sha256_mid, sizeof(ctx_sha256) );
|
||||
sha256_4way( &ctx_sha256, input+(64<<2), 48 );
|
||||
sha256_4way( &ctx_sha256, input + (LBRY_MIDSTATE<<2), LBRY_TAIL );
|
||||
sha256_4way_close( &ctx_sha256, vhashA );
|
||||
|
||||
sha256_4way_init( &ctx_sha256 );
|
||||
@@ -67,12 +199,8 @@ int scanhash_lbry_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint32_t edata[32] __attribute__ ((aligned (64)));
|
||||
uint32_t *nonces = work->nonces;
|
||||
bool *found = work->nfound;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep0 = vdata + 108; // 27*4
|
||||
uint32_t *noncep1 = vdata + 109;
|
||||
uint32_t *noncep2 = vdata + 110;
|
||||
uint32_t *noncep3 = vdata + 111;
|
||||
uint32_t *noncep = vdata + 108; // 27*4
|
||||
|
||||
uint64_t htmax[] = { 0, 0xF, 0xFF,
|
||||
0xFFF, 0xFFFF, 0x10000000 };
|
||||
@@ -83,47 +211,26 @@ int scanhash_lbry_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
swab32_array( edata, pdata, 32 );
|
||||
mm_interleave_4x32( vdata, edata, edata, edata, edata, 1024 );
|
||||
sha256_4way_init( &sha256_mid );
|
||||
sha256_4way( &sha256_mid, vdata, 64 );
|
||||
sha256_4way( &sha256_mid, vdata, LBRY_MIDSTATE );
|
||||
|
||||
for ( int m = 0; m < sizeof(masks); m++ ) if ( Htarg <= htmax[m] )
|
||||
{
|
||||
uint32_t mask = masks[m];
|
||||
do
|
||||
{
|
||||
found[0] = found[1] = found[2] = found[3] = false;
|
||||
be32enc( noncep0, n );
|
||||
be32enc( noncep1, n+1 );
|
||||
be32enc( noncep2, n+2 );
|
||||
be32enc( noncep3, n+3 );
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep+1, n+1 );
|
||||
be32enc( noncep+2, n+2 );
|
||||
be32enc( noncep+3, n+3 );
|
||||
|
||||
lbry_4way_hash( hash, vdata );
|
||||
|
||||
if ( !( hash[7] & mask ) && fulltest( hash, ptarget ) )
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( !( (hash+(i<<3))[7] & mask ) && fulltest( hash+(i<<3), ptarget ) )
|
||||
{
|
||||
found[0] = true;
|
||||
num_found++;
|
||||
nonces[0] = pdata[27] = n;
|
||||
work_set_target_ratio( work, hash );
|
||||
}
|
||||
if ( !( (hash+8)[7] & mask ) && fulltest( hash+8, ptarget ) )
|
||||
{
|
||||
found[1] = true;
|
||||
num_found++;
|
||||
nonces[1] = n+1;
|
||||
work_set_target_ratio( work, hash+8 );
|
||||
}
|
||||
if ( !( (hash+16)[7] & mask ) && fulltest( hash+16, ptarget ) )
|
||||
{
|
||||
found[2] = true;
|
||||
num_found++;
|
||||
nonces[2] = n+2;
|
||||
work_set_target_ratio( work, hash+16 );
|
||||
}
|
||||
if ( !( (hash+24)[7] & mask ) && fulltest( hash+24, ptarget ) )
|
||||
{
|
||||
found[3] = true;
|
||||
num_found++;
|
||||
nonces[3] = n+3;
|
||||
work_set_target_ratio( work, hash+24 );
|
||||
pdata[27] = n+i;
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
}
|
||||
n+=4;
|
||||
} while ( ( num_found == 0 ) && ( n < max_nonce )
|
||||
|
@@ -73,7 +73,10 @@ int64_t lbry_get_max64() { return 0x1ffffLL; }
|
||||
bool register_lbry_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->optimizations = SSE2_OPT | AVX_OPT | AVX2_OPT | SHA_OPT;
|
||||
#if defined (LBRY_4WAY)
|
||||
#if defined (LBRY_8WAY)
|
||||
gate->scanhash = (void*)&scanhash_lbry_8way;
|
||||
gate->hash = (void*)&lbry_8way_hash;
|
||||
#elif defined (LBRY_4WAY)
|
||||
gate->scanhash = (void*)&scanhash_lbry_4way;
|
||||
gate->hash = (void*)&lbry_4way_hash;
|
||||
#else
|
||||
|
@@ -4,8 +4,9 @@
|
||||
#include "algo-gate-api.h"
|
||||
#include <stdint.h>
|
||||
|
||||
// need sha512 2 way AVX x2 or 1 way scalar x4 to support 4way AVX.
|
||||
#if defined(__AVX2__)
|
||||
#define LBRY_4WAY
|
||||
#define LBRY_8WAY
|
||||
#endif
|
||||
|
||||
#define LBRY_NTIME_INDEX 25
|
||||
@@ -16,15 +17,21 @@
|
||||
|
||||
bool register_lbry_algo( algo_gate_t* gate );
|
||||
|
||||
#if defined(LBRY_4WAY)
|
||||
#if defined(LBRY_8WAY)
|
||||
|
||||
void lbry_8way_hash( void *state, const void *input );
|
||||
int scanhash_lbry_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
|
||||
#elif defined(LBRY_4WAY)
|
||||
|
||||
void lbry_4way_hash( void *state, const void *input );
|
||||
int scanhash_lbry_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
#endif
|
||||
#else
|
||||
|
||||
void lbry_hash( void *state, const void *input );
|
||||
int scanhash_lbry( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
@@ -5,25 +5,6 @@
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
|
||||
/*
|
||||
* Round functions for RIPEMD-128 and RIPEMD-160.
|
||||
*/
|
||||
#define F1(x, y, z) \
|
||||
_mm_xor_si128( _mm_xor_si128( x, y ), z )
|
||||
|
||||
#define F2(x, y, z) \
|
||||
_mm_xor_si128( _mm_and_si128( _mm_xor_si128( y, z ), x ), z )
|
||||
|
||||
#define F3(x, y, z) \
|
||||
_mm_xor_si128( _mm_or_si128( x, mm_not( y ) ), z )
|
||||
|
||||
#define F4(x, y, z) \
|
||||
_mm_xor_si128( _mm_and_si128( _mm_xor_si128( x, y ), z ), y )
|
||||
|
||||
#define F5(x, y, z) \
|
||||
_mm_xor_si128( x, _mm_or_si128( y, mm_not( z ) ) )
|
||||
|
||||
|
||||
static const uint32_t IV[5] =
|
||||
{ 0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476, 0xC3D2E1F0 };
|
||||
|
||||
@@ -42,6 +23,23 @@ static const uint32_t IV[5] =
|
||||
#define K24 0x7A6D76E9
|
||||
#define K25 0x00000000
|
||||
|
||||
// RIPEMD-160 4 way
|
||||
|
||||
#define F1(x, y, z) \
|
||||
_mm_xor_si128( _mm_xor_si128( x, y ), z )
|
||||
|
||||
#define F2(x, y, z) \
|
||||
_mm_xor_si128( _mm_and_si128( _mm_xor_si128( y, z ), x ), z )
|
||||
|
||||
#define F3(x, y, z) \
|
||||
_mm_xor_si128( _mm_or_si128( x, mm_not( y ) ), z )
|
||||
|
||||
#define F4(x, y, z) \
|
||||
_mm_xor_si128( _mm_and_si128( _mm_xor_si128( x, y ), z ), y )
|
||||
|
||||
#define F5(x, y, z) \
|
||||
_mm_xor_si128( x, _mm_or_si128( y, mm_not( z ) ) )
|
||||
|
||||
#define RR(a, b, c, d, e, f, s, r, k) \
|
||||
do{ \
|
||||
a = _mm_add_epi32( mm_rotl_32( _mm_add_epi32( _mm_add_epi32( \
|
||||
@@ -321,3 +319,304 @@ void ripemd160_4way_close( ripemd160_4way_context *sc, void *dst )
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(__AVX2__)
|
||||
|
||||
// Ripemd-160 8 way
|
||||
|
||||
// Boolean round functions on eight 32-bit lanes (one __m256i per operand).

// x ^ y ^ z
#define F8W_1(x, y, z) \
   _mm256_xor_si256( (x), _mm256_xor_si256( (y), (z) ) )

// ( ( y ^ z ) & x ) ^ z
#define F8W_2(x, y, z) \
   _mm256_xor_si256( (z), _mm256_and_si256( (x), _mm256_xor_si256( (y), (z) ) ) )

// ( x | ~y ) ^ z
#define F8W_3(x, y, z) \
   _mm256_xor_si256( (z), _mm256_or_si256( mm256_not( (y) ), (x) ) )

// ( ( x ^ y ) & z ) ^ y
#define F8W_4(x, y, z) \
   _mm256_xor_si256( (y), _mm256_and_si256( (z), _mm256_xor_si256( (x), (y) ) ) )

// x ^ ( y | ~z )
#define F8W_5(x, y, z) \
   _mm256_xor_si256( _mm256_or_si256( mm256_not( (z) ), (y) ), (x) )

// One step: a = rotl( a + f(b,c,d) + r + k, s ) + e;  c = rotl( c, 10 ).
// a and c must be assignable; f is the name of a round function macro.
#define RR_8W(a, b, c, d, e, f, s, r, k) \
do { \
   __m256i rr8w_sum_ = _mm256_add_epi32( a, f( b, c, d ) ); \
   rr8w_sum_ = _mm256_add_epi32( rr8w_sum_, r ); \
   rr8w_sum_ = _mm256_add_epi32( rr8w_sum_, _mm256_set1_epi32( k ) ); \
   a = _mm256_add_epi32( mm256_rotl_32( rr8w_sum_, s ), e ); \
   c = mm256_rotl_32( c, 10 ); \
} while (0)

// Step on the variables suffixed 1 (A1..E1) with constant K1<k>.
#define ROUND1_8W(a, b, c, d, e, f, s, r, k) \
   RR_8W( a ## 1, b ## 1, c ## 1, d ## 1, e ## 1, f, s, r, K1 ## k )

// Step on the variables suffixed 2 (A2..E2) with constant K2<k>.
#define ROUND2_8W(a, b, c, d, e, f, s, r, k) \
   RR_8W( a ## 2, b ## 2, c ## 2, d ## 2, e ## 2, f, s, r, K2 ## k )
|
||||
|
||||
// Compression step of the 8-way RIPEMD-160: consumes the 16 vector words
// held in sc->buf and updates the five chaining words in sc->val in place.
// Two independent chains of 80 steps each are run from the same starting
// state: chain 1 (A1..E1, ROUND1_8W, constants K11..K15, functions
// F8W_1 -> F8W_5) and chain 2 (A2..E2, ROUND2_8W, constants K21..K25,
// functions F8W_5 -> F8W_1). Their results are folded into the state at
// the end. The message word order and rotation amounts below are a fixed
// table; the order of the statements matters.
static void ripemd160_8way_round( ripemd160_8way_context *sc )
|
||||
{
|
||||
const __m256i *in = (__m256i*)sc->buf;
|
||||
__m256i *h = (__m256i*)sc->val;
|
||||
register __m256i A1, B1, C1, D1, E1;
|
||||
register __m256i A2, B2, C2, D2, E2;
|
||||
__m256i tmp;
|
||||
|
||||
// Both chains start from the current chaining value.
A1 = A2 = h[0];
|
||||
B1 = B2 = h[1];
|
||||
C1 = C2 = h[2];
|
||||
D1 = D2 = h[3];
|
||||
E1 = E2 = h[4];
|
||||
|
||||
// Chain 1, steps 1-16: F8W_1, constant K11.
ROUND1_8W( A, B, C, D, E, F8W_1, 11, in[ 0], 1 );
|
||||
ROUND1_8W( E, A, B, C, D, F8W_1, 14, in[ 1], 1 );
|
||||
ROUND1_8W( D, E, A, B, C, F8W_1, 15, in[ 2], 1 );
|
||||
ROUND1_8W( C, D, E, A, B, F8W_1, 12, in[ 3], 1 );
|
||||
ROUND1_8W( B, C, D, E, A, F8W_1, 5, in[ 4], 1 );
|
||||
ROUND1_8W( A, B, C, D, E, F8W_1, 8, in[ 5], 1 );
|
||||
ROUND1_8W( E, A, B, C, D, F8W_1, 7, in[ 6], 1 );
|
||||
ROUND1_8W( D, E, A, B, C, F8W_1, 9, in[ 7], 1 );
|
||||
ROUND1_8W( C, D, E, A, B, F8W_1, 11, in[ 8], 1 );
|
||||
ROUND1_8W( B, C, D, E, A, F8W_1, 13, in[ 9], 1 );
|
||||
ROUND1_8W( A, B, C, D, E, F8W_1, 14, in[10], 1 );
|
||||
ROUND1_8W( E, A, B, C, D, F8W_1, 15, in[11], 1 );
|
||||
ROUND1_8W( D, E, A, B, C, F8W_1, 6, in[12], 1 );
|
||||
ROUND1_8W( C, D, E, A, B, F8W_1, 7, in[13], 1 );
|
||||
ROUND1_8W( B, C, D, E, A, F8W_1, 9, in[14], 1 );
|
||||
ROUND1_8W( A, B, C, D, E, F8W_1, 8, in[15], 1 );
|
||||
|
||||
// Chain 1, steps 17-32: F8W_2, constant K12.
ROUND1_8W( E, A, B, C, D, F8W_2, 7, in[ 7], 2 );
|
||||
ROUND1_8W( D, E, A, B, C, F8W_2, 6, in[ 4], 2 );
|
||||
ROUND1_8W( C, D, E, A, B, F8W_2, 8, in[13], 2 );
|
||||
ROUND1_8W( B, C, D, E, A, F8W_2, 13, in[ 1], 2 );
|
||||
ROUND1_8W( A, B, C, D, E, F8W_2, 11, in[10], 2 );
|
||||
ROUND1_8W( E, A, B, C, D, F8W_2, 9, in[ 6], 2 );
|
||||
ROUND1_8W( D, E, A, B, C, F8W_2, 7, in[15], 2 );
|
||||
ROUND1_8W( C, D, E, A, B, F8W_2, 15, in[ 3], 2 );
|
||||
ROUND1_8W( B, C, D, E, A, F8W_2, 7, in[12], 2 );
|
||||
ROUND1_8W( A, B, C, D, E, F8W_2, 12, in[ 0], 2 );
|
||||
ROUND1_8W( E, A, B, C, D, F8W_2, 15, in[ 9], 2 );
|
||||
ROUND1_8W( D, E, A, B, C, F8W_2, 9, in[ 5], 2 );
|
||||
ROUND1_8W( C, D, E, A, B, F8W_2, 11, in[ 2], 2 );
|
||||
ROUND1_8W( B, C, D, E, A, F8W_2, 7, in[14], 2 );
|
||||
ROUND1_8W( A, B, C, D, E, F8W_2, 13, in[11], 2 );
|
||||
ROUND1_8W( E, A, B, C, D, F8W_2, 12, in[ 8], 2 );
|
||||
|
||||
// Chain 1, steps 33-48: F8W_3, constant K13.
ROUND1_8W( D, E, A, B, C, F8W_3, 11, in[ 3], 3 );
|
||||
ROUND1_8W( C, D, E, A, B, F8W_3, 13, in[10], 3 );
|
||||
ROUND1_8W( B, C, D, E, A, F8W_3, 6, in[14], 3 );
|
||||
ROUND1_8W( A, B, C, D, E, F8W_3, 7, in[ 4], 3 );
|
||||
ROUND1_8W( E, A, B, C, D, F8W_3, 14, in[ 9], 3 );
|
||||
ROUND1_8W( D, E, A, B, C, F8W_3, 9, in[15], 3 );
|
||||
ROUND1_8W( C, D, E, A, B, F8W_3, 13, in[ 8], 3 );
|
||||
ROUND1_8W( B, C, D, E, A, F8W_3, 15, in[ 1], 3 );
|
||||
ROUND1_8W( A, B, C, D, E, F8W_3, 14, in[ 2], 3 );
|
||||
ROUND1_8W( E, A, B, C, D, F8W_3, 8, in[ 7], 3 );
|
||||
ROUND1_8W( D, E, A, B, C, F8W_3, 13, in[ 0], 3 );
|
||||
ROUND1_8W( C, D, E, A, B, F8W_3, 6, in[ 6], 3 );
|
||||
ROUND1_8W( B, C, D, E, A, F8W_3, 5, in[13], 3 );
|
||||
ROUND1_8W( A, B, C, D, E, F8W_3, 12, in[11], 3 );
|
||||
ROUND1_8W( E, A, B, C, D, F8W_3, 7, in[ 5], 3 );
|
||||
ROUND1_8W( D, E, A, B, C, F8W_3, 5, in[12], 3 );
|
||||
|
||||
// Chain 1, steps 49-64: F8W_4, constant K14.
ROUND1_8W( C, D, E, A, B, F8W_4, 11, in[ 1], 4 );
|
||||
ROUND1_8W( B, C, D, E, A, F8W_4, 12, in[ 9], 4 );
|
||||
ROUND1_8W( A, B, C, D, E, F8W_4, 14, in[11], 4 );
|
||||
ROUND1_8W( E, A, B, C, D, F8W_4, 15, in[10], 4 );
|
||||
ROUND1_8W( D, E, A, B, C, F8W_4, 14, in[ 0], 4 );
|
||||
ROUND1_8W( C, D, E, A, B, F8W_4, 15, in[ 8], 4 );
|
||||
ROUND1_8W( B, C, D, E, A, F8W_4, 9, in[12], 4 );
|
||||
ROUND1_8W( A, B, C, D, E, F8W_4, 8, in[ 4], 4 );
|
||||
ROUND1_8W( E, A, B, C, D, F8W_4, 9, in[13], 4 );
|
||||
ROUND1_8W( D, E, A, B, C, F8W_4, 14, in[ 3], 4 );
|
||||
ROUND1_8W( C, D, E, A, B, F8W_4, 5, in[ 7], 4 );
|
||||
ROUND1_8W( B, C, D, E, A, F8W_4, 6, in[15], 4 );
|
||||
ROUND1_8W( A, B, C, D, E, F8W_4, 8, in[14], 4 );
|
||||
ROUND1_8W( E, A, B, C, D, F8W_4, 6, in[ 5], 4 );
|
||||
ROUND1_8W( D, E, A, B, C, F8W_4, 5, in[ 6], 4 );
|
||||
ROUND1_8W( C, D, E, A, B, F8W_4, 12, in[ 2], 4 );
|
||||
|
||||
// Chain 1, steps 65-80: F8W_5, constant K15.
ROUND1_8W( B, C, D, E, A, F8W_5, 9, in[ 4], 5 );
|
||||
ROUND1_8W( A, B, C, D, E, F8W_5, 15, in[ 0], 5 );
|
||||
ROUND1_8W( E, A, B, C, D, F8W_5, 5, in[ 5], 5 );
|
||||
ROUND1_8W( D, E, A, B, C, F8W_5, 11, in[ 9], 5 );
|
||||
ROUND1_8W( C, D, E, A, B, F8W_5, 6, in[ 7], 5 );
|
||||
ROUND1_8W( B, C, D, E, A, F8W_5, 8, in[12], 5 );
|
||||
ROUND1_8W( A, B, C, D, E, F8W_5, 13, in[ 2], 5 );
|
||||
ROUND1_8W( E, A, B, C, D, F8W_5, 12, in[10], 5 );
|
||||
ROUND1_8W( D, E, A, B, C, F8W_5, 5, in[14], 5 );
|
||||
ROUND1_8W( C, D, E, A, B, F8W_5, 12, in[ 1], 5 );
|
||||
ROUND1_8W( B, C, D, E, A, F8W_5, 13, in[ 3], 5 );
|
||||
ROUND1_8W( A, B, C, D, E, F8W_5, 14, in[ 8], 5 );
|
||||
ROUND1_8W( E, A, B, C, D, F8W_5, 11, in[11], 5 );
|
||||
ROUND1_8W( D, E, A, B, C, F8W_5, 8, in[ 6], 5 );
|
||||
ROUND1_8W( C, D, E, A, B, F8W_5, 5, in[15], 5 );
|
||||
ROUND1_8W( B, C, D, E, A, F8W_5, 6, in[13], 5 );
|
||||
|
||||
// Chain 2, steps 1-16: F8W_5, constant K21.
ROUND2_8W( A, B, C, D, E, F8W_5, 8, in[ 5], 1 );
|
||||
ROUND2_8W( E, A, B, C, D, F8W_5, 9, in[14], 1 );
|
||||
ROUND2_8W( D, E, A, B, C, F8W_5, 9, in[ 7], 1 );
|
||||
ROUND2_8W( C, D, E, A, B, F8W_5, 11, in[ 0], 1 );
|
||||
ROUND2_8W( B, C, D, E, A, F8W_5, 13, in[ 9], 1 );
|
||||
ROUND2_8W( A, B, C, D, E, F8W_5, 15, in[ 2], 1 );
|
||||
ROUND2_8W( E, A, B, C, D, F8W_5, 15, in[11], 1 );
|
||||
ROUND2_8W( D, E, A, B, C, F8W_5, 5, in[ 4], 1 );
|
||||
ROUND2_8W( C, D, E, A, B, F8W_5, 7, in[13], 1 );
|
||||
ROUND2_8W( B, C, D, E, A, F8W_5, 7, in[ 6], 1 );
|
||||
ROUND2_8W( A, B, C, D, E, F8W_5, 8, in[15], 1 );
|
||||
ROUND2_8W( E, A, B, C, D, F8W_5, 11, in[ 8], 1 );
|
||||
ROUND2_8W( D, E, A, B, C, F8W_5, 14, in[ 1], 1 );
|
||||
ROUND2_8W( C, D, E, A, B, F8W_5, 14, in[10], 1 );
|
||||
ROUND2_8W( B, C, D, E, A, F8W_5, 12, in[ 3], 1 );
|
||||
ROUND2_8W( A, B, C, D, E, F8W_5, 6, in[12], 1 );
|
||||
|
||||
// Chain 2, steps 17-32: F8W_4, constant K22.
ROUND2_8W( E, A, B, C, D, F8W_4, 9, in[ 6], 2 );
|
||||
ROUND2_8W( D, E, A, B, C, F8W_4, 13, in[11], 2 );
|
||||
ROUND2_8W( C, D, E, A, B, F8W_4, 15, in[ 3], 2 );
|
||||
ROUND2_8W( B, C, D, E, A, F8W_4, 7, in[ 7], 2 );
|
||||
ROUND2_8W( A, B, C, D, E, F8W_4, 12, in[ 0], 2 );
|
||||
ROUND2_8W( E, A, B, C, D, F8W_4, 8, in[13], 2 );
|
||||
ROUND2_8W( D, E, A, B, C, F8W_4, 9, in[ 5], 2 );
|
||||
ROUND2_8W( C, D, E, A, B, F8W_4, 11, in[10], 2 );
|
||||
ROUND2_8W( B, C, D, E, A, F8W_4, 7, in[14], 2 );
|
||||
ROUND2_8W( A, B, C, D, E, F8W_4, 7, in[15], 2 );
|
||||
ROUND2_8W( E, A, B, C, D, F8W_4, 12, in[ 8], 2 );
|
||||
ROUND2_8W( D, E, A, B, C, F8W_4, 7, in[12], 2 );
|
||||
ROUND2_8W( C, D, E, A, B, F8W_4, 6, in[ 4], 2 );
|
||||
ROUND2_8W( B, C, D, E, A, F8W_4, 15, in[ 9], 2 );
|
||||
ROUND2_8W( A, B, C, D, E, F8W_4, 13, in[ 1], 2 );
|
||||
ROUND2_8W( E, A, B, C, D, F8W_4, 11, in[ 2], 2 );
|
||||
|
||||
// Chain 2, steps 33-48: F8W_3, constant K23.
ROUND2_8W( D, E, A, B, C, F8W_3, 9, in[15], 3 );
|
||||
ROUND2_8W( C, D, E, A, B, F8W_3, 7, in[ 5], 3 );
|
||||
ROUND2_8W( B, C, D, E, A, F8W_3, 15, in[ 1], 3 );
|
||||
ROUND2_8W( A, B, C, D, E, F8W_3, 11, in[ 3], 3 );
|
||||
ROUND2_8W( E, A, B, C, D, F8W_3, 8, in[ 7], 3 );
|
||||
ROUND2_8W( D, E, A, B, C, F8W_3, 6, in[14], 3 );
|
||||
ROUND2_8W( C, D, E, A, B, F8W_3, 6, in[ 6], 3 );
|
||||
ROUND2_8W( B, C, D, E, A, F8W_3, 14, in[ 9], 3 );
|
||||
ROUND2_8W( A, B, C, D, E, F8W_3, 12, in[11], 3 );
|
||||
ROUND2_8W( E, A, B, C, D, F8W_3, 13, in[ 8], 3 );
|
||||
ROUND2_8W( D, E, A, B, C, F8W_3, 5, in[12], 3 );
|
||||
ROUND2_8W( C, D, E, A, B, F8W_3, 14, in[ 2], 3 );
|
||||
ROUND2_8W( B, C, D, E, A, F8W_3, 13, in[10], 3 );
|
||||
ROUND2_8W( A, B, C, D, E, F8W_3, 13, in[ 0], 3 );
|
||||
ROUND2_8W( E, A, B, C, D, F8W_3, 7, in[ 4], 3 );
|
||||
ROUND2_8W( D, E, A, B, C, F8W_3, 5, in[13], 3 );
|
||||
|
||||
// Chain 2, steps 49-64: F8W_2, constant K24.
ROUND2_8W( C, D, E, A, B, F8W_2, 15, in[ 8], 4 );
|
||||
ROUND2_8W( B, C, D, E, A, F8W_2, 5, in[ 6], 4 );
|
||||
ROUND2_8W( A, B, C, D, E, F8W_2, 8, in[ 4], 4 );
|
||||
ROUND2_8W( E, A, B, C, D, F8W_2, 11, in[ 1], 4 );
|
||||
ROUND2_8W( D, E, A, B, C, F8W_2, 14, in[ 3], 4 );
|
||||
ROUND2_8W( C, D, E, A, B, F8W_2, 14, in[11], 4 );
|
||||
ROUND2_8W( B, C, D, E, A, F8W_2, 6, in[15], 4 );
|
||||
ROUND2_8W( A, B, C, D, E, F8W_2, 14, in[ 0], 4 );
|
||||
ROUND2_8W( E, A, B, C, D, F8W_2, 6, in[ 5], 4 );
|
||||
ROUND2_8W( D, E, A, B, C, F8W_2, 9, in[12], 4 );
|
||||
ROUND2_8W( C, D, E, A, B, F8W_2, 12, in[ 2], 4 );
|
||||
ROUND2_8W( B, C, D, E, A, F8W_2, 9, in[13], 4 );
|
||||
ROUND2_8W( A, B, C, D, E, F8W_2, 12, in[ 9], 4 );
|
||||
ROUND2_8W( E, A, B, C, D, F8W_2, 5, in[ 7], 4 );
|
||||
ROUND2_8W( D, E, A, B, C, F8W_2, 15, in[10], 4 );
|
||||
ROUND2_8W( C, D, E, A, B, F8W_2, 8, in[14], 4 );
|
||||
|
||||
// Chain 2, steps 65-80: F8W_1, constant K25.
ROUND2_8W( B, C, D, E, A, F8W_1, 8, in[12], 5 );
|
||||
ROUND2_8W( A, B, C, D, E, F8W_1, 5, in[15], 5 );
|
||||
ROUND2_8W( E, A, B, C, D, F8W_1, 12, in[10], 5 );
|
||||
ROUND2_8W( D, E, A, B, C, F8W_1, 9, in[ 4], 5 );
|
||||
ROUND2_8W( C, D, E, A, B, F8W_1, 12, in[ 1], 5 );
|
||||
ROUND2_8W( B, C, D, E, A, F8W_1, 5, in[ 5], 5 );
|
||||
ROUND2_8W( A, B, C, D, E, F8W_1, 14, in[ 8], 5 );
|
||||
ROUND2_8W( E, A, B, C, D, F8W_1, 6, in[ 7], 5 );
|
||||
ROUND2_8W( D, E, A, B, C, F8W_1, 8, in[ 6], 5 );
|
||||
ROUND2_8W( C, D, E, A, B, F8W_1, 13, in[ 2], 5 );
|
||||
ROUND2_8W( B, C, D, E, A, F8W_1, 6, in[13], 5 );
|
||||
ROUND2_8W( A, B, C, D, E, F8W_1, 5, in[14], 5 );
|
||||
ROUND2_8W( E, A, B, C, D, F8W_1, 15, in[ 0], 5 );
|
||||
ROUND2_8W( D, E, A, B, C, F8W_1, 13, in[ 3], 5 );
|
||||
ROUND2_8W( C, D, E, A, B, F8W_1, 11, in[ 9], 5 );
|
||||
ROUND2_8W( B, C, D, E, A, F8W_1, 11, in[11], 5 );
|
||||
|
||||
// Fold both chains into the state. Each new h[i] is the old h[i+1] plus
// one word from each chain; tmp holds the new h[0] until the old h[0] has
// been consumed for h[4].
tmp = _mm256_add_epi32( _mm256_add_epi32( h[1], C1 ), D2 );
|
||||
h[1] = _mm256_add_epi32( _mm256_add_epi32( h[2], D1 ), E2 );
|
||||
h[2] = _mm256_add_epi32( _mm256_add_epi32( h[3], E1 ), A2 );
|
||||
h[3] = _mm256_add_epi32( _mm256_add_epi32( h[4], A1 ), B2 );
|
||||
h[4] = _mm256_add_epi32( _mm256_add_epi32( h[0], B1 ), C2 );
|
||||
h[0] = tmp;
|
||||
}
|
||||
|
||||
|
||||
// Prepare an 8-way RIPEMD-160 context for a new message: each of the five
// chaining words is filled with the corresponding IV word broadcast to all
// 32-bit elements of the vector, and the byte counter is cleared.
void ripemd160_8way_init( ripemd160_8way_context *sc )
{
   for ( int i = 0; i < 5; i++ )
      sc->val[i] = _mm256_set1_epi32( IV[i] );

   sc->count_low  = 0;
   sc->count_high = 0;
}
|
||||
|
||||
// Absorb message data into an 8-way RIPEMD-160 context.
//
//   sc   - context previously set up by ripemd160_8way_init
//   data - input read as an array of __m256i; one vector is consumed for
//          every 4 bytes of len
//   len  - number of bytes to absorb, counted against the 64 byte block
//
// Data is staged in sc->buf and ripemd160_8way_round is run each time the
// staging buffer reaches a full 64 byte block.  The 64 bit byte counter
// (count_high:count_low) advances by the bytes consumed.
// NOTE(review): only whole 32 bit words are copied (take >> 2), so callers
// are presumably expected to pass a len that is a multiple of 4 - confirm.
void ripemd160_8way( ripemd160_8way_context *sc, const void *data, size_t len )
{
   const size_t block_bytes = 64;
   __m256i *src = (__m256i*)data;
   // Bytes already staged in the current block.
   size_t fill = (size_t)( sc->count_low & 63U );

   while ( len != 0 )
   {
      // Take as much as fits in the current block, but no more than is left.
      const size_t room = block_bytes - fill;
      const size_t take = ( len < room ) ? len : room;

      memcpy_256( sc->buf + ( fill >> 2 ), src, take >> 2 );
      src  += take >> 2;
      fill += take;
      len  -= take;

      if ( fill == block_bytes )
      {
         ripemd160_8way_round( sc );
         fill = 0;
      }

      // Advance the byte counter, carrying into the high word on wrap.
      const uint32_t before = sc->count_low;
      sc->count_low = before + (uint32_t)take;
      if ( sc->count_low < before )
         sc->count_high++;
   }
}
|
||||
|
||||
// Finish an 8-way RIPEMD-160 computation and write the digest.
//
//   sc  - context holding the absorbed message
//   dst - receives the five chaining vectors (5 * sizeof(__m256i) bytes),
//         stored in the same vector layout as sc->val
//
// Padding: a 0x80 word is written after the data, the block is zero filled
// up to byte 56, and the message length in bits is stored as two 32 bit
// words, low word first.  When the padding word leaves no room for the
// length, the current block is compressed and a fresh one is started.
void ripemd160_8way_close( ripemd160_8way_context *sc, void *dst )
{
   const unsigned block_bytes = 64;
   const unsigned len_offset  = block_bytes - 8;
   unsigned pos = (unsigned)sc->count_low & ( block_bytes - 1U );

   sc->buf[ pos >> 2 ] = _mm256_set1_epi32( 0x80 );
   pos += 4;

   if ( pos <= len_offset )
      memset_zero_256( sc->buf + ( pos >> 2 ), ( len_offset - pos ) >> 2 );
   else
   {
      // No room left for the length field: flush this block first.
      memset_zero_256( sc->buf + ( pos >> 2 ), ( block_bytes - pos ) >> 2 );
      ripemd160_8way_round( sc );
      memset_zero_256( sc->buf, len_offset >> 2 );
   }

   // Convert the byte count to a 64 bit bit count split over two words.
   const uint32_t bytes   = sc->count_low;
   const uint32_t bits_hi = ( sc->count_high << 3 ) | ( bytes >> 29 );
   const uint32_t bits_lo = bytes << 3;

   sc->buf[ len_offset >> 2 ]         = _mm256_set1_epi32( bits_lo );
   sc->buf[ ( len_offset >> 2 ) + 1 ] = _mm256_set1_epi32( bits_hi );
   ripemd160_8way_round( sc );

   for ( unsigned i = 0; i < 5; i++ )
      casti_m256i( dst, i ) = sc->val[i];
}
|
||||
|
||||
#endif // __AVX2__
|
||||
|
||||
|
@@ -19,5 +19,20 @@ void ripemd160_4way_init( ripemd160_4way_context *sc );
|
||||
void ripemd160_4way( ripemd160_4way_context *sc, const void *data, size_t len );
|
||||
void ripemd160_4way_close( ripemd160_4way_context *sc, void *dst );
|
||||
|
||||
#endif
|
||||
#endif
|
||||
#if defined (__AVX2__)
|
||||
|
||||
// State carried between ripemd160_8way_init, ripemd160_8way and
// ripemd160_8way_close.
typedef struct
|
||||
{
|
||||
// Staging buffer for one 64 byte block: 16 vectors, one per 32 bit word.
__m256i buf[64>>2];
|
||||
// The five chaining words, loaded from IV at init and written out at close.
__m256i val[5];
|
||||
// Bytes absorbed so far, kept as a 64 bit count split into two words.
uint32_t count_high, count_low;
|
||||
} __attribute__ ((aligned (64))) ripemd160_8way_context;
|
||||
|
||||
void ripemd160_8way_init( ripemd160_8way_context *sc );
|
||||
void ripemd160_8way( ripemd160_8way_context *sc, const void *data, size_t len );
|
||||
void ripemd160_8way_close( ripemd160_8way_context *sc, void *dst );
|
||||
|
||||
|
||||
#endif // __AVX2__
|
||||
#endif // __AVX__
|
||||
#endif // RIPEMD_HASH_4WAY_H__
|
||||
|
@@ -39,7 +39,7 @@
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
// SHA256 4 way 32 bit
|
||||
// SHA-256 32 bit
|
||||
|
||||
static const sph_u32 H256[8] = {
|
||||
SPH_C32(0x6A09E667), SPH_C32(0xBB67AE85),
|
||||
@@ -83,6 +83,8 @@ static const sph_u32 K256[64] = {
|
||||
SPH_C32(0xBEF9A3F7), SPH_C32(0xC67178F2)
|
||||
};
|
||||
|
||||
// SHA-256 4 way
|
||||
|
||||
// Message schedule expansion for the 4-way SHA-256 round, evaluated on the
// array W that must be in scope where the macro is expanded:
//    SSG2_1( W[a] ) + W[b] + SSG2_0( W[c] ) + W[d]
// The macro is a plain expression and deliberately carries no trailing
// semicolon; the statement that uses it supplies the terminator.
#define SHA2s_MEXP( a, b, c, d ) \
   _mm_add_epi32( _mm_add_epi32( _mm_add_epi32( \
               SSG2_1( W[a] ), W[b] ), SSG2_0( W[c] ) ), W[d] )
|
||||
@@ -291,13 +293,297 @@ void sha256_4way_close( sha256_4way_context *sc, void *dst )
|
||||
sc->buf[ ( pad+4 ) >> 2 ] =
|
||||
mm_bswap_32( _mm_set1_epi32( low ) );
|
||||
sha256_4way_round( sc->buf, sc->val );
|
||||
|
||||
for ( u = 0; u < 8; u ++ )
|
||||
((__m128i*)dst)[u] = mm_bswap_32( sc->val[u] );
|
||||
}
|
||||
|
||||
#if defined(__AVX2__)
|
||||
|
||||
// SHA512 4 way 64 bit
|
||||
// SHA-256 8 way
|
||||
|
||||
// Logical functions of the SHA-256 compression function, operating on
// __m256i vectors of 32 bit words.

// Choose: each bit of X selects the matching bit of Y (X bit set) or Z.
#define CHx(X, Y, Z) \
   _mm256_xor_si256( Z, _mm256_and_si256( X, _mm256_xor_si256( Y, Z ) ) )

// Majority: each result bit is the value held by at least two of X, Y, Z.
#define MAJx(X, Y, Z) \
   _mm256_or_si256( _mm256_and_si256( Z, _mm256_or_si256( X, Y ) ), \
                    _mm256_and_si256( X, Y ) )

// Big sigma 0: rotr 2 ^ rotr 13 ^ rotr 22.
#define BSG2_0x(x) \
   _mm256_xor_si256( mm256_rotr_32( x, 2 ), \
      _mm256_xor_si256( mm256_rotr_32( x, 13 ), mm256_rotr_32( x, 22 ) ) )

// Big sigma 1: rotr 6 ^ rotr 11 ^ rotr 25.
#define BSG2_1x(x) \
   _mm256_xor_si256( mm256_rotr_32( x, 6 ), \
      _mm256_xor_si256( mm256_rotr_32( x, 11 ), mm256_rotr_32( x, 25 ) ) )

// Small sigma 0: rotr 7 ^ rotr 18 ^ shr 3.
#define SSG2_0x(x) \
   _mm256_xor_si256( mm256_rotr_32( x, 7 ), \
      _mm256_xor_si256( mm256_rotr_32( x, 18 ), _mm256_srli_epi32( x, 3 ) ) )

// Small sigma 1: rotr 17 ^ rotr 19 ^ shr 10.
#define SSG2_1x(x) \
   _mm256_xor_si256( mm256_rotr_32( x, 17 ), \
      _mm256_xor_si256( mm256_rotr_32( x, 19 ), _mm256_srli_epi32( x, 10 ) ) )
|
||||
|
||||
// Message schedule expansion for the 8-way SHA-256 round, evaluated on the
// 16 entry array W that must be in scope where the macro is expanded:
//    SSG2_1x( W[a] ) + W[b] + SSG2_0x( W[c] ) + W[d]
// The macro is a plain expression and deliberately carries no trailing
// semicolon; the statement that uses it supplies the terminator.
#define SHA2x_MEXP( a, b, c, d ) \
   _mm256_add_epi32( _mm256_add_epi32( _mm256_add_epi32( \
               SSG2_1x( W[a] ), W[b] ), SSG2_0x( W[c] ) ), W[d] )
|
||||
|
||||
// One SHA-256 step on 8-way vectors.  A..H name the working variables in
// their current rotation, i selects the schedule word W[i] and j+i selects
// the round constant K256[j+i].  Only D and H are written:
//    T1 = H + BSG2_1x(E) + CHx(E,F,G) + K256[j+i] + W[i]
//    T2 = BSG2_0x(A) + MAJx(A,B,C)
//    D += T1;  H = T1 + T2
// W and K256 must be in scope where the macro is expanded.
#define SHA2s_8WAY_STEP(A, B, C, D, E, F, G, H, i, j) \
do { \
   const __m256i KW = _mm256_add_epi32( W[i], \
                         _mm256_set1_epi32( K256[ (j)+(i) ] ) ); \
   const __m256i T1 = _mm256_add_epi32( KW, \
                         _mm256_add_epi32( CHx(E, F, G), \
                            _mm256_add_epi32( H, BSG2_1x(E) ) ) ); \
   const __m256i T2 = _mm256_add_epi32( MAJx(A, B, C), BSG2_0x(A) ); \
   D = _mm256_add_epi32( T1, D ); \
   H = _mm256_add_epi32( T2, T1 ); \
} while (0)
|
||||
|
||||
static void
|
||||
sha256_8way_round( __m256i *in, __m256i r[8] )
|
||||
{
|
||||
register __m256i A, B, C, D, E, F, G, H;
|
||||
__m256i W[16];
|
||||
|
||||
W[ 0] = mm256_bswap_32( in[ 0] );
|
||||
W[ 1] = mm256_bswap_32( in[ 1] );
|
||||
W[ 2] = mm256_bswap_32( in[ 2] );
|
||||
W[ 3] = mm256_bswap_32( in[ 3] );
|
||||
W[ 4] = mm256_bswap_32( in[ 4] );
|
||||
W[ 5] = mm256_bswap_32( in[ 5] );
|
||||
W[ 6] = mm256_bswap_32( in[ 6] );
|
||||
W[ 7] = mm256_bswap_32( in[ 7] );
|
||||
W[ 8] = mm256_bswap_32( in[ 8] );
|
||||
W[ 9] = mm256_bswap_32( in[ 9] );
|
||||
W[10] = mm256_bswap_32( in[10] );
|
||||
W[11] = mm256_bswap_32( in[11] );
|
||||
W[12] = mm256_bswap_32( in[12] );
|
||||
W[13] = mm256_bswap_32( in[13] );
|
||||
W[14] = mm256_bswap_32( in[14] );
|
||||
W[15] = mm256_bswap_32( in[15] );
|
||||
|
||||
A = r[0];
|
||||
B = r[1];
|
||||
C = r[2];
|
||||
D = r[3];
|
||||
E = r[4];
|
||||
F = r[5];
|
||||
G = r[6];
|
||||
H = r[7];
|
||||
|
||||
SHA2s_8WAY_STEP( A, B, C, D, E, F, G, H, 0, 0 );
|
||||
|
||||
//printf("sha256 8 step: D= %08lx H= %08lx\n",*(uint32_t*)&D,*(uint32_t*)&H);
|
||||
|
||||
SHA2s_8WAY_STEP( H, A, B, C, D, E, F, G, 1, 0 );
|
||||
SHA2s_8WAY_STEP( G, H, A, B, C, D, E, F, 2, 0 );
|
||||
SHA2s_8WAY_STEP( F, G, H, A, B, C, D, E, 3, 0 );
|
||||
SHA2s_8WAY_STEP( E, F, G, H, A, B, C, D, 4, 0 );
|
||||
SHA2s_8WAY_STEP( D, E, F, G, H, A, B, C, 5, 0 );
|
||||
SHA2s_8WAY_STEP( C, D, E, F, G, H, A, B, 6, 0 );
|
||||
SHA2s_8WAY_STEP( B, C, D, E, F, G, H, A, 7, 0 );
|
||||
SHA2s_8WAY_STEP( A, B, C, D, E, F, G, H, 8, 0 );
|
||||
SHA2s_8WAY_STEP( H, A, B, C, D, E, F, G, 9, 0 );
|
||||
SHA2s_8WAY_STEP( G, H, A, B, C, D, E, F, 10, 0 );
|
||||
SHA2s_8WAY_STEP( F, G, H, A, B, C, D, E, 11, 0 );
|
||||
SHA2s_8WAY_STEP( E, F, G, H, A, B, C, D, 12, 0 );
|
||||
SHA2s_8WAY_STEP( D, E, F, G, H, A, B, C, 13, 0 );
|
||||
SHA2s_8WAY_STEP( C, D, E, F, G, H, A, B, 14, 0 );
|
||||
SHA2s_8WAY_STEP( B, C, D, E, F, G, H, A, 15, 0 );
|
||||
|
||||
//printf("sha256 8 step: A= %08lx B= %08lx\n",*(uint32_t*)&A,*(uint32_t*)&B);
|
||||
|
||||
for ( int j = 16; j < 64; j += 16 )
|
||||
{
|
||||
W[ 0] = SHA2x_MEXP( 14, 9, 1, 0 );
|
||||
W[ 1] = SHA2x_MEXP( 15, 10, 2, 1 );
|
||||
W[ 2] = SHA2x_MEXP( 0, 11, 3, 2 );
|
||||
W[ 3] = SHA2x_MEXP( 1, 12, 4, 3 );
|
||||
W[ 4] = SHA2x_MEXP( 2, 13, 5, 4 );
|
||||
W[ 5] = SHA2x_MEXP( 3, 14, 6, 5 );
|
||||
W[ 6] = SHA2x_MEXP( 4, 15, 7, 6 );
|
||||
W[ 7] = SHA2x_MEXP( 5, 0, 8, 7 );
|
||||
W[ 8] = SHA2x_MEXP( 6, 1, 9, 8 );
|
||||
W[ 9] = SHA2x_MEXP( 7, 2, 10, 9 );
|
||||
W[10] = SHA2x_MEXP( 8, 3, 11, 10 );
|
||||
W[11] = SHA2x_MEXP( 9, 4, 12, 11 );
|
||||
W[12] = SHA2x_MEXP( 10, 5, 13, 12 );
|
||||
W[13] = SHA2x_MEXP( 11, 6, 14, 13 );
|
||||
W[14] = SHA2x_MEXP( 12, 7, 15, 14 );
|
||||
W[15] = SHA2x_MEXP( 13, 8, 0, 15 );
|
||||
|
||||
SHA2s_8WAY_STEP( A, B, C, D, E, F, G, H, 0, j );
|
||||
SHA2s_8WAY_STEP( H, A, B, C, D, E, F, G, 1, j );
|
||||
SHA2s_8WAY_STEP( G, H, A, B, C, D, E, F, 2, j );
|
||||
SHA2s_8WAY_STEP( F, G, H, A, B, C, D, E, 3, j );
|
||||
SHA2s_8WAY_STEP( E, F, G, H, A, B, C, D, 4, j );
|
||||
SHA2s_8WAY_STEP( D, E, F, G, H, A, B, C, 5, j );
|
||||
SHA2s_8WAY_STEP( C, D, E, F, G, H, A, B, 6, j );
|
||||
SHA2s_8WAY_STEP( B, C, D, E, F, G, H, A, 7, j );
|
||||
SHA2s_8WAY_STEP( A, B, C, D, E, F, G, H, 8, j );
|
||||
SHA2s_8WAY_STEP( H, A, B, C, D, E, F, G, 9, j );
|
||||
SHA2s_8WAY_STEP( G, H, A, B, C, D, E, F, 10, j );
|
||||
SHA2s_8WAY_STEP( F, G, H, A, B, C, D, E, 11, j );
|
||||
SHA2s_8WAY_STEP( E, F, G, H, A, B, C, D, 12, j );
|
||||
SHA2s_8WAY_STEP( D, E, F, G, H, A, B, C, 13, j );
|
||||
SHA2s_8WAY_STEP( C, D, E, F, G, H, A, B, 14, j );
|
||||
SHA2s_8WAY_STEP( B, C, D, E, F, G, H, A, 15, j );
|
||||
}
|
||||
|
||||
r[0] = _mm256_add_epi32( r[0], A );
|
||||
r[1] = _mm256_add_epi32( r[1], B );
|
||||
r[2] = _mm256_add_epi32( r[2], C );
|
||||
r[3] = _mm256_add_epi32( r[3], D );
|
||||
r[4] = _mm256_add_epi32( r[4], E );
|
||||
r[5] = _mm256_add_epi32( r[5], F );
|
||||
r[6] = _mm256_add_epi32( r[6], G );
|
||||
r[7] = _mm256_add_epi32( r[7], H );
|
||||
}
|
||||
|
||||
|
||||
// Prepare an 8-way SHA-256 context for a new message: the byte counter is
// cleared and each of the eight chaining words is filled with the matching
// H256 word broadcast to all 32-bit elements of the vector.
void sha256_8way_init( sha256_8way_context *sc )
{
   sc->count_low  = 0;
   sc->count_high = 0;

   for ( int i = 0; i < 8; i++ )
      sc->val[i] = _mm256_set1_epi32( H256[i] );
}
|
||||
|
||||
// Absorb message data into an 8-way SHA-256 context.
//
//   sc   - context previously set up by sha256_8way_init
//   data - input read as an array of __m256i; one vector is consumed for
//          every 4 bytes of len
//   len  - number of bytes to absorb, counted against the 64 byte block
//
// Data is staged in sc->buf and sha256_8way_round is run each time the
// staging buffer reaches a full 64 byte block.  The 64 bit byte counter
// (count_high:count_low) advances by the bytes consumed.
// NOTE(review): only whole 32 bit words are copied (take >> 2), so callers
// are presumably expected to pass a len that is a multiple of 4 - confirm.
void sha256_8way( sha256_8way_context *sc, const void *data, size_t len )
{
   const size_t block_bytes = 64;
   __m256i *src = (__m256i*)data;
   // Bytes already staged in the current block.
   size_t fill = (size_t)( sc->count_low & 63U );

   while ( len != 0 )
   {
      // Take as much as fits in the current block, but no more than is left.
      const size_t room = block_bytes - fill;
      const size_t take = ( len < room ) ? len : room;

      memcpy_256( sc->buf + ( fill >> 2 ), src, take >> 2 );
      src  += take >> 2;
      fill += take;
      len  -= take;

      if ( fill == block_bytes )
      {
         sha256_8way_round( sc->buf, sc->val );
         fill = 0;
      }

      // Advance the byte counter, carrying into the high word on wrap.
      const uint32_t before = sc->count_low;
      const uint32_t after  = SPH_T32( before + take );
      sc->count_low = after;
      if ( after < before )
         sc->count_high++;
   }
}
|
||||
|
||||
// Finish an 8-way SHA-256 computation and write the digest.
//
//   sc  - context holding the absorbed message
//   dst - receives the eight chaining vectors (8 * sizeof(__m256i) bytes),
//         each byte swapped per 32 bit word on the way out
//
// Padding: a 0x80 word is written after the data, the block is zero filled
// up to byte 56, and the message length in bits is stored as two byte
// swapped 32 bit words, high word first.  When the padding word leaves no
// room for the length, the current block is compressed and a fresh one is
// started.
void sha256_8way_close( sha256_8way_context *sc, void *dst )
{
   const unsigned block_bytes = 64;
   const unsigned len_offset  = block_bytes - 8;
   __m256i *out = (__m256i*)dst;
   unsigned pos = (unsigned)sc->count_low & ( block_bytes - 1U );

   sc->buf[ pos >> 2 ] = _mm256_set1_epi32( 0x80 );
   pos += 4;

   if ( pos <= len_offset )
      memset_zero_256( sc->buf + ( pos >> 2 ), ( len_offset - pos ) >> 2 );
   else
   {
      // No room left for the length field: flush this block first.
      memset_zero_256( sc->buf + ( pos >> 2 ), ( block_bytes - pos ) >> 2 );
      sha256_8way_round( sc->buf, sc->val );
      memset_zero_256( sc->buf, len_offset >> 2 );
   }

   // Convert the byte count to a 64 bit bit count split over two words.
   const uint32_t bytes   = sc->count_low;
   const uint32_t bits_hi = ( sc->count_high << 3 ) | ( bytes >> 29 );
   const uint32_t bits_lo = bytes << 3;

   sc->buf[ len_offset >> 2 ] =
                  mm256_bswap_32( _mm256_set1_epi32( bits_hi ) );
   sc->buf[ ( len_offset + 4 ) >> 2 ] =
                  mm256_bswap_32( _mm256_set1_epi32( bits_lo ) );
   sha256_8way_round( sc->buf, sc->val );

   for ( unsigned i = 0; i < 8; i++ )
      out[i] = mm256_bswap_32( sc->val[i] );
}
|
||||
|
||||
|
||||
// SHA-512 4 way 64 bit
|
||||
|
||||
static const sph_u64 H512[8] = {
|
||||
SPH_C64(0x6A09E667F3BCC908), SPH_C64(0xBB67AE8584CAA73B),
|
||||
|
@@ -46,7 +46,9 @@
|
||||
|
||||
#if defined(__AVX__)
|
||||
|
||||
#define SPH_SIZE_sha256 256
|
||||
//#define SPH_SIZE_sha256 256
|
||||
|
||||
// SHA-256 4 way
|
||||
|
||||
typedef struct {
|
||||
__m128i buf[64>>2];
|
||||
@@ -60,7 +62,21 @@ void sha256_4way_close( sha256_4way_context *sc, void *dst );
|
||||
|
||||
#if defined (__AVX2__)
|
||||
|
||||
#define SPH_SIZE_sha512 512
|
||||
// SHA-256 8 way
|
||||
|
||||
// State carried between sha256_8way_init, sha256_8way and sha256_8way_close.
typedef struct {
|
||||
// Staging buffer for one 64 byte block: 16 vectors, one per 32 bit word.
__m256i buf[64>>2];
|
||||
// The eight chaining words, loaded from H256 at init and written out at close.
__m256i val[8];
|
||||
// Bytes absorbed so far, kept as a 64 bit count split into two words.
uint32_t count_high, count_low;
|
||||
} sha256_8way_context;
|
||||
|
||||
void sha256_8way_init( sha256_8way_context *sc );
|
||||
void sha256_8way( sha256_8way_context *sc, const void *data, size_t len );
|
||||
void sha256_8way_close( sha256_8way_context *sc, void *dst );
|
||||
|
||||
//#define SPH_SIZE_sha512 512
|
||||
|
||||
// SHA-512 4 way
|
||||
|
||||
typedef struct {
|
||||
__m256i buf[128>>3];
|
||||
|
@@ -39,7 +39,6 @@ int scanhash_skein_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint32_t n = first_nonce;
|
||||
// hash is returned deinterleaved
|
||||
uint32_t *nonces = work->nonces;
|
||||
bool *found = work->nfound;
|
||||
int num_found = 0;
|
||||
|
||||
// data is 80 bytes, 20 u32 or 4 u64.
|
||||
@@ -48,47 +47,23 @@ int scanhash_skein_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
|
||||
mm256_interleave_4x64( vdata, edata, edata, edata, edata, 640 );
|
||||
|
||||
uint32_t *noncep0 = vdata + 73; // 9*8 + 1
|
||||
uint32_t *noncep1 = vdata + 75;
|
||||
uint32_t *noncep2 = vdata + 77;
|
||||
uint32_t *noncep3 = vdata + 79;
|
||||
uint32_t *noncep = vdata + 73; // 9*8 + 1
|
||||
|
||||
do
|
||||
{
|
||||
found[0] = found[1] = found[2] = found[3] = false;
|
||||
be32enc( noncep0, n );
|
||||
be32enc( noncep1, n+1 );
|
||||
be32enc( noncep2, n+2 );
|
||||
be32enc( noncep3, n+3 );
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep+2, n+1 );
|
||||
be32enc( noncep+4, n+2 );
|
||||
be32enc( noncep+6, n+3 );
|
||||
|
||||
skeinhash_4way( hash, vdata );
|
||||
|
||||
if ( hash[7] < Htarg && fulltest( hash, ptarget ) )
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
|
||||
{
|
||||
found[0] = true;
|
||||
num_found++;
|
||||
nonces[0] = n;
|
||||
// always put nonce0 in work data for compartibility with
|
||||
// non vectored algos.
|
||||
pdata[19] = n;
|
||||
}
|
||||
if ( (hash+8)[7] < Htarg && fulltest( hash+8, ptarget ) )
|
||||
{
|
||||
found[1] = true;
|
||||
num_found++;
|
||||
nonces[1] = n+1;
|
||||
}
|
||||
if ( (hash+16)[7] < Htarg && fulltest( hash+16, ptarget ) )
|
||||
{
|
||||
found[2] = true;
|
||||
num_found++;
|
||||
nonces[2] = n+2;
|
||||
}
|
||||
if ( (hash+24)[7] < Htarg && fulltest( hash+24, ptarget ) )
|
||||
{
|
||||
found[3] = true;
|
||||
num_found++;
|
||||
nonces[3] = n+3;
|
||||
pdata[19] = n+i;
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
}
|
||||
n += 4;
|
||||
} while ( (num_found == 0) && (n < max_nonce)
|
||||
|
@@ -36,51 +36,29 @@ int scanhash_skein2_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint32_t n = first_nonce;
|
||||
// hash is returned deinterleaved
|
||||
uint32_t *nonces = work->nonces;
|
||||
bool *found = work->nfound;
|
||||
int num_found = 0;
|
||||
|
||||
swab32_array( endiandata, pdata, 20 );
|
||||
|
||||
mm256_interleave_4x64( vdata, edata, edata, edata, edata, 640 );
|
||||
|
||||
uint32_t *noncep0 = vdata + 73; // 9*8 + 1
|
||||
uint32_t *noncep1 = vdata + 75;
|
||||
uint32_t *noncep2 = vdata + 77;
|
||||
uint32_t *noncep3 = vdata + 79;
|
||||
uint32_t *noncep = vdata + 73; // 9*8 + 1
|
||||
|
||||
do
|
||||
{
|
||||
found[0] = found[1] = found[2] = found[3] = false;
|
||||
be32enc( noncep0, n );
|
||||
be32enc( noncep1, n+1 );
|
||||
be32enc( noncep2, n+2 );
|
||||
be32enc( noncep3, n+3 );
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep+2, n+1 );
|
||||
be32enc( noncep+4, n+2 );
|
||||
be32enc( noncep+6, n+3 );
|
||||
|
||||
skein2hash( hash, vdata );
|
||||
|
||||
if ( hash[7] < Htarg && fulltest( hash, ptarget ) )
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
|
||||
{
|
||||
found[0] = true;
|
||||
num_found++;
|
||||
nonces[0] = n;
|
||||
}
|
||||
if ( (hash+8)[7] < Htarg && fulltest( hash+8, ptarget ) )
|
||||
{
|
||||
found[1] = true;
|
||||
num_found++;
|
||||
nonces[1] = n+1;
|
||||
}
|
||||
if ( (hash+16)[7] < Htarg && fulltest( hash+16, ptarget ) )
|
||||
{
|
||||
found[2] = true;
|
||||
num_found++;
|
||||
nonces[2] = n+2;
|
||||
}
|
||||
if ( (hash+24)[7] < Htarg && fulltest( hash+24, ptarget ) )
|
||||
{
|
||||
found[3] = true;
|
||||
num_found++;
|
||||
nonces[3] = n+3;
|
||||
pdata[19] = n+i;
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
}
|
||||
n += 4;
|
||||
} while ( (num_found == 0) && (n < max_nonce)
|
||||
|
@@ -61,12 +61,8 @@ int scanhash_whirlpool_4way( int thr_id, struct work* work, uint32_t max_nonce,
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
uint32_t *nonces = work->nonces;
|
||||
bool *found = work->nfound;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep0 = vdata + 73; // 9*8 + 1
|
||||
uint32_t *noncep1 = vdata + 75;
|
||||
uint32_t *noncep2 = vdata + 77;
|
||||
uint32_t *noncep3 = vdata + 79;
|
||||
uint32_t *noncep = vdata + 73; // 9*8 + 1
|
||||
|
||||
if (opt_benchmark)
|
||||
((uint32_t*)ptarget)[7] = 0x0000ff;
|
||||
@@ -83,42 +79,19 @@ int scanhash_whirlpool_4way( int thr_id, struct work* work, uint32_t max_nonce,
|
||||
|
||||
do {
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
found[0] = found[1] = found[2] = found[3] = false;
|
||||
be32enc( noncep0, n );
|
||||
be32enc( noncep1, n+1 );
|
||||
be32enc( noncep2, n+2 );
|
||||
be32enc( noncep3, n+3 );
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep+2, n+1 );
|
||||
be32enc( noncep+4, n+2 );
|
||||
be32enc( noncep+6, n+3 );
|
||||
pdata[19] = n;
|
||||
|
||||
whirlpool_hash_4way( hash, vdata );
|
||||
|
||||
pdata[19] = n;
|
||||
if ( hash[7] <= Htarg && fulltest( hash, ptarget ) )
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
|
||||
{
|
||||
found[0] = true;
|
||||
num_found++;
|
||||
nonces[0] = n;
|
||||
work_set_target_ratio(work, hash);
|
||||
}
|
||||
if ( (hash+8)[7] <= Htarg && fulltest( hash+8, ptarget ) )
|
||||
{
|
||||
found[1] = true;
|
||||
num_found++;
|
||||
nonces[1] = n+1;
|
||||
work_set_target_ratio( work, hash+8 );
|
||||
}
|
||||
if ( (hash+16)[7] <= Htarg && fulltest( hash+16, ptarget ) )
|
||||
{
|
||||
found[2] = true;
|
||||
num_found++;
|
||||
nonces[2] = n+2;
|
||||
work_set_target_ratio( work, hash+16 );
|
||||
}
|
||||
if ( (hash+24)[7] <= Htarg && fulltest( hash+24, ptarget ) )
|
||||
{
|
||||
found[3] = true;
|
||||
num_found++;
|
||||
nonces[3] = n+3;
|
||||
work_set_target_ratio( work, hash+24 );
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
}
|
||||
n += 4;
|
||||
|
||||
|
@@ -171,12 +171,8 @@ int scanhash_c11_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint32_t n = pdata[19];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t *nonces = work->nonces;
|
||||
bool *found = work->nfound;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep0 = vdata + 73; // 9*8 + 1
|
||||
uint32_t *noncep1 = vdata + 75;
|
||||
uint32_t *noncep2 = vdata + 77;
|
||||
uint32_t *noncep3 = vdata + 79;
|
||||
uint32_t *noncep = vdata + 73; // 9*8 + 1
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint64_t htmax[] = { 0, 0xF, 0xFF,
|
||||
0xFFF, 0xFFFF, 0x10000000 };
|
||||
@@ -195,42 +191,21 @@ int scanhash_c11_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint32_t mask = masks[m];
|
||||
do
|
||||
{
|
||||
found[0] = found[1] = found[2] = found[3] = false;
|
||||
be32enc( noncep0, n );
|
||||
be32enc( noncep1, n+1 );
|
||||
be32enc( noncep2, n+2 );
|
||||
be32enc( noncep3, n+3 );
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep+2, n+1 );
|
||||
be32enc( noncep+4, n+2 );
|
||||
be32enc( noncep+6, n+3 );
|
||||
|
||||
c11_4way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
if ( ( hash[7] & mask ) == 0 && fulltest( hash, ptarget ) )
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( ( ( (hash+(i<<3))[7] & mask ) == 0 )
|
||||
&& fulltest( hash+(i<<3), ptarget ) )
|
||||
{
|
||||
found[0] = true;
|
||||
num_found++;
|
||||
nonces[0] = n;
|
||||
work_set_target_ratio( work, hash );
|
||||
}
|
||||
if ( ( (hash+8)[7] & mask ) == 0 && fulltest( hash+8, ptarget ) )
|
||||
{
|
||||
found[1] = true;
|
||||
num_found++;
|
||||
nonces[1] = n+1;
|
||||
work_set_target_ratio( work, hash+8 );
|
||||
}
|
||||
if ( ( (hash+16)[7] & mask ) == 0 && fulltest( hash+16, ptarget ) )
|
||||
{
|
||||
found[2] = true;
|
||||
num_found++;
|
||||
nonces[2] = n+2;
|
||||
work_set_target_ratio( work, hash+16 );
|
||||
}
|
||||
if ( ( (hash+24)[7] & mask ) == 0 && fulltest( hash+24, ptarget ) )
|
||||
{
|
||||
found[3] = true;
|
||||
num_found++;
|
||||
nonces[3] = n+3;
|
||||
work_set_target_ratio( work, hash+24 );
|
||||
pdata[19] = n+i;
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
}
|
||||
n += 4;
|
||||
} while ( ( num_found == 0 ) && ( n < max_nonce )
|
||||
|
@@ -191,12 +191,8 @@ int scanhash_timetravel_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint32_t n = pdata[19];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t *nonces = work->nonces;
|
||||
bool *found = work->nfound;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep0 = vdata + 73; // 9*8 + 1
|
||||
uint32_t *noncep1 = vdata + 75;
|
||||
uint32_t *noncep2 = vdata + 77;
|
||||
uint32_t *noncep3 = vdata + 79;
|
||||
uint32_t *noncep = vdata + 73; // 9*8 + 1
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
||||
int i;
|
||||
@@ -224,45 +220,24 @@ int scanhash_timetravel_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
|
||||
do
|
||||
{
|
||||
found[0] = found[1] = found[2] = found[3] = false;
|
||||
be32enc( noncep0, n );
|
||||
be32enc( noncep1, n+1 );
|
||||
be32enc( noncep2, n+2 );
|
||||
be32enc( noncep3, n+3 );
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep+2, n+1 );
|
||||
be32enc( noncep+4, n+2 );
|
||||
be32enc( noncep+6, n+3 );
|
||||
|
||||
timetravel_4way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
if ( hash[7] <= Htarg && fulltest( hash, ptarget) )
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
|
||||
{
|
||||
found[0] = true;
|
||||
num_found++;
|
||||
nonces[0] = n;
|
||||
work_set_target_ratio( work, hash );
|
||||
}
|
||||
if ( (hash+8)[7] <= Htarg && fulltest( hash+8, ptarget) )
|
||||
{
|
||||
found[1] = true;
|
||||
num_found++;
|
||||
nonces[1] = n+1;
|
||||
work_set_target_ratio( work, hash+8 );
|
||||
}
|
||||
if ( (hash+16)[7] <= Htarg && fulltest( hash+16, ptarget) )
|
||||
{
|
||||
found[2] = true;
|
||||
num_found++;
|
||||
nonces[2] = n+2;
|
||||
work_set_target_ratio( work, hash+16 );
|
||||
}
|
||||
if ( (hash+24)[7] <= Htarg && fulltest( hash+24, ptarget) )
|
||||
{
|
||||
found[3] = true;
|
||||
num_found++;
|
||||
nonces[3] = n+3;
|
||||
work_set_target_ratio( work, hash+24 );
|
||||
pdata[19] = n+i;
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
}
|
||||
n += 4;
|
||||
} while ( ( num_found == 0 ) && ( n < max_nonce ) && !(*restart) );
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return num_found;
|
||||
}
|
||||
|
@@ -229,12 +229,8 @@ int scanhash_timetravel10_4way( int thr_id, struct work *work,
|
||||
uint32_t n = pdata[19];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t *nonces = work->nonces;
|
||||
bool *found = work->nfound;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep0 = vdata + 73; // 9*8 + 1
|
||||
uint32_t *noncep1 = vdata + 75;
|
||||
uint32_t *noncep2 = vdata + 77;
|
||||
uint32_t *noncep3 = vdata + 79;
|
||||
uint32_t *noncep = vdata + 73; // 9*8 + 1
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
||||
int i;
|
||||
@@ -262,42 +258,20 @@ int scanhash_timetravel10_4way( int thr_id, struct work *work,
|
||||
|
||||
do
|
||||
{
|
||||
found[0] = found[1] = found[2] = found[3] = false;
|
||||
be32enc( noncep0, n );
|
||||
be32enc( noncep1, n+1 );
|
||||
be32enc( noncep2, n+2 );
|
||||
be32enc( noncep3, n+3 );
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep+2, n+1 );
|
||||
be32enc( noncep+4, n+2 );
|
||||
be32enc( noncep+6, n+3 );
|
||||
|
||||
timetravel10_4way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
if ( hash[7] <= Htarg && fulltest( hash, ptarget) )
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
|
||||
{
|
||||
found[0] = true;
|
||||
num_found++;
|
||||
nonces[0] = n;
|
||||
work_set_target_ratio( work, hash );
|
||||
}
|
||||
if ( (hash+8)[7] <= Htarg && fulltest( hash+8, ptarget) )
|
||||
{
|
||||
found[1] = true;
|
||||
num_found++;
|
||||
nonces[1] = n+1;
|
||||
work_set_target_ratio( work, hash+8 );
|
||||
}
|
||||
if ( (hash+16)[7] <= Htarg && fulltest( hash+16, ptarget) )
|
||||
{
|
||||
found[2] = true;
|
||||
num_found++;
|
||||
nonces[2] = n+2;
|
||||
work_set_target_ratio( work, hash+16 );
|
||||
}
|
||||
if ( (hash+24)[7] <= Htarg && fulltest( hash+24, ptarget) )
|
||||
{
|
||||
found[3] = true;
|
||||
num_found++;
|
||||
nonces[3] = n+3;
|
||||
work_set_target_ratio( work, hash+24 );
|
||||
pdata[19] = n+i;
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
}
|
||||
n += 4;
|
||||
} while ( ( num_found == 0 ) && ( n < max_nonce ) && !(*restart) );
|
||||
|
@@ -70,12 +70,8 @@ int scanhash_tribus_4way(int thr_id, struct work *work, uint32_t max_nonce, uint
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint32_t n = pdata[19];
|
||||
uint32_t *nonces = work->nonces;
|
||||
bool *found = work->nfound;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep0 = vdata + 73; // 9*8 + 1
|
||||
uint32_t *noncep1 = vdata + 75;
|
||||
uint32_t *noncep2 = vdata + 77;
|
||||
uint32_t *noncep3 = vdata + 79;
|
||||
uint32_t *noncep = vdata + 73; // 9*8 + 1
|
||||
|
||||
uint64_t htmax[] = { 0,
|
||||
0xF,
|
||||
@@ -112,49 +108,24 @@ int scanhash_tribus_4way(int thr_id, struct work *work, uint32_t max_nonce, uint
|
||||
{
|
||||
uint32_t mask = masks[m];
|
||||
do {
|
||||
found[0] = found[1] = found[2] = found[3] = false;
|
||||
be32enc( noncep0, n );
|
||||
be32enc( noncep1, n+1 );
|
||||
be32enc( noncep2, n+2 );
|
||||
be32enc( noncep3, n+3 );
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep+2, n+1 );
|
||||
be32enc( noncep+4, n+2 );
|
||||
be32enc( noncep+6, n+3 );
|
||||
|
||||
tribus_hash_4way( hash, vdata );
|
||||
|
||||
pdata[19] = n;
|
||||
|
||||
if ( ( !(hash[7] & mask) )
|
||||
&& fulltest( hash, ptarget ) )
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( ( !( (hash+(i<<3))[7] & mask ) )
|
||||
&& fulltest( hash+(i<<3), ptarget ) )
|
||||
{
|
||||
found[0] = true;
|
||||
num_found++;
|
||||
nonces[0] = n;
|
||||
work_set_target_ratio(work, hash);
|
||||
}
|
||||
if ( ( !((hash+8)[7] & mask) )
|
||||
&& fulltest (hash+8, ptarget ) )
|
||||
{
|
||||
found[1] = true;
|
||||
num_found++;
|
||||
nonces[1] = n+1;
|
||||
work_set_target_ratio(work, hash+8);
|
||||
}
|
||||
if ( ( !((hash+16)[7] & mask) )
|
||||
&& fulltest( hash+16, ptarget ) )
|
||||
{
|
||||
found[2] = true;
|
||||
num_found++;
|
||||
nonces[2] = n+2;
|
||||
work_set_target_ratio(work, hash+16);
|
||||
}
|
||||
if ( ( !((hash+24)[7] & mask) )
|
||||
&& fulltest( hash+24, ptarget ) )
|
||||
{
|
||||
found[3] = true;
|
||||
num_found++;
|
||||
nonces[3] = n+3;
|
||||
work_set_target_ratio(work, hash+24);
|
||||
}
|
||||
n += 4;
|
||||
pdata[19] = n+i;
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
}
|
||||
n += 4;
|
||||
} while ( (num_found == 0) && ( n < max_nonce )
|
||||
&& !work_restart[thr_id].restart);
|
||||
break;
|
||||
|
@@ -170,12 +170,8 @@ int scanhash_x11_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint32_t n = pdata[19];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t *nonces = work->nonces;
|
||||
bool *found = work->nfound;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep0 = vdata + 73; // 9*8 + 1
|
||||
uint32_t *noncep1 = vdata + 75;
|
||||
uint32_t *noncep2 = vdata + 77;
|
||||
uint32_t *noncep3 = vdata + 79;
|
||||
uint32_t *noncep = vdata + 73; // 9*8 + 1
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint64_t htmax[] = { 0, 0xF, 0xFF,
|
||||
0xFFF, 0xFFFF, 0x10000000 };
|
||||
@@ -194,42 +190,21 @@ int scanhash_x11_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint32_t mask = masks[m];
|
||||
do
|
||||
{
|
||||
found[0] = found[1] = found[2] = found[3] = false;
|
||||
be32enc( noncep0, n );
|
||||
be32enc( noncep1, n+1 );
|
||||
be32enc( noncep2, n+2 );
|
||||
be32enc( noncep3, n+3 );
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep+2, n+1 );
|
||||
be32enc( noncep+4, n+2 );
|
||||
be32enc( noncep+6, n+3 );
|
||||
|
||||
x11_4way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
if ( ( hash[7] & mask ) == 0 && fulltest( hash, ptarget ) )
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( ( ( (hash+(i<<3))[7] & mask ) == 0 )
|
||||
&& fulltest( hash+(i<<3), ptarget ) )
|
||||
{
|
||||
found[0] = true;
|
||||
num_found++;
|
||||
nonces[0] = n;
|
||||
work_set_target_ratio( work, hash );
|
||||
}
|
||||
if ( ( (hash+8)[7] & mask ) == 0 && fulltest( hash+8, ptarget ) )
|
||||
{
|
||||
found[1] = true;
|
||||
num_found++;
|
||||
nonces[1] = n+1;
|
||||
work_set_target_ratio( work, hash+8 );
|
||||
}
|
||||
if ( ( (hash+16)[7] & mask ) == 0 && fulltest( hash+16, ptarget ) )
|
||||
{
|
||||
found[2] = true;
|
||||
num_found++;
|
||||
nonces[2] = n+2;
|
||||
work_set_target_ratio( work, hash+16 );
|
||||
}
|
||||
if ( ( (hash+24)[7] & mask ) == 0 && fulltest( hash+24, ptarget ) )
|
||||
{
|
||||
found[3] = true;
|
||||
num_found++;
|
||||
nonces[3] = n+3;
|
||||
work_set_target_ratio( work, hash+24 );
|
||||
pdata[19] = n+i;
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
}
|
||||
n += 4;
|
||||
} while ( ( num_found == 0 ) && ( n < max_nonce )
|
||||
|
@@ -243,12 +243,8 @@ int scanhash_x11evo_4way( int thr_id, struct work* work, uint32_t max_nonce,
|
||||
uint32_t n = pdata[19];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t *nonces = work->nonces;
|
||||
bool *found = work->nfound;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep0 = vdata + 73; // 9*8 + 1
|
||||
uint32_t *noncep1 = vdata + 75;
|
||||
uint32_t *noncep2 = vdata + 77;
|
||||
uint32_t *noncep3 = vdata + 79;
|
||||
uint32_t *noncep = vdata + 73; // 9*8 + 1
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
|
||||
swab32_array( endiandata, pdata, 20 );
|
||||
@@ -278,42 +274,21 @@ int scanhash_x11evo_4way( int thr_id, struct work* work, uint32_t max_nonce,
|
||||
|
||||
do
|
||||
{
|
||||
found[0] = found[1] = found[2] = found[3] = false;
|
||||
be32enc( noncep0, n );
|
||||
be32enc( noncep1, n+1 );
|
||||
be32enc( noncep2, n+2 );
|
||||
be32enc( noncep3, n+3 );
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep+2, n+1 );
|
||||
be32enc( noncep+4, n+2 );
|
||||
be32enc( noncep+6, n+3 );
|
||||
|
||||
x11evo_4way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
if ( ( hash[7] & hmask ) == 0 && fulltest( hash, ptarget ) )
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( ( ( (hash+(i<<3))[7] & hmask ) == 0 )
|
||||
&& fulltest( hash+(i<<3), ptarget ) )
|
||||
{
|
||||
found[0] = true;
|
||||
num_found++;
|
||||
nonces[0] = n;
|
||||
work_set_target_ratio( work, hash );
|
||||
}
|
||||
if ( ( (hash+8)[7] & hmask ) == 0 && fulltest( hash+8, ptarget ) )
|
||||
{
|
||||
found[1] = true;
|
||||
num_found++;
|
||||
nonces[1] = n+1;
|
||||
work_set_target_ratio( work, hash+8 );
|
||||
}
|
||||
if ( ( (hash+16)[7] & hmask ) == 0 && fulltest( hash+16, ptarget ) )
|
||||
{
|
||||
found[2] = true;
|
||||
num_found++;
|
||||
nonces[2] = n+2;
|
||||
work_set_target_ratio( work, hash+16 );
|
||||
}
|
||||
if ( ( (hash+24)[7] & hmask ) == 0 && fulltest( hash+24, ptarget ) )
|
||||
{
|
||||
found[3] = true;
|
||||
num_found++;
|
||||
nonces[3] = n+3;
|
||||
work_set_target_ratio( work, hash+24 );
|
||||
pdata[19] = n+i;
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
}
|
||||
n += 4;
|
||||
} while ( ( num_found == 0 ) && ( n < max_nonce )
|
||||
|
@@ -177,12 +177,8 @@ int scanhash_x11gost_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint32_t n = pdata[19];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t *nonces = work->nonces;
|
||||
bool *found = work->nfound;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep0 = vdata + 73; // 9*8 + 1
|
||||
uint32_t *noncep1 = vdata + 75;
|
||||
uint32_t *noncep2 = vdata + 77;
|
||||
uint32_t *noncep3 = vdata + 79;
|
||||
uint32_t *noncep = vdata + 73; // 9*8 + 1
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint64_t htmax[] = { 0, 0xF, 0xFF,
|
||||
0xFFF, 0xFFFF, 0x10000000 };
|
||||
@@ -201,42 +197,21 @@ int scanhash_x11gost_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint32_t mask = masks[m];
|
||||
do
|
||||
{
|
||||
found[0] = found[1] = found[2] = found[3] = false;
|
||||
be32enc( noncep0, n );
|
||||
be32enc( noncep1, n+1 );
|
||||
be32enc( noncep2, n+2 );
|
||||
be32enc( noncep3, n+3 );
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep+2, n+1 );
|
||||
be32enc( noncep+4, n+2 );
|
||||
be32enc( noncep+6, n+3 );
|
||||
|
||||
x11gost_4way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
if ( ( hash[7] & mask ) == 0 && fulltest( hash, ptarget ) )
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( ( ( (hash+(i<<3))[7] & mask ) == 0 )
|
||||
&& fulltest( hash+(i<<3), ptarget ) )
|
||||
{
|
||||
found[0] = true;
|
||||
num_found++;
|
||||
nonces[0] = n;
|
||||
work_set_target_ratio( work, hash );
|
||||
}
|
||||
if ( ( (hash+8)[7] & mask ) == 0 && fulltest( hash+8, ptarget ) )
|
||||
{
|
||||
found[1] = true;
|
||||
num_found++;
|
||||
nonces[1] = n+1;
|
||||
work_set_target_ratio( work, hash+8 );
|
||||
}
|
||||
if ( ( (hash+16)[7] & mask ) == 0 && fulltest( hash+16, ptarget ) )
|
||||
{
|
||||
found[2] = true;
|
||||
num_found++;
|
||||
nonces[2] = n+2;
|
||||
work_set_target_ratio( work, hash+16 );
|
||||
}
|
||||
if ( ( (hash+24)[7] & mask ) == 0 && fulltest( hash+24, ptarget ) )
|
||||
{
|
||||
found[3] = true;
|
||||
num_found++;
|
||||
nonces[3] = n+3;
|
||||
work_set_target_ratio( work, hash+24 );
|
||||
pdata[19] = n+i;
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
}
|
||||
n += 4;
|
||||
} while ( ( num_found == 0 ) && ( n < max_nonce )
|
||||
|
@@ -199,12 +199,8 @@ int scanhash_x12_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint32_t n = pdata[19];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t *nonces = work->nonces;
|
||||
bool *found = work->nfound;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep0 = vdata + 73; // 9*8 + 1
|
||||
uint32_t *noncep1 = vdata + 75;
|
||||
uint32_t *noncep2 = vdata + 77;
|
||||
uint32_t *noncep3 = vdata + 79;
|
||||
uint32_t *noncep = vdata + 73; // 9*8 + 1
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint64_t htmax[] = { 0, 0xF, 0xFF,
|
||||
0xFFF, 0xFFFF, 0x10000000 };
|
||||
@@ -223,42 +219,21 @@ int scanhash_x12_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint32_t mask = masks[m];
|
||||
do
|
||||
{
|
||||
found[0] = found[1] = found[2] = found[3] = false;
|
||||
be32enc( noncep0, n );
|
||||
be32enc( noncep1, n+1 );
|
||||
be32enc( noncep2, n+2 );
|
||||
be32enc( noncep3, n+3 );
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep+2, n+1 );
|
||||
be32enc( noncep+4, n+2 );
|
||||
be32enc( noncep+6, n+3 );
|
||||
|
||||
x12_4way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
if ( ( hash[7] & mask ) == 0 && fulltest( hash, ptarget ) )
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( ( ( (hash+(i<<3))[7] & mask ) == 0 )
|
||||
&& fulltest( hash+(i<<3), ptarget ) )
|
||||
{
|
||||
found[0] = true;
|
||||
num_found++;
|
||||
nonces[0] = n;
|
||||
work_set_target_ratio( work, hash );
|
||||
}
|
||||
if ( ( (hash+8)[7] & mask ) == 0 && fulltest( hash+8, ptarget ) )
|
||||
{
|
||||
found[1] = true;
|
||||
num_found++;
|
||||
nonces[1] = n+1;
|
||||
work_set_target_ratio( work, hash+8 );
|
||||
}
|
||||
if ( ( (hash+16)[7] & mask ) == 0 && fulltest( hash+16, ptarget ) )
|
||||
{
|
||||
found[2] = true;
|
||||
num_found++;
|
||||
nonces[2] = n+2;
|
||||
work_set_target_ratio( work, hash+16 );
|
||||
}
|
||||
if ( ( (hash+24)[7] & mask ) == 0 && fulltest( hash+24, ptarget ) )
|
||||
{
|
||||
found[3] = true;
|
||||
num_found++;
|
||||
nonces[3] = n+3;
|
||||
work_set_target_ratio( work, hash+24 );
|
||||
pdata[19] = n+i;
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
}
|
||||
n += 4;
|
||||
} while ( ( num_found == 0 ) && ( n < max_nonce )
|
||||
|
@@ -120,12 +120,8 @@ int scanhash_phi1612_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint32_t _ALIGN(64) endiandata[20];
|
||||
uint32_t n = first_nonce;
|
||||
uint32_t *nonces = work->nonces;
|
||||
bool *found = work->nfound;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep0 = vdata + 73; // 9*8 + 1
|
||||
uint32_t *noncep1 = vdata + 75;
|
||||
uint32_t *noncep2 = vdata + 77;
|
||||
uint32_t *noncep3 = vdata + 79;
|
||||
uint32_t *noncep = vdata + 73; // 9*8 + 1
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
|
||||
if ( opt_benchmark )
|
||||
@@ -138,42 +134,20 @@ int scanhash_phi1612_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
mm256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||
|
||||
do {
|
||||
found[0] = found[1] = found[2] = found[3] = false;
|
||||
be32enc( noncep0, n );
|
||||
be32enc( noncep1, n+1 );
|
||||
be32enc( noncep2, n+2 );
|
||||
be32enc( noncep3, n+3 );
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep+2, n+1 );
|
||||
be32enc( noncep+4, n+2 );
|
||||
be32enc( noncep+6, n+3 );
|
||||
|
||||
phi1612_4way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
if ( hash[7] <= Htarg && fulltest( hash, ptarget ) )
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
|
||||
{
|
||||
found[0] = true;
|
||||
num_found++;
|
||||
nonces[0] = n;
|
||||
work_set_target_ratio( work, hash );
|
||||
}
|
||||
if ( (hash+8)[7] <= Htarg && fulltest( hash+8, ptarget ) )
|
||||
{
|
||||
found[1] = true;
|
||||
num_found++;
|
||||
nonces[1] = n+1;
|
||||
work_set_target_ratio( work, hash+8 );
|
||||
}
|
||||
if ( (hash+16)[7] <= Htarg && fulltest( hash+16, ptarget ) )
|
||||
{
|
||||
found[2] = true;
|
||||
num_found++;
|
||||
nonces[2] = n+2;
|
||||
work_set_target_ratio( work, hash+16 );
|
||||
}
|
||||
if ( (hash+24)[7] <= Htarg && fulltest( hash+24, ptarget ) )
|
||||
{
|
||||
found[3] = true;
|
||||
num_found++;
|
||||
nonces[3] = n+3;
|
||||
work_set_target_ratio( work, hash+24 );
|
||||
pdata[19] = n+i;
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
}
|
||||
n += 4;
|
||||
} while ( ( num_found == 0 ) && ( n < max_nonce )
|
||||
|
@@ -84,12 +84,8 @@ int scanhash_skunk_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
uint32_t *nonces = work->nonces;
|
||||
bool *found = work->nfound;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep0 = vdata + 73; // 9*8 + 1
|
||||
uint32_t *noncep1 = vdata + 75;
|
||||
uint32_t *noncep2 = vdata + 77;
|
||||
uint32_t *noncep3 = vdata + 79;
|
||||
uint32_t *noncep = vdata + 73; // 9*8 + 1
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
||||
|
||||
@@ -102,42 +98,20 @@ int scanhash_skunk_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
mm256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||
do
|
||||
{
|
||||
found[0] = found[1] = found[2] = found[3] = false;
|
||||
be32enc( noncep0, n );
|
||||
be32enc( noncep1, n+1 );
|
||||
be32enc( noncep2, n+2 );
|
||||
be32enc( noncep3, n+3 );
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep+2, n+1 );
|
||||
be32enc( noncep+4, n+2 );
|
||||
be32enc( noncep+6, n+3 );
|
||||
|
||||
skunk_4way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
if ( hash[7] <= Htarg && fulltest( hash, ptarget ) )
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
|
||||
{
|
||||
found[0] = true;
|
||||
num_found++;
|
||||
nonces[0] = n;
|
||||
work_set_target_ratio( work, hash );
|
||||
}
|
||||
if ( (hash+8)[7] <= Htarg && fulltest( hash+8, ptarget ) )
|
||||
{
|
||||
found[1] = true;
|
||||
num_found++;
|
||||
nonces[1] = n+1;
|
||||
work_set_target_ratio( work, hash+8 );
|
||||
}
|
||||
if ( (hash+16)[7] <= Htarg && fulltest( hash+16, ptarget ) )
|
||||
{
|
||||
found[2] = true;
|
||||
num_found++;
|
||||
nonces[2] = n+2;
|
||||
work_set_target_ratio( work, hash+16 );
|
||||
}
|
||||
if ( (hash+24)[7] <= Htarg && fulltest( hash+24, ptarget ) )
|
||||
{
|
||||
found[3] = true;
|
||||
num_found++;
|
||||
nonces[3] = n+3;
|
||||
work_set_target_ratio( work, hash+24 );
|
||||
pdata[19] = n+i;
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
}
|
||||
n +=4;
|
||||
} while ( ( num_found == 0 ) && ( n < max_nonce ) && !(*restart) );
|
||||
|
@@ -195,12 +195,8 @@ int scanhash_x13_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint32_t n = pdata[19];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t *nonces = work->nonces;
|
||||
bool *found = work->nfound;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep0 = vdata + 73; // 9*8 + 1
|
||||
uint32_t *noncep1 = vdata + 75;
|
||||
uint32_t *noncep2 = vdata + 77;
|
||||
uint32_t *noncep3 = vdata + 79;
|
||||
uint32_t *noncep = vdata + 73; // 9*8 + 1
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint64_t htmax[] = { 0, 0xF, 0xFF,
|
||||
0xFFF, 0xFFFF, 0x10000000 };
|
||||
@@ -219,42 +215,21 @@ int scanhash_x13_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint32_t mask = masks[m];
|
||||
do
|
||||
{
|
||||
found[0] = found[1] = found[2] = found[3] = false;
|
||||
be32enc( noncep0, n );
|
||||
be32enc( noncep1, n+1 );
|
||||
be32enc( noncep2, n+2 );
|
||||
be32enc( noncep3, n+3 );
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep+2, n+1 );
|
||||
be32enc( noncep+4, n+2 );
|
||||
be32enc( noncep+6, n+3 );
|
||||
|
||||
x13_4way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
if ( ( hash[7] & mask ) == 0 && fulltest( hash, ptarget ) )
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( ( ( (hash+(i<<3))[7] & mask ) == 0 )
|
||||
&& fulltest( hash+(i<<3), ptarget ) )
|
||||
{
|
||||
found[0] = true;
|
||||
num_found++;
|
||||
nonces[0] = n;
|
||||
work_set_target_ratio( work, hash );
|
||||
}
|
||||
if ( ( (hash+8)[7] & mask ) == 0 && fulltest( hash+8, ptarget ) )
|
||||
{
|
||||
found[1] = true;
|
||||
num_found++;
|
||||
nonces[1] = n+1;
|
||||
work_set_target_ratio( work, hash+8 );
|
||||
}
|
||||
if ( ( (hash+16)[7] & mask ) == 0 && fulltest( hash+16, ptarget ) )
|
||||
{
|
||||
found[2] = true;
|
||||
num_found++;
|
||||
nonces[2] = n+2;
|
||||
work_set_target_ratio( work, hash+16 );
|
||||
}
|
||||
if ( ( (hash+24)[7] & mask ) == 0 && fulltest( hash+24, ptarget ) )
|
||||
{
|
||||
found[3] = true;
|
||||
num_found++;
|
||||
nonces[3] = n+3;
|
||||
work_set_target_ratio( work, hash+24 );
|
||||
pdata[19] = n+i;
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
}
|
||||
n += 4;
|
||||
} while ( ( num_found == 0 ) && ( n < max_nonce )
|
||||
|
@@ -220,12 +220,8 @@ int scanhash_x13sm3_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint32_t n = pdata[19];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t *nonces = work->nonces;
|
||||
bool *found = work->nfound;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep0 = vdata + 73; // 9*8 + 1
|
||||
uint32_t *noncep1 = vdata + 75;
|
||||
uint32_t *noncep2 = vdata + 77;
|
||||
uint32_t *noncep3 = vdata + 79;
|
||||
uint32_t *noncep = vdata + 73; // 9*8 + 1
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint64_t htmax[] = { 0, 0xF, 0xFF,
|
||||
0xFFF, 0xFFFF, 0x10000000 };
|
||||
@@ -247,42 +243,21 @@ int scanhash_x13sm3_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint32_t mask = masks[m];
|
||||
do
|
||||
{
|
||||
found[0] = found[1] = found[2] = found[3] = false;
|
||||
be32enc( noncep0, n );
|
||||
be32enc( noncep1, n+1 );
|
||||
be32enc( noncep2, n+2 );
|
||||
be32enc( noncep3, n+3 );
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep+2, n+1 );
|
||||
be32enc( noncep+4, n+2 );
|
||||
be32enc( noncep+6, n+3 );
|
||||
|
||||
x13sm3_4way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
if ( ( hash[7] & mask ) == 0 && fulltest( hash, ptarget ) )
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( ( ( (hash+(i<<3))[7] & mask ) == 0 )
|
||||
&& fulltest( hash+(i<<3), ptarget ) )
|
||||
{
|
||||
found[0] = true;
|
||||
num_found++;
|
||||
nonces[0] = n;
|
||||
work_set_target_ratio( work, hash );
|
||||
}
|
||||
if ( ( (hash+8)[7] & mask ) == 0 && fulltest( hash+8, ptarget ) )
|
||||
{
|
||||
found[1] = true;
|
||||
num_found++;
|
||||
nonces[1] = n+1;
|
||||
work_set_target_ratio( work, hash+8 );
|
||||
}
|
||||
if ( ( (hash+16)[7] & mask ) == 0 && fulltest( hash+16, ptarget ) )
|
||||
{
|
||||
found[2] = true;
|
||||
num_found++;
|
||||
nonces[2] = n+2;
|
||||
work_set_target_ratio( work, hash+16 );
|
||||
}
|
||||
if ( ( (hash+24)[7] & mask ) == 0 && fulltest( hash+24, ptarget ) )
|
||||
{
|
||||
found[3] = true;
|
||||
num_found++;
|
||||
nonces[3] = n+3;
|
||||
work_set_target_ratio( work, hash+24 );
|
||||
pdata[19] = n+i;
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
}
|
||||
n += 4;
|
||||
} while ( ( num_found == 0 ) && ( n < max_nonce )
|
||||
|
@@ -114,12 +114,8 @@ int scanhash_polytimos_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
uint32_t *nonces = work->nonces;
|
||||
bool *found = work->nfound;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep0 = vdata + 73; // 9*8 + 1
|
||||
uint32_t *noncep1 = vdata + 75;
|
||||
uint32_t *noncep2 = vdata + 77;
|
||||
uint32_t *noncep3 = vdata + 79;
|
||||
uint32_t *noncep = vdata + 73; // 9*8 + 1
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
||||
|
||||
@@ -132,42 +128,20 @@ int scanhash_polytimos_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *edata = (uint64_t*)endiandata;
|
||||
mm256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||
do {
|
||||
found[0] = found[1] = found[2] = found[3] = false;
|
||||
be32enc( noncep0, n );
|
||||
be32enc( noncep1, n+1 );
|
||||
be32enc( noncep2, n+2 );
|
||||
be32enc( noncep3, n+3 );
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep+2, n+1 );
|
||||
be32enc( noncep+4, n+2 );
|
||||
be32enc( noncep+6, n+3 );
|
||||
|
||||
polytimos_4way_hash(hash, vdata);
|
||||
pdata[19] = n;
|
||||
|
||||
if ( hash[7] <= Htarg && fulltest( hash, ptarget ) )
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
|
||||
{
|
||||
found[0] = true;
|
||||
num_found++;
|
||||
nonces[0] = n;
|
||||
work_set_target_ratio( work, hash );
|
||||
}
|
||||
if ( (hash+8)[7] <= Htarg && fulltest( hash+8, ptarget ) )
|
||||
{
|
||||
found[1] = true;
|
||||
num_found++;
|
||||
nonces[1] = n+1;
|
||||
work_set_target_ratio( work, hash+8 );
|
||||
}
|
||||
if ( (hash+16)[7] <= Htarg && fulltest( hash+16, ptarget ) )
|
||||
{
|
||||
found[2] = true;
|
||||
num_found++;
|
||||
nonces[2] = n+2;
|
||||
work_set_target_ratio( work, hash+16 );
|
||||
}
|
||||
if ( (hash+24)[7] <= Htarg && fulltest( hash+24, ptarget ) )
|
||||
{
|
||||
found[3] = true;
|
||||
num_found++;
|
||||
nonces[3] = n+3;
|
||||
work_set_target_ratio( work, hash+24 );
|
||||
pdata[19] = n+i;
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
}
|
||||
n += 4;
|
||||
|
||||
|
@@ -89,12 +89,8 @@ int scanhash_veltor_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
uint32_t *nonces = work->nonces;
|
||||
bool *found = work->nfound;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep0 = vdata + 73; // 9*8 + 1
|
||||
uint32_t *noncep1 = vdata + 75;
|
||||
uint32_t *noncep2 = vdata + 77;
|
||||
uint32_t *noncep3 = vdata + 79;
|
||||
uint32_t *noncep = vdata + 73; // 9*8 + 1
|
||||
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
||||
|
||||
if ( opt_benchmark )
|
||||
@@ -108,42 +104,20 @@ int scanhash_veltor_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
mm256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||
do
|
||||
{
|
||||
found[0] = found[1] = found[2] = found[3] = false;
|
||||
be32enc( noncep0, n );
|
||||
be32enc( noncep1, n+1 );
|
||||
be32enc( noncep2, n+2 );
|
||||
be32enc( noncep3, n+3 );
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep+2, n+1 );
|
||||
be32enc( noncep+4, n+2 );
|
||||
be32enc( noncep+6, n+3 );
|
||||
|
||||
veltor_4way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
if ( hash[7] <= Htarg && fulltest( hash, ptarget ) )
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
|
||||
{
|
||||
found[0] = true;
|
||||
num_found++;
|
||||
nonces[0] = n;
|
||||
work_set_target_ratio( work, hash );
|
||||
}
|
||||
if ( (hash+8)[7] <= Htarg && fulltest( hash+8, ptarget ) )
|
||||
{
|
||||
found[1] = true;
|
||||
num_found++;
|
||||
nonces[1] = n+1;
|
||||
work_set_target_ratio( work, hash+8 );
|
||||
}
|
||||
if ( (hash+16)[7] <= Htarg && fulltest( hash+16, ptarget ) )
|
||||
{
|
||||
found[2] = true;
|
||||
num_found++;
|
||||
nonces[2] = n+2;
|
||||
work_set_target_ratio( work, hash+16 );
|
||||
}
|
||||
if ( (hash+24)[7] <= Htarg && fulltest( hash+24, ptarget ) )
|
||||
{
|
||||
found[3] = true;
|
||||
num_found++;
|
||||
nonces[3] = n+3;
|
||||
work_set_target_ratio( work, hash+24 );
|
||||
pdata[19] = n+i;
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
}
|
||||
n += 4;
|
||||
} while ( ( num_found == 0 ) && ( n < max_nonce ) && !(*restart) );
|
||||
|
@@ -205,12 +205,8 @@ int scanhash_x14_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint32_t n = pdata[19];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t *nonces = work->nonces;
|
||||
bool *found = work->nfound;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep0 = vdata + 73; // 9*8 + 1
|
||||
uint32_t *noncep1 = vdata + 75;
|
||||
uint32_t *noncep2 = vdata + 77;
|
||||
uint32_t *noncep3 = vdata + 79;
|
||||
uint32_t *noncep = vdata + 73; // 9*8 + 1
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint64_t htmax[] = { 0, 0xF, 0xFF,
|
||||
0xFFF, 0xFFFF, 0x10000000 };
|
||||
@@ -229,42 +225,21 @@ int scanhash_x14_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint32_t mask = masks[m];
|
||||
do
|
||||
{
|
||||
found[0] = found[1] = found[2] = found[3] = false;
|
||||
be32enc( noncep0, n );
|
||||
be32enc( noncep1, n+1 );
|
||||
be32enc( noncep2, n+2 );
|
||||
be32enc( noncep3, n+3 );
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep+2, n+1 );
|
||||
be32enc( noncep+4, n+2 );
|
||||
be32enc( noncep+6, n+3 );
|
||||
|
||||
x14_4way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
if ( ( hash[7] & mask ) == 0 && fulltest( hash, ptarget ) )
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( ( ( (hash+(i<<3))[7] & mask ) == 0 )
|
||||
&& fulltest( hash+(i<<3), ptarget ) )
|
||||
{
|
||||
found[0] = true;
|
||||
num_found++;
|
||||
nonces[0] = n;
|
||||
work_set_target_ratio( work, hash );
|
||||
}
|
||||
if ( ( (hash+8)[7] & mask ) == 0 && fulltest( hash+8, ptarget ) )
|
||||
{
|
||||
found[1] = true;
|
||||
num_found++;
|
||||
nonces[1] = n+1;
|
||||
work_set_target_ratio( work, hash+8 );
|
||||
}
|
||||
if ( ( (hash+16)[7] & mask ) == 0 && fulltest( hash+16, ptarget ) )
|
||||
{
|
||||
found[2] = true;
|
||||
num_found++;
|
||||
nonces[2] = n+2;
|
||||
work_set_target_ratio( work, hash+16 );
|
||||
}
|
||||
if ( ( (hash+24)[7] & mask ) == 0 && fulltest( hash+24, ptarget ) )
|
||||
{
|
||||
found[3] = true;
|
||||
num_found++;
|
||||
nonces[3] = n+3;
|
||||
work_set_target_ratio( work, hash+24 );
|
||||
pdata[19] = n+i;
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
}
|
||||
n += 4;
|
||||
} while ( ( num_found == 0 ) && ( n < max_nonce )
|
||||
|
@@ -224,12 +224,8 @@ int scanhash_x15_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint32_t n = pdata[19];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t *nonces = work->nonces;
|
||||
bool *found = work->nfound;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep0 = vdata + 73; // 9*8 + 1
|
||||
uint32_t *noncep1 = vdata + 75;
|
||||
uint32_t *noncep2 = vdata + 77;
|
||||
uint32_t *noncep3 = vdata + 79;
|
||||
uint32_t *noncep = vdata + 73; // 9*8 + 1
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint64_t htmax[] = { 0, 0xF, 0xFF,
|
||||
0xFFF, 0xFFFF, 0x10000000 };
|
||||
@@ -248,42 +244,21 @@ int scanhash_x15_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint32_t mask = masks[m];
|
||||
do
|
||||
{
|
||||
found[0] = found[1] = found[2] = found[3] = false;
|
||||
be32enc( noncep0, n );
|
||||
be32enc( noncep1, n+1 );
|
||||
be32enc( noncep2, n+2 );
|
||||
be32enc( noncep3, n+3 );
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep+2, n+1 );
|
||||
be32enc( noncep+4, n+2 );
|
||||
be32enc( noncep+6, n+3 );
|
||||
|
||||
x15_4way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
if ( ( hash[7] & mask ) == 0 && fulltest( hash, ptarget ) )
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( ( ( (hash+(i<<3))[7] & mask ) == 0 )
|
||||
&& fulltest( hash+(i<<3), ptarget ) )
|
||||
{
|
||||
found[0] = true;
|
||||
num_found++;
|
||||
nonces[0] = n;
|
||||
work_set_target_ratio( work, hash );
|
||||
}
|
||||
if ( ( (hash+8)[7] & mask ) == 0 && fulltest( hash+8, ptarget ) )
|
||||
{
|
||||
found[1] = true;
|
||||
num_found++;
|
||||
nonces[1] = n+1;
|
||||
work_set_target_ratio( work, hash+8 );
|
||||
}
|
||||
if ( ( (hash+16)[7] & mask ) == 0 && fulltest( hash+16, ptarget ) )
|
||||
{
|
||||
found[2] = true;
|
||||
num_found++;
|
||||
nonces[2] = n+2;
|
||||
work_set_target_ratio( work, hash+16 );
|
||||
}
|
||||
if ( ( (hash+24)[7] & mask ) == 0 && fulltest( hash+24, ptarget ) )
|
||||
{
|
||||
found[3] = true;
|
||||
num_found++;
|
||||
nonces[3] = n+3;
|
||||
work_set_target_ratio( work, hash+24 );
|
||||
pdata[19] = n+i;
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
}
|
||||
n += 4;
|
||||
} while ( ( num_found == 0 ) && ( n < max_nonce )
|
||||
|
@@ -314,12 +314,8 @@ int scanhash_x16r_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
uint32_t *nonces = work->nonces;
|
||||
bool *found = work->nfound;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep0 = vdata + 73; // 9*8 + 1
|
||||
uint32_t *noncep1 = vdata + 75;
|
||||
uint32_t *noncep2 = vdata + 77;
|
||||
uint32_t *noncep3 = vdata + 79;
|
||||
uint32_t *noncep = vdata + 73; // 9*8 + 1
|
||||
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
||||
|
||||
for ( int k=0; k < 19; k++ )
|
||||
@@ -342,41 +338,20 @@ int scanhash_x16r_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
|
||||
do
|
||||
{
|
||||
found[0] = found[1] = found[2] = found[3] = false;
|
||||
be32enc( noncep0, n );
|
||||
be32enc( noncep1, n+1 );
|
||||
be32enc( noncep2, n+2 );
|
||||
be32enc( noncep3, n+3 );
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep+2, n+1 );
|
||||
be32enc( noncep+4, n+2 );
|
||||
be32enc( noncep+6, n+3 );
|
||||
|
||||
x16r_4way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
if ( hash[7] <= Htarg && fulltest( hash, ptarget ) )
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
|
||||
{
|
||||
found[0] = true;
|
||||
num_found++;
|
||||
nonces[0] = n;
|
||||
work_set_target_ratio( work, hash );
|
||||
}
|
||||
if ( (hash+8)[7] <= Htarg && fulltest( hash+8, ptarget ) )
|
||||
{
|
||||
found[1] = true;
|
||||
num_found++;
|
||||
nonces[1] = n+1;
|
||||
work_set_target_ratio( work, hash+8 );
|
||||
}
|
||||
if ( (hash+16)[7] <= Htarg && fulltest( hash+16, ptarget ) )
|
||||
{
|
||||
found[2] = true;
|
||||
num_found++;
|
||||
nonces[2] = n+2;
|
||||
work_set_target_ratio( work, hash+16 );
|
||||
}
|
||||
if ( (hash+24)[7] <= Htarg && fulltest( hash+24, ptarget ) )
|
||||
{
|
||||
found[3] = true;
|
||||
num_found++;
|
||||
nonces[3] = n+3;
|
||||
work_set_target_ratio( work, hash+24 );
|
||||
pdata[19] = n+i;
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
}
|
||||
n += 4;
|
||||
} while ( ( num_found == 0 ) && ( n < max_nonce ) && !(*restart) );
|
||||
|
@@ -235,12 +235,8 @@ int scanhash_x17_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint32_t n = pdata[19];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t *nonces = work->nonces;
|
||||
bool *found = work->nfound;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep0 = vdata + 73; // 9*8 + 1
|
||||
uint32_t *noncep1 = vdata + 75;
|
||||
uint32_t *noncep2 = vdata + 77;
|
||||
uint32_t *noncep3 = vdata + 79;
|
||||
uint32_t *noncep = vdata + 73; // 9*8 + 1
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint64_t htmax[] = { 0, 0xF, 0xFF,
|
||||
0xFFF, 0xFFFF, 0x10000000 };
|
||||
@@ -259,42 +255,21 @@ int scanhash_x17_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint32_t mask = masks[m];
|
||||
do
|
||||
{
|
||||
found[0] = found[1] = found[2] = found[3] = false;
|
||||
be32enc( noncep0, n );
|
||||
be32enc( noncep1, n+1 );
|
||||
be32enc( noncep2, n+2 );
|
||||
be32enc( noncep3, n+3 );
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep+2, n+1 );
|
||||
be32enc( noncep+4, n+2 );
|
||||
be32enc( noncep+6, n+3 );
|
||||
|
||||
x17_4way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
if ( ( hash[7] & mask ) == 0 && fulltest( hash, ptarget ) )
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( ( ( (hash+(i<<3))[7] & mask ) == 0 )
|
||||
&& fulltest( hash+(i<<3), ptarget ) )
|
||||
{
|
||||
found[0] = true;
|
||||
num_found++;
|
||||
nonces[0] = n;
|
||||
work_set_target_ratio( work, hash );
|
||||
}
|
||||
if ( ( (hash+8)[7] & mask ) == 0 && fulltest( hash+8, ptarget ) )
|
||||
{
|
||||
found[1] = true;
|
||||
num_found++;
|
||||
nonces[1] = n+1;
|
||||
work_set_target_ratio( work, hash+8 );
|
||||
}
|
||||
if ( ( (hash+16)[7] & mask ) == 0 && fulltest( hash+16, ptarget ) )
|
||||
{
|
||||
found[2] = true;
|
||||
num_found++;
|
||||
nonces[2] = n+2;
|
||||
work_set_target_ratio( work, hash+16 );
|
||||
}
|
||||
if ( ( (hash+24)[7] & mask ) == 0 && fulltest( hash+24, ptarget ) )
|
||||
{
|
||||
found[3] = true;
|
||||
num_found++;
|
||||
nonces[3] = n+3;
|
||||
work_set_target_ratio( work, hash+24 );
|
||||
pdata[19] = n+i;
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
}
|
||||
n += 4;
|
||||
} while ( ( num_found == 0 ) && ( n < max_nonce )
|
||||
|
@@ -384,12 +384,8 @@ int scanhash_xevan_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
uint32_t *nonces = work->nonces;
|
||||
bool *found = work->nfound;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep0 = vdata + 73; // 9*8 + 1
|
||||
uint32_t *noncep1 = vdata + 75;
|
||||
uint32_t *noncep2 = vdata + 77;
|
||||
uint32_t *noncep3 = vdata + 79;
|
||||
uint32_t *noncep = vdata + 73; // 9*8 + 1
|
||||
|
||||
if ( opt_benchmark )
|
||||
ptarget[7] = 0x0cff;
|
||||
@@ -403,43 +399,21 @@ int scanhash_xevan_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
xevan_4way_blake512_midstate( vdata );
|
||||
|
||||
do {
|
||||
found[0] = found[1] = found[2] = found[3] = false;
|
||||
be32enc( noncep0, n );
|
||||
be32enc( noncep1, n+1 );
|
||||
be32enc( noncep2, n+2 );
|
||||
be32enc( noncep3, n+3 );
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep+2, n+1 );
|
||||
be32enc( noncep+4, n+2 );
|
||||
be32enc( noncep+6, n+3 );
|
||||
|
||||
xevan_4way_hash( hash, vdata );
|
||||
|
||||
pdata[19] = n;
|
||||
|
||||
if ( ( hash[7] <= Htarg ) && fulltest( hash, ptarget ) )
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
|
||||
{
|
||||
found[0] = true;
|
||||
num_found++;
|
||||
nonces[0] = n;
|
||||
work_set_target_ratio( work, hash );
|
||||
}
|
||||
if ( ( (hash+8)[7] <= Htarg ) && fulltest( hash+8, ptarget ) )
|
||||
{
|
||||
found[1] = true;
|
||||
num_found++;
|
||||
nonces[1] = n+1;
|
||||
work_set_target_ratio( work, hash+8 );
|
||||
}
|
||||
if ( ( (hash+16)[7] <= Htarg ) && fulltest( hash+16, ptarget ) )
|
||||
{
|
||||
found[2] = true;
|
||||
num_found++;
|
||||
nonces[2] = n+2;
|
||||
work_set_target_ratio( work, hash+16 );
|
||||
}
|
||||
if ( ( (hash+24)[7] <= Htarg ) && fulltest( hash+24, ptarget ) )
|
||||
{
|
||||
found[3] = true;
|
||||
num_found++;
|
||||
nonces[3] = n+3;
|
||||
work_set_target_ratio( work, hash+24 );
|
||||
pdata[19] = n+i;
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
}
|
||||
n += 4;
|
||||
} while ( ( num_found == 0 ) && ( n < max_nonce )
|
||||
|
84
api.c
84
api.c
@@ -98,6 +98,7 @@ extern int opt_api_remote;
|
||||
extern double global_hashrate;
|
||||
extern uint32_t accepted_count;
|
||||
extern uint32_t rejected_count;
|
||||
extern uint32_t solved_count;
|
||||
|
||||
#define cpu_threads opt_n_threads
|
||||
|
||||
@@ -110,18 +111,19 @@ extern int cpu_fanpercent(void);
|
||||
|
||||
static void cpustatus(int thr_id)
|
||||
{
|
||||
if (thr_id >= 0 && thr_id < opt_n_threads) {
|
||||
struct cpu_info *cpu = &thr_info[thr_id].cpu;
|
||||
char buf[512]; *buf = '\0';
|
||||
if ( thr_id >= 0 && thr_id < opt_n_threads )
|
||||
{
|
||||
// struct cpu_info *cpu = &thr_info[thr_id].cpu;
|
||||
char buf[512]; *buf = '\0';
|
||||
char units[4] = {0};
|
||||
double hashrate = thr_hashrates[thr_id];
|
||||
|
||||
cpu->thr_id = thr_id;
|
||||
cpu->khashes = thr_hashrates[thr_id] / 1000.0; //todo: stats_get_speed(thr_id, 0.0) / 1000.0;
|
||||
|
||||
snprintf(buf, sizeof(buf), "CPU=%d;KHS=%.2f|", thr_id, cpu->khashes);
|
||||
|
||||
// append to buffer
|
||||
strcat(buffer, buf);
|
||||
}
|
||||
scale_hash_for_display ( &hashrate, units );
|
||||
snprintf( buf, sizeof(buf), "CPU=%d;%sH/s=%.2f|", thr_id, units,
|
||||
hashrate );
|
||||
// append to buffer
|
||||
strcat( buffer, buf );
|
||||
}
|
||||
}
|
||||
|
||||
/*****************************************************************************/
|
||||
@@ -129,42 +131,42 @@ static void cpustatus(int thr_id)
|
||||
/**
|
||||
* Returns miner global infos
|
||||
*/
|
||||
static char *getsummary(char *params)
|
||||
static char *getsummary( char *params )
|
||||
{
|
||||
char algo[64]; *algo = '\0';
|
||||
time_t ts = time(NULL);
|
||||
double uptime = difftime(ts, startup);
|
||||
double accps = (60.0 * accepted_count) / (uptime ? uptime : 1.0);
|
||||
double diff = net_diff > 0. ? net_diff : stratum_diff;
|
||||
char diff_str[16];
|
||||
|
||||
struct cpu_info cpu = { 0 };
|
||||
char algo[64]; *algo = '\0';
|
||||
time_t ts = time(NULL);
|
||||
double uptime = difftime(ts, startup);
|
||||
double accps = (60.0 * accepted_count) / (uptime ? uptime : 1.0);
|
||||
double diff = net_diff > 0. ? net_diff : stratum_diff;
|
||||
char diff_str[16];
|
||||
double hrate = (double)global_hashrate;
|
||||
struct cpu_info cpu = { 0 };
|
||||
#ifdef USE_MONITORING
|
||||
cpu.has_monitoring = true;
|
||||
cpu.cpu_temp = cpu_temp(0);
|
||||
cpu.cpu_fan = cpu_fanpercent();
|
||||
cpu.cpu_clock = cpu_clock(0);
|
||||
cpu.has_monitoring = true;
|
||||
cpu.cpu_temp = cpu_temp(0);
|
||||
cpu.cpu_fan = cpu_fanpercent();
|
||||
cpu.cpu_clock = cpu_clock(0);
|
||||
#endif
|
||||
|
||||
get_currentalgo(algo, sizeof(algo));
|
||||
get_currentalgo(algo, sizeof(algo));
|
||||
|
||||
// if diff is integer don't display decimals
|
||||
if ( diff == trunc( diff ) )
|
||||
sprintf( diff_str, "%.0f", diff);
|
||||
else
|
||||
sprintf( diff_str, "%.6f", diff);
|
||||
// if diff is integer don't display decimals
|
||||
if ( diff == trunc( diff ) )
|
||||
sprintf( diff_str, "%.0f", diff);
|
||||
else
|
||||
sprintf( diff_str, "%.6f", diff);
|
||||
|
||||
*buffer = '\0';
|
||||
sprintf(buffer, "NAME=%s;VER=%s;API=%s;"
|
||||
"ALGO=%s;CPUS=%d;KHS=%.2f;ACC=%d;REJ=%d;"
|
||||
"ACCMN=%.3f;DIFF=%s;TEMP=%.1f;FAN=%d;FREQ=%d;"
|
||||
"UPTIME=%.0f;TS=%u|",
|
||||
PACKAGE_NAME, PACKAGE_VERSION, APIVERSION,
|
||||
algo, opt_n_threads, (double)global_hashrate / 1000.0,
|
||||
accepted_count, rejected_count, accps, diff_str,
|
||||
cpu.cpu_temp, cpu.cpu_fan, cpu.cpu_clock,
|
||||
uptime, (uint32_t) ts);
|
||||
return buffer;
|
||||
*buffer = '\0';
|
||||
sprintf( buffer, "NAME=%s;VER=%s;API=%s;"
|
||||
"ALGO=%s;CPUS=%d;HS=%.2f;KHS=%.2f;ACC=%d;REJ=%d;SOL=%d;"
|
||||
"ACCMN=%.3f;DIFF=%s;TEMP=%.1f;FAN=%d;FREQ=%d;"
|
||||
"UPTIME=%.0f;TS=%u|",
|
||||
PACKAGE_NAME, PACKAGE_VERSION, APIVERSION,
|
||||
algo, opt_n_threads, hrate, hrate/1000.0,
|
||||
accepted_count, rejected_count, solved_count,
|
||||
accps, diff_str, cpu.cpu_temp, cpu.cpu_fan, cpu.cpu_clock,
|
||||
uptime, (uint32_t) ts);
|
||||
return buffer;
|
||||
}
|
||||
|
||||
/**
|
||||
|
128
avxdefs.h
128
avxdefs.h
@@ -182,6 +182,10 @@ static inline __m128i foo()
|
||||
// returns p[i]
|
||||
#define casti_m128i(p,i) (((__m128i*)(p))[(i)])
|
||||
|
||||
// p = any aligned pointer, i = scaled offset
|
||||
// returns p+i
|
||||
#define casto_m128i(p,i) (((__m128i*)(p))+(i))
|
||||
|
||||
//
|
||||
// Memory functions
|
||||
// n = number of __m128i, bytes/16
|
||||
@@ -671,6 +675,10 @@ typedef union m256_v8 m256_v8;
|
||||
// returns p[i]
|
||||
#define casti_m256i(p,i) (((__m256i*)(p))[(i)])
|
||||
|
||||
// p = any aligned pointer, i = scaled offset
|
||||
// returns p+i
|
||||
#define casto_m256i(p,i) (((__m256i*)(p))+(i))
|
||||
|
||||
//
|
||||
// Memory functions
|
||||
// n = number of 256 bit (32 byte) vectors
|
||||
@@ -1529,6 +1537,38 @@ static inline void mm256_interleave_8x32( void *dst, const void *src0,
|
||||
s3[18], s2[18], s1[18], s0[18] );
|
||||
d[19] = _mm256_set_epi32( s7[19], s6[19], s5[19], s4[19],
|
||||
s3[19], s2[19], s1[19], s0[19] );
|
||||
|
||||
if ( bit_len <= 640 ) return;
|
||||
|
||||
d[20] = _mm256_set_epi32( s7[20], s6[20], s5[20], s4[20],
|
||||
s3[20], s2[20], s1[20], s0[20] );
|
||||
d[21] = _mm256_set_epi32( s7[21], s6[21], s5[21], s4[21],
|
||||
s3[21], s2[21], s1[21], s0[21] );
|
||||
d[22] = _mm256_set_epi32( s7[22], s6[22], s5[22], s4[22],
|
||||
s3[22], s2[22], s1[22], s0[22] );
|
||||
d[23] = _mm256_set_epi32( s7[23], s6[23], s5[23], s4[23],
|
||||
s3[23], s2[23], s1[23], s0[23] );
|
||||
|
||||
if ( bit_len <= 768 ) return;
|
||||
|
||||
d[24] = _mm256_set_epi32( s7[24], s6[24], s5[24], s4[24],
|
||||
s3[24], s2[24], s1[24], s0[24] );
|
||||
d[25] = _mm256_set_epi32( s7[25], s6[25], s5[25], s4[25],
|
||||
s3[25], s2[25], s1[25], s0[25] );
|
||||
d[26] = _mm256_set_epi32( s7[26], s6[26], s5[26], s4[26],
|
||||
s3[26], s2[26], s1[26], s0[26] );
|
||||
d[27] = _mm256_set_epi32( s7[27], s6[27], s5[27], s4[27],
|
||||
s3[27], s2[27], s1[27], s0[27] );
|
||||
d[28] = _mm256_set_epi32( s7[28], s6[28], s5[28], s4[28],
|
||||
s3[28], s2[28], s1[28], s0[28] );
|
||||
d[29] = _mm256_set_epi32( s7[29], s6[29], s5[29], s4[29],
|
||||
s3[29], s2[29], s1[29], s0[29] );
|
||||
d[30] = _mm256_set_epi32( s7[30], s6[30], s5[30], s4[30],
|
||||
s3[30], s2[30], s1[30], s0[30] );
|
||||
d[31] = _mm256_set_epi32( s7[31], s6[31], s5[31], s4[31],
|
||||
s3[31], s2[31], s1[31], s0[31] );
|
||||
|
||||
// bit_len == 1024
|
||||
}
|
||||
|
||||
// probably obsolete with double pack 2x32->64, 4x64->256.
|
||||
@@ -1607,31 +1647,71 @@ static inline void mm256_deinterleave_8x32( void *dst0, void *dst1, void *dst2,
|
||||
|
||||
// null change for overrun space, vector indexing doesn't work for
|
||||
// 32 bit data
|
||||
if ( bit_len <= 640 )
|
||||
{
|
||||
uint32_t *d = ((uint32_t*)d0) + 8;
|
||||
d0[2] = _mm256_set_epi32( *(d+7), *(d+6), *(d+5), *(d+4),
|
||||
s[152], s[144], s[136], s[128] );
|
||||
d = ((uint32_t*)d1) + 8;
|
||||
d1[2] = _mm256_set_epi32( *(d+7), *(d+6), *(d+5), *(d+4),
|
||||
s[153], s[145], s[137], s[129] );
|
||||
d = ((uint32_t*)d2) + 8;
|
||||
d2[2] = _mm256_set_epi32( *(d+7), *(d+6), *(d+5), *(d+4),
|
||||
s[154], s[146], s[138], s[130]);
|
||||
d = ((uint32_t*)d3) + 8;
|
||||
d3[2] = _mm256_set_epi32( *(d+7), *(d+6), *(d+5), *(d+4),
|
||||
s[155], s[147], s[139], s[131] );
|
||||
d = ((uint32_t*)d4) + 8;
|
||||
d4[2] = _mm256_set_epi32( *(d+7), *(d+6), *(d+5), *(d+4),
|
||||
s[156], s[148], s[140], s[132] );
|
||||
d = ((uint32_t*)d5) + 8;
|
||||
d5[2] = _mm256_set_epi32( *(d+7), *(d+6), *(d+5), *(d+4),
|
||||
s[157], s[149], s[141], s[133] );
|
||||
d = ((uint32_t*)d6) + 8;
|
||||
d6[2] = _mm256_set_epi32( *(d+7), *(d+6), *(d+5), *(d+4),
|
||||
s[158], s[150], s[142], s[134] );
|
||||
d = ((uint32_t*)d7) + 8;
|
||||
d7[2] = _mm256_set_epi32( *(d+7), *(d+6), *(d+5), *(d+4),
|
||||
s[159], s[151], s[143], s[135] );
|
||||
return;
|
||||
}
|
||||
|
||||
uint32_t *d = ((uint32_t*)d0) + 8;
|
||||
d0[2] = _mm256_set_epi32( *(d+7), *(d+6), *(d+5), *(d+4),
|
||||
s[152], s[144], s[136], s[128] );
|
||||
d = ((uint32_t*)d1) + 8;
|
||||
d1[2] = _mm256_set_epi32( *(d+7), *(d+6), *(d+5), *(d+4),
|
||||
s[153], s[145], s[137], s[129] );
|
||||
d = ((uint32_t*)d2) + 8;
|
||||
d2[2] = _mm256_set_epi32( *(d+7), *(d+6), *(d+5), *(d+4),
|
||||
s[154], s[146], s[138], s[130]);
|
||||
d = ((uint32_t*)d3) + 8;
|
||||
d3[2] = _mm256_set_epi32( *(d+7), *(d+6), *(d+5), *(d+4),
|
||||
s[155], s[147], s[139], s[131] );
|
||||
d = ((uint32_t*)d4) + 8;
|
||||
d4[2] = _mm256_set_epi32( *(d+7), *(d+6), *(d+5), *(d+4),
|
||||
s[156], s[148], s[140], s[132] );
|
||||
d = ((uint32_t*)d5) + 8;
|
||||
d5[2] = _mm256_set_epi32( *(d+7), *(d+6), *(d+5), *(d+4),
|
||||
s[157], s[149], s[141], s[133] );
|
||||
d = ((uint32_t*)d6) + 8;
|
||||
d6[2] = _mm256_set_epi32( *(d+7), *(d+6), *(d+5), *(d+4),
|
||||
s[158], s[150], s[142], s[134] );
|
||||
d = ((uint32_t*)d7) + 8;
|
||||
d7[2] = _mm256_set_epi32( *(d+7), *(d+6), *(d+5), *(d+4),
|
||||
s[159], s[151], s[143], s[135] );
|
||||
d0[2] = _mm256_set_epi32( s[184], s[176], s[168], s[160],
|
||||
s[152], s[144], s[136], s[128] );
|
||||
d1[2] = _mm256_set_epi32( s[185], s[177], s[169], s[161],
|
||||
s[153], s[145], s[137], s[129] );
|
||||
d2[2] = _mm256_set_epi32( s[186], s[178], s[170], s[162],
|
||||
s[154], s[146], s[138], s[130] );
|
||||
d3[2] = _mm256_set_epi32( s[187], s[179], s[171], s[163],
|
||||
s[155], s[147], s[139], s[131] );
|
||||
d4[2] = _mm256_set_epi32( s[188], s[180], s[172], s[164],
|
||||
s[156], s[148], s[140], s[132] );
|
||||
d5[2] = _mm256_set_epi32( s[189], s[181], s[173], s[165],
|
||||
s[157], s[149], s[141], s[133] );
|
||||
d6[2] = _mm256_set_epi32( s[190], s[182], s[174], s[166],
|
||||
s[158], s[150], s[142], s[134] );
|
||||
d7[2] = _mm256_set_epi32( s[191], s[183], s[175], s[167],
|
||||
s[159], s[151], s[143], s[135] );
|
||||
|
||||
if ( bit_len <= 768 ) return;
|
||||
|
||||
d0[3] = _mm256_set_epi32( s[248], s[240], s[232], s[224],
|
||||
s[216], s[208], s[200], s[192] );
|
||||
d1[3] = _mm256_set_epi32( s[249], s[241], s[233], s[225],
|
||||
s[217], s[209], s[201], s[193] );
|
||||
d2[3] = _mm256_set_epi32( s[250], s[242], s[234], s[226],
|
||||
s[218], s[210], s[202], s[194] );
|
||||
d3[3] = _mm256_set_epi32( s[251], s[243], s[235], s[227],
|
||||
s[219], s[211], s[203], s[195] );
|
||||
d4[3] = _mm256_set_epi32( s[252], s[244], s[236], s[228],
|
||||
s[220], s[212], s[204], s[196] );
|
||||
d5[3] = _mm256_set_epi32( s[253], s[245], s[237], s[229],
|
||||
s[221], s[213], s[205], s[197] );
|
||||
d6[3] = _mm256_set_epi32( s[254], s[246], s[238], s[230],
|
||||
s[222], s[214], s[206], s[198] );
|
||||
d7[3] = _mm256_set_epi32( s[255], s[247], s[239], s[231],
|
||||
s[223], s[215], s[207], s[199] );
|
||||
// bit_len == 1024
|
||||
}
|
||||
|
||||
// Deinterleave 8 arrays into individual buffers for scalar processing
|
||||
|
4
build.sh
4
build.sh
@@ -18,8 +18,8 @@ rm -f config.status
|
||||
# Debian 7.7 / Ubuntu 14.04 (gcc 4.7+)
|
||||
#extracflags="$extracflags -Ofast -flto -fuse-linker-plugin -ftree-loop-if-convert-stores"
|
||||
|
||||
CFLAGS="-O3 -march=native -Wall" ./configure --with-curl --with-crypto=$HOME/usr
|
||||
#CFLAGS="-O3 -march=native -Wall" ./configure --with-curl
|
||||
#CFLAGS="-O3 -march=native -Wall" ./configure --with-curl --with-crypto=$HOME/usr
|
||||
CFLAGS="-O3 -march=native -Wall" ./configure --with-curl
|
||||
#CFLAGS="-O3 -march=native -Wall" CXXFLAGS="$CFLAGS -std=gnu++11" ./configure --with-curl
|
||||
|
||||
make -j 4
|
||||
|
27
buildjdd.sh
Executable file
27
buildjdd.sh
Executable file
@@ -0,0 +1,27 @@
|
||||
#!/bin/bash
|
||||
|
||||
#if [ "$OS" = "Windows_NT" ]; then
|
||||
# ./mingw64.sh
|
||||
# exit 0
|
||||
#fi
|
||||
|
||||
# Linux build
|
||||
|
||||
make distclean || echo clean
|
||||
|
||||
rm -f config.status
|
||||
./autogen.sh || echo done
|
||||
|
||||
# Ubuntu 10.04 (gcc 4.4)
|
||||
# extracflags="-O3 -march=native -Wall -D_REENTRANT -funroll-loops -fvariable-expansion-in-unroller -fmerge-all-constants -fbranch-target-load-optimize2 -fsched2-use-superblocks -falign-loops=16 -falign-functions=16 -falign-jumps=16 -falign-labels=16"
|
||||
|
||||
# Debian 7.7 / Ubuntu 14.04 (gcc 4.7+)
|
||||
#extracflags="$extracflags -Ofast -flto -fuse-linker-plugin -ftree-loop-if-convert-stores"
|
||||
|
||||
CFLAGS="-O3 -march=native -Wall" ./configure --with-curl --with-crypto=$HOME/usr
|
||||
#CFLAGS="-O3 -march=native -Wall" ./configure --with-curl
|
||||
#CFLAGS="-O3 -march=native -Wall" CXXFLAGS="$CFLAGS -std=gnu++11" ./configure --with-curl
|
||||
|
||||
make -j 4
|
||||
|
||||
strip -s cpuminer
|
20
configure
vendored
20
configure
vendored
@@ -1,6 +1,6 @@
|
||||
#! /bin/sh
|
||||
# Guess values for system-dependent variables and create Makefiles.
|
||||
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.8.2.
|
||||
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.8.3.2.
|
||||
#
|
||||
#
|
||||
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
|
||||
@@ -577,8 +577,8 @@ MAKEFLAGS=
|
||||
# Identity of this package.
|
||||
PACKAGE_NAME='cpuminer-opt'
|
||||
PACKAGE_TARNAME='cpuminer-opt'
|
||||
PACKAGE_VERSION='3.8.2'
|
||||
PACKAGE_STRING='cpuminer-opt 3.8.2'
|
||||
PACKAGE_VERSION='3.8.3.2'
|
||||
PACKAGE_STRING='cpuminer-opt 3.8.3.2'
|
||||
PACKAGE_BUGREPORT=''
|
||||
PACKAGE_URL=''
|
||||
|
||||
@@ -1321,7 +1321,7 @@ if test "$ac_init_help" = "long"; then
|
||||
# Omit some internal or obsolete options to make the list less imposing.
|
||||
# This message is too long to be a string in the A/UX 3.1 sh.
|
||||
cat <<_ACEOF
|
||||
\`configure' configures cpuminer-opt 3.8.2 to adapt to many kinds of systems.
|
||||
\`configure' configures cpuminer-opt 3.8.3.2 to adapt to many kinds of systems.
|
||||
|
||||
Usage: $0 [OPTION]... [VAR=VALUE]...
|
||||
|
||||
@@ -1392,7 +1392,7 @@ fi
|
||||
|
||||
if test -n "$ac_init_help"; then
|
||||
case $ac_init_help in
|
||||
short | recursive ) echo "Configuration of cpuminer-opt 3.8.2:";;
|
||||
short | recursive ) echo "Configuration of cpuminer-opt 3.8.3.2:";;
|
||||
esac
|
||||
cat <<\_ACEOF
|
||||
|
||||
@@ -1497,7 +1497,7 @@ fi
|
||||
test -n "$ac_init_help" && exit $ac_status
|
||||
if $ac_init_version; then
|
||||
cat <<\_ACEOF
|
||||
cpuminer-opt configure 3.8.2
|
||||
cpuminer-opt configure 3.8.3.2
|
||||
generated by GNU Autoconf 2.69
|
||||
|
||||
Copyright (C) 2012 Free Software Foundation, Inc.
|
||||
@@ -2000,7 +2000,7 @@ cat >config.log <<_ACEOF
|
||||
This file contains any messages produced by compilers while
|
||||
running configure, to aid debugging if configure makes a mistake.
|
||||
|
||||
It was created by cpuminer-opt $as_me 3.8.2, which was
|
||||
It was created by cpuminer-opt $as_me 3.8.3.2, which was
|
||||
generated by GNU Autoconf 2.69. Invocation command line was
|
||||
|
||||
$ $0 $@
|
||||
@@ -2981,7 +2981,7 @@ fi
|
||||
|
||||
# Define the identity of the package.
|
||||
PACKAGE='cpuminer-opt'
|
||||
VERSION='3.8.2'
|
||||
VERSION='3.8.3.2'
|
||||
|
||||
|
||||
cat >>confdefs.h <<_ACEOF
|
||||
@@ -6677,7 +6677,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
|
||||
# report actual input values of CONFIG_FILES etc. instead of their
|
||||
# values after options handling.
|
||||
ac_log="
|
||||
This file was extended by cpuminer-opt $as_me 3.8.2, which was
|
||||
This file was extended by cpuminer-opt $as_me 3.8.3.2, which was
|
||||
generated by GNU Autoconf 2.69. Invocation command line was
|
||||
|
||||
CONFIG_FILES = $CONFIG_FILES
|
||||
@@ -6743,7 +6743,7 @@ _ACEOF
|
||||
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
|
||||
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
|
||||
ac_cs_version="\\
|
||||
cpuminer-opt config.status 3.8.2
|
||||
cpuminer-opt config.status 3.8.3.2
|
||||
configured by $0, generated by GNU Autoconf 2.69,
|
||||
with options \\"\$ac_cs_config\\"
|
||||
|
||||
|
@@ -1,4 +1,4 @@
|
||||
AC_INIT([cpuminer-opt], [3.8.2])
|
||||
AC_INIT([cpuminer-opt], [3.8.3.2])
|
||||
|
||||
AC_PREREQ([2.59c])
|
||||
AC_CANONICAL_SYSTEM
|
||||
|
72
cpu-miner.c
72
cpu-miner.c
@@ -140,6 +140,7 @@ bool opt_stratum_stats = false;
|
||||
|
||||
uint32_t accepted_count = 0L;
|
||||
uint32_t rejected_count = 0L;
|
||||
uint32_t solved_count = 0L;
|
||||
double *thr_hashrates;
|
||||
double *thr_hashcount;
|
||||
double global_hashcount = 0;
|
||||
@@ -432,6 +433,7 @@ static bool get_mininginfo(CURL *curl, struct work *work)
|
||||
return true;
|
||||
}
|
||||
|
||||
// hodl needs 4 but leave it at 3 until gbt is better understood
|
||||
#define BLOCK_VERSION_CURRENT 3
|
||||
|
||||
static bool gbt_work_decode(const json_t *val, struct work *work)
|
||||
@@ -769,6 +771,8 @@ static int share_result( int result, struct work *work, const char *reason )
|
||||
float rate;
|
||||
char rate_s[8] = {0};
|
||||
double sharediff = work ? work->sharediff : stratum.sharediff;
|
||||
bool solved = result && (net_diff > 0.0 ) && ( sharediff >= net_diff );
|
||||
char sol[32] = {0};
|
||||
int i;
|
||||
|
||||
pthread_mutex_lock(&stats_lock);
|
||||
@@ -778,6 +782,16 @@ static int share_result( int result, struct work *work, const char *reason )
|
||||
hashrate += thr_hashrates[i];
|
||||
}
|
||||
result ? accepted_count++ : rejected_count++;
|
||||
|
||||
if ( solved )
|
||||
{
|
||||
solved_count++;
|
||||
if ( use_colors )
|
||||
sprintf( sol, CL_GRN " Solved" CL_WHT " %d", solved_count );
|
||||
else
|
||||
sprintf( sol, " Solved %d", solved_count );
|
||||
}
|
||||
|
||||
pthread_mutex_unlock(&stats_lock);
|
||||
global_hashcount = hashcount;
|
||||
global_hashrate = hashrate;
|
||||
@@ -787,9 +801,13 @@ static int share_result( int result, struct work *work, const char *reason )
|
||||
: ( 100. * rejected_count / total_submits ) );
|
||||
|
||||
if (use_colors)
|
||||
{
|
||||
sres = (result ? CL_GRN "Accepted" CL_WHT : CL_RED "Rejected" CL_WHT );
|
||||
}
|
||||
else
|
||||
{
|
||||
sres = (result ? "Accepted" : "Rejected" );
|
||||
}
|
||||
|
||||
// Contrary to rounding convention 100% means zero rejects, exactly 100%.
|
||||
// Rates > 99% and < 100% (rejects>0) display 99.9%.
|
||||
@@ -844,13 +862,13 @@ static int share_result( int result, struct work *work, const char *reason )
|
||||
else
|
||||
{
|
||||
#if ((defined(_WIN64) || defined(__WINDOWS__)))
|
||||
applog( LOG_NOTICE, "%s %lu/%lu (%s%%), diff %.3g, %s %sH/s",
|
||||
applog( LOG_NOTICE, "%s %lu/%lu (%s%%), diff %.3g%s, %s %sH/s",
|
||||
sres, ( result ? accepted_count : rejected_count ),
|
||||
total_submits, rate_s, sharediff, hr, hr_units );
|
||||
total_submits, rate_s, sharediff, sol, hr, hr_units );
|
||||
#else
|
||||
applog( LOG_NOTICE, "%s %lu/%lu (%s%%), diff %.3g, %s %sH/s, %dC",
|
||||
applog( LOG_NOTICE, "%s %lu/%lu (%s%%), diff %.3g%s, %s %sH/s, %dC",
|
||||
sres, ( result ? accepted_count : rejected_count ),
|
||||
total_submits, rate_s, sharediff, hr, hr_units,
|
||||
total_submits, rate_s, sharediff, sol, hr, hr_units,
|
||||
(uint32_t)cpu_temp(0) );
|
||||
#endif
|
||||
}
|
||||
@@ -1549,6 +1567,7 @@ void SHA256_gen_merkle_root( char* merkle_root, struct stratum_ctx* sctx )
|
||||
}
|
||||
}
|
||||
|
||||
// default
|
||||
void std_set_target( struct work* work, double job_diff )
|
||||
{
|
||||
work_set_target( work, job_diff / opt_diff_factor );
|
||||
@@ -1558,7 +1577,7 @@ void scrypt_set_target( struct work* work, double job_diff )
|
||||
{
|
||||
work_set_target( work, job_diff / (65536.0 * opt_diff_factor) );
|
||||
}
|
||||
|
||||
// another popular choice.
|
||||
void alt_set_target( struct work* work, double job_diff )
|
||||
{
|
||||
work_set_target( work, job_diff / (256.0 * opt_diff_factor) );
|
||||
@@ -1608,6 +1627,14 @@ void std_get_new_work( struct work* work, struct work* g_work, int thr_id,
|
||||
{
|
||||
uint32_t *nonceptr = algo_gate.get_nonceptr( work->data );
|
||||
|
||||
// the job_id check doesn't work as intended, it's a char pointer!
|
||||
// For stratum the pointers can be dereferenced and the strings compared,
|
||||
// benchmark not, getwork & gbt unsure.
|
||||
// || ( have_stratum && strcmp( work->job_id, g_work->job_id ) ) ) )
|
||||
// or
|
||||
// || ( !benchmark && strcmp( work->job_id, g_work->job_id ) ) ) )
|
||||
// For now leave it as is, it seems stable.
|
||||
|
||||
if ( memcmp( work->data, g_work->data, algo_gate.work_cmp_size )
|
||||
&& ( clean_job || ( *nonceptr >= *end_nonce_ptr )
|
||||
|| ( work->job_id != g_work->job_id ) ) )
|
||||
@@ -1617,7 +1644,7 @@ void std_get_new_work( struct work* work, struct work* g_work, int thr_id,
|
||||
*nonceptr = 0xffffffffU / opt_n_threads * thr_id;
|
||||
if ( opt_randomize )
|
||||
*nonceptr += ( (rand() *4 ) & UINT32_MAX ) / opt_n_threads;
|
||||
*end_nonce_ptr = ( 0xffffffffU / opt_n_threads ) * (thr_id+1) - 0x20;
|
||||
*end_nonce_ptr = ( 0xffffffffU / opt_n_threads ) * (thr_id+1) - 0x20;
|
||||
}
|
||||
else
|
||||
++(*nonceptr);
|
||||
@@ -1767,6 +1794,8 @@ static void *miner_thread( void *userdata )
|
||||
{
|
||||
algo_gate.wait_for_diff( &stratum );
|
||||
pthread_mutex_lock( &g_work_lock );
|
||||
if ( *algo_gate.get_nonceptr( work.data ) >= end_nonce )
|
||||
algo_gate.stratum_gen_work( &stratum, &g_work );
|
||||
algo_gate.get_new_work( &work, &g_work, thr_id, &end_nonce,
|
||||
stratum.job.clean );
|
||||
pthread_mutex_unlock( &g_work_lock );
|
||||
@@ -1866,33 +1895,38 @@ static void *miner_thread( void *userdata )
|
||||
hashes_done / (diff.tv_sec + diff.tv_usec * 1e-6);
|
||||
pthread_mutex_unlock(&stats_lock);
|
||||
}
|
||||
|
||||
// if nonce(s) submit work
|
||||
if ( nonce_found && !opt_benchmark )
|
||||
{
|
||||
/*
|
||||
int num_submitted = 0;
|
||||
// look for 4way nonces
|
||||
for ( int n = 0; n < 4; n++ )
|
||||
if ( work.nfound[n] )
|
||||
|
||||
for ( int n = 0; n < nonce_found; n++ )
|
||||
{
|
||||
*algo_gate.get_nonceptr( work.data ) = work.nonces[n];
|
||||
if ( submit_work( mythr, &work ) )
|
||||
{
|
||||
*algo_gate.get_nonceptr( work.data ) = work.nonces[n];
|
||||
if ( !submit_work( mythr, &work ) )
|
||||
{
|
||||
applog( LOG_WARNING, "Failed to submit share." );
|
||||
break;
|
||||
}
|
||||
applog( LOG_NOTICE, "Share submitted." );
|
||||
num_submitted++;
|
||||
applog( LOG_NOTICE, "Share submitted." );
|
||||
num_submitted++;
|
||||
}
|
||||
else
|
||||
{
|
||||
applog( LOG_WARNING, "Failed to submit share." );
|
||||
break;
|
||||
}
|
||||
}
|
||||
// must be a one way algo, nonce is already in work data
|
||||
if ( !num_submitted )
|
||||
{
|
||||
*/
|
||||
if ( !submit_work( mythr, &work ) )
|
||||
{
|
||||
applog( LOG_WARNING, "Failed to submit share." );
|
||||
break;
|
||||
}
|
||||
applog( LOG_NOTICE, "Share submitted." );
|
||||
}
|
||||
// }
|
||||
|
||||
// prevent stale work in solo
|
||||
// we can't submit a block twice!
|
||||
@@ -2348,7 +2382,7 @@ static void *stratum_thread(void *userdata )
|
||||
if ( !opt_quiet )
|
||||
{
|
||||
if (net_diff > 0.)
|
||||
applog(LOG_BLUE, "%s block %d, diff %.3f",
|
||||
applog(LOG_BLUE, "%s block %d, network diff %.3f",
|
||||
algo_names[opt_algo], stratum.bloc_height, net_diff);
|
||||
else
|
||||
applog(LOG_BLUE, "%s %s block %d", short_url,
|
||||
|
3
miner.h
3
miner.h
@@ -361,7 +361,6 @@ struct work {
|
||||
size_t xnonce2_len;
|
||||
unsigned char *xnonce2;
|
||||
uint32_t nonces[8];
|
||||
bool nfound[8];
|
||||
};
|
||||
|
||||
struct stratum_job {
|
||||
@@ -451,7 +450,7 @@ void applog_hash(void *hash);
|
||||
void format_hashrate(double hashrate, char *output);
|
||||
void print_hash_tests(void);
|
||||
|
||||
|
||||
void scale_hash_for_display ( double* hashrate, char* units );
|
||||
|
||||
struct thr_info {
|
||||
int id;
|
||||
|
Reference in New Issue
Block a user